1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
/* Forward declarations for the static helpers defined later in this file,
   grouped by purpose: code-transformation utilities first, then helpers
   dealing with loop peeling.
   NOTE(review): every line below carries a leading number that looks like
   line-number residue from an extraction pass -- confirm against the
   pristine revision before compiling.  */
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
76 cost_for_stmt (tree stmt)
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
80 switch (STMT_VINFO_TYPE (stmt_info))
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
/* NOTE(review): this block carries line-number residue from an extraction
   pass, and several interior lines (braces, else-arms, variable
   declarations such as `i' and `byte_misalign') appear elided -- restore
   from the pristine revision before compiling.  */
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available. */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
128 /* Cost model disabled. */
129 if (!flag_vect_cost_model)
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "cost model disabled.")
136 /* Requires loop versioning tests to handle misalignment.
137 FIXME: Make cost depend on number of stmts in may_misalign list. */
139 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
141 vec_outside_cost += TARG_COND_BRANCH_COST;
142 if (vect_print_dump_info (REPORT_DETAILS))
143 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
147 /* Count statements in scalar loop. Using this as scalar cost for a single
150 TODO: Add outer loop support.
152 TODO: Consider assigning different costs to different scalar
155 for (i = 0; i < nbbs; i++)
157 block_stmt_iterator si;
158 basic_block bb = bbs[i];
160 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
162 tree stmt = bsi_stmt (si);
163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
164 if (!STMT_VINFO_RELEVANT_P (stmt_info)
165 && !STMT_VINFO_LIVE_P (stmt_info))
167 scalar_single_iter_cost += cost_for_stmt (stmt);
168 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
169 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
173 /* Add additional cost for the peeled instructions in prologue and epilogue
176 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
177 at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
179 TODO: Build an expression that represents peel_iters for prologue and
180 epilogue to be used in a run-time test. */
182 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
184 if (byte_misalign < 0)
186 peel_iters_prologue = (vf - 1)/2;
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "cost model: "
189 "prologue peel iters set to (vf-1)/2.");
191 /* If peeling for alignment is unknown, loop bound of main loop becomes
193 peel_iters_epilogue = (vf - 1)/2;
194 if (vect_print_dump_info (REPORT_DETAILS))
195 fprintf (vect_dump, "cost model: "
196 "epilogue peel iters set to (vf-1)/2 because "
197 "peeling for alignment is unknown .");
/* Known misalignment: compute the exact number of prologue iterations
   from the element size and the byte misalignment.  */
203 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
204 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
205 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
206 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
208 peel_iters_prologue = nelements - (byte_misalign / element_size);
211 peel_iters_prologue = 0;
213 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
215 peel_iters_epilogue = (vf - 1)/2;
216 if (vect_print_dump_info (REPORT_DETAILS))
217 fprintf (vect_dump, "cost model: "
218 "epilogue peel iters set to (vf-1)/2 because "
219 "loop iterations are unknown .");
223 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
224 peel_iters_prologue = niters < peel_iters_prologue ?
225 niters : peel_iters_prologue;
226 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
230 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
231 two guards, one for the peeled loop and one for the vector loop. */
233 if (peel_iters_prologue)
235 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
236 if (vect_print_dump_info (REPORT_DETAILS))
237 fprintf (vect_dump, "cost model: Adding cost of checks for "
241 /* Requires an epilogue loop to finish up remaining iterations after vector
242 loop. Add cost of two guards, one for the peeled loop and one for the
245 if (peel_iters_epilogue
246 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
247 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
249 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
250 if (vect_print_dump_info (REPORT_DETAILS))
251 fprintf (vect_dump, "cost model : Adding cost of checks for "
/* Peeled iterations execute the scalar body; charge them to the
   one-time (outside-of-loop) cost.  */
255 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
256 + (peel_iters_epilogue * scalar_single_iter_cost);
258 /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only
259 information we provide for the target is whether testing against the
260 threshold involves a runtime test. */
261 if (targetm.vectorize.builtin_vectorization_cost)
263 bool runtime_test = false;
265 /* If the number of iterations is unknown, or the
266 peeling-for-misalignment amount is unknown, we will have to generate
267 a runtime test to test the loop count against the threshold. */
268 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
269 || (byte_misalign < 0))
272 targetm.vectorize.builtin_vectorization_cost (runtime_test);
273 if (vect_print_dump_info (REPORT_DETAILS))
274 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
275 targetm.vectorize.builtin_vectorization_cost (runtime_test));
278 /* Calculate number of iterations required to make the vector version
279 profitable, relative to the loop bodies only. The following condition
280 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
281 SIC = scalar iteration cost, VIC = vector iteration cost,
282 VOC = vector outside cost and VF = vectorization factor. */
284 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
286 if (vec_outside_cost == 0)
287 min_profitable_iters = 1;
290 min_profitable_iters = (vec_outside_cost * vf)
291 / ((scalar_single_iter_cost * vf)
/* Integer division truncates; bump the bound if the profitability
   inequality does not yet hold at the truncated value.  */
294 if ((scalar_single_iter_cost * vf * min_profitable_iters)
295 <= ((vec_inside_cost * min_profitable_iters)
296 + (vec_outside_cost * vf)))
297 min_profitable_iters++;
300 /* vector version will never be profitable. */
303 if (vect_print_dump_info (REPORT_DETAILS))
304 fprintf (vect_dump, "cost model: vector iteration cost = %d "
305 "is divisible by scalar iteration cost = %d by a factor "
306 "greater than or equal to the vectorization factor = %d .",
307 vec_inside_cost, scalar_single_iter_cost, vf);
311 if (vect_print_dump_info (REPORT_DETAILS))
313 fprintf (vect_dump, "Cost model analysis: \n");
314 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
316 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
318 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
319 fprintf (vect_dump, " prologue iterations: %d\n",
320 peel_iters_prologue);
321 fprintf (vect_dump, " epilogue iterations: %d\n",
322 peel_iters_epilogue);
323 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
324 min_profitable_iters);
325 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
326 min_profitable_iters < vf ? vf : min_profitable_iters);
/* Never return fewer iterations than one full vector.  */
329 min_profitable_iters =
330 min_profitable_iters < vf ? vf : min_profitable_iters;
332 /* Because the condition we create is:
333 if (niters <= min_profitable_iters)
334 then skip the vectorized loop. */
335 min_profitable_iters--;
336 return min_profitable_iters;
/* NOTE(review): block contains extraction line-number residue and elided
   lines (the else-arm for the non-reduc_code path, brace lines, and the
   declarations of outer_cost/vectype/reduction_op/code/orig_stmt/optab);
   restore from the pristine revision before compiling.  */
340 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
341 functions. Design better to avoid maintenance issues. */
343 /* Function vect_model_reduction_cost.
345 Models cost for a reduction operation, including the vector ops
346 generated within the strip-mine loop, the initial definition before
347 the loop, and the epilogue code that must be generated. */
350 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
359 enum machine_mode mode;
360 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
361 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
363 /* Cost of reduction op inside loop. */
364 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
/* The reduction operand is the last operand of the operation.  */
366 reduction_op = TREE_OPERAND (operation, op_type-1);
367 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
368 mode = TYPE_MODE (vectype);
369 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
372 orig_stmt = STMT_VINFO_STMT (stmt_info);
374 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
376 /* Add in cost for initial definition. */
377 outer_cost += TARG_SCALAR_TO_VEC_COST;
379 /* Determine cost of epilogue code.
381 We have a reduction operator that will reduce the vector in one statement.
382 Also requires scalar extract. */
384 if (reduc_code < NUM_TREE_CODES)
385 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
/* NOTE(review): an else-arm (no single-stmt reduction available) and its
   brace lines appear elided here.  */
388 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
390 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
391 int element_bitsize = tree_low_cst (bitsize, 1);
392 int nelements = vec_size_in_bits / element_bitsize;
394 optab = optab_for_tree_code (code, vectype);
396 /* We have a whole vector shift available. */
397 if (VECTOR_MODE_P (mode)
398 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
399 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
400 /* Final reduction via vector shifts and the reduction operator. Also
401 requires scalar extract. */
402 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
403 + TARG_VEC_TO_SCALAR_COST);
405 /* Use extracts and reduction op for final reduction. For N elements,
406 we have N extracts and N-1 reduction ops. */
407 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
410 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
412 if (vect_print_dump_info (REPORT_DETAILS))
413 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
414 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
415 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
419 /* Function vect_model_induction_cost.
421 Models cost for induction operations. */
424 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
426 /* loop cost for vec_loop. */
427 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
428 /* prologue cost for vec_init and vec_step. */
429 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
431 if (vect_print_dump_info (REPORT_DETAILS))
432 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
433 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
434 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
438 /* Function vect_model_simple_cost.
440 Models cost for simple operations, i.e. those that only emit ncopies of a
441 single op. Right now, this does not account for multiple insns that could
442 be generated for the single vector op. We will handle that shortly. */
445 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt)
449 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
451 /* FORNOW: Assuming maximum 2 args per stmts. */
454 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
455 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST;
458 if (vect_print_dump_info (REPORT_DETAILS))
459 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
460 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
461 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
465 /* Function vect_cost_strided_group_size
467 For strided load or store, return the group_size only if it is the first
468 load or store of a group, else return 1. This ensures that group size is
469 only returned once per group. */
472 vect_cost_strided_group_size (stmt_vec_info stmt_info)
474 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
476 if (first_stmt == STMT_VINFO_STMT (stmt_info))
477 return DR_GROUP_SIZE (stmt_info);
/* NOTE(review): block contains extraction line-number residue and elided
   lines (the declarations/initialization of group_size and cost, the
   else-arm setting group_size for the non-strided case, the `if
   (group_size > 1)' guard, and brace lines); restore from the pristine
   revision before compiling.  */
483 /* Function vect_model_store_cost
485 Models cost for stores. In the case of strided accesses, one access
486 has the overhead of the strided access attributed to it. */
489 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt)
/* A constant/invariant stored value costs one preheader stmt to build
   its vector form.  */
494 if (dt == vect_constant_def || dt == vect_invariant_def)
495 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST;
497 /* Strided access? */
498 if (DR_GROUP_FIRST_DR (stmt_info))
499 group_size = vect_cost_strided_group_size (stmt_info);
500 /* Not a strided access. */
504 /* Is this an access in a group of stores, which provide strided access?
505 If so, add in the cost of the permutes. */
508 /* Uses a high and low interleave operation for each needed permute. */
509 cost = ncopies * exact_log2(group_size) * group_size
510 * TARG_VEC_STMT_COST;
512 if (vect_print_dump_info (REPORT_DETAILS))
513 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
518 /* Costs of the stores. */
519 cost += ncopies * TARG_VEC_STORE_COST;
521 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
523 if (vect_print_dump_info (REPORT_DETAILS))
524 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
525 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
526 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
/* NOTE(review): block contains extraction line-number residue and elided
   lines (declarations of group_size/first_stmt/inner_cost/outer_cost, the
   `case dr_aligned:' label, `break' statements, else-arms, and brace
   lines); restore from the pristine revision before compiling.  Also note
   the misspelled local `alignment_support_cheme' ("scheme") -- present
   upstream, left as-is to match the pristine file.  */
530 /* Function vect_model_load_cost
532 Models cost for loads. In the case of strided accesses, the last access
533 has the overhead of the strided access attributed to it. Since unaligned
534 accesses are supported for loads, we also account for the costs of the
535 access scheme chosen. */
538 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
543 int alignment_support_cheme;
545 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
547 /* Strided accesses? */
548 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
551 group_size = vect_cost_strided_group_size (stmt_info);
552 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
554 /* Not a strided access. */
561 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
563 /* Is this an access in a group of loads providing strided access?
564 If so, add in the cost of the permutes. */
567 /* Uses an even and odd extract operations for each needed permute. */
568 inner_cost = ncopies * exact_log2(group_size) * group_size
569 * TARG_VEC_STMT_COST;
571 if (vect_print_dump_info (REPORT_DETAILS))
572 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
577 /* The loads themselves. */
578 switch (alignment_support_cheme)
582 inner_cost += ncopies * TARG_VEC_LOAD_COST;
584 if (vect_print_dump_info (REPORT_DETAILS))
585 fprintf (vect_dump, "vect_model_load_cost: aligned.");
589 case dr_unaligned_supported:
591 /* Here, we assign an additional cost for the unaligned load. */
592 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
594 if (vect_print_dump_info (REPORT_DETAILS))
595 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
600 case dr_unaligned_software_pipeline:
604 if (vect_print_dump_info (REPORT_DETAILS))
605 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
608 /* Unaligned software pipeline has a load of an address, an initial
609 load, and possibly a mask operation to "prime" the loop. However,
610 if this is an access in a group of loads, which provide strided
611 access, then the above cost should only be considered for one
612 access in the group. Inside the loop, there is a load op
613 and a realignment op. */
615 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
617 outer_cost = 2*TARG_VEC_STMT_COST;
618 if (targetm.vectorize.builtin_mask_for_load)
619 outer_cost += TARG_VEC_STMT_COST;
622 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
624 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
633 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
635 if (vect_print_dump_info (REPORT_DETAILS))
636 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
637 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
638 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
643 /* Function vect_get_new_vect_var.
645 Returns a name for a new variable. The current naming scheme appends the
646 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
647 the name of vectorizer generated variables, and appends that to NAME if
651 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
658 case vect_simple_var:
661 case vect_scalar_var:
664 case vect_pointer_var:
673 char* tmp = concat (prefix, name, NULL);
674 new_vect_var = create_tmp_var (type, tmp);
678 new_vect_var = create_tmp_var (type, prefix);
680 /* Mark vector typed variable as a gimple register variable. */
681 if (TREE_CODE (type) == VECTOR_TYPE)
682 DECL_GIMPLE_REG_P (new_vect_var) = true;
/* NOTE(review): block contains extraction line-number residue and elided
   lines (remaining parameters of the signature, declarations of
   data_ref_base/new_base_stmt/new_stmt/dest/vec_stmt/step, the `if
   (offset)' guard around the offset-scaling section, brace lines, and the
   final return); restore from the pristine revision before compiling.  */
688 /* Function vect_create_addr_base_for_vector_ref.
690 Create an expression that computes the address of the first memory location
691 that will be accessed for a data reference.
694 STMT: The statement containing the data reference.
695 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
696 OFFSET: Optional. If supplied, it is added to the initial address.
699 1. Return an SSA_NAME whose value is the address of the memory location of
700 the first vector of the data reference.
701 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
702 these statement(s) which define the returned SSA_NAME.
704 FORNOW: We are only handling array accesses with step 1. */
707 vect_create_addr_base_for_vector_ref (tree stmt,
711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
712 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
713 tree data_ref_base_expr = unshare_expr (DR_BASE_ADDRESS (dr));
714 tree base_name = build_fold_indirect_ref (data_ref_base_expr);
715 tree data_ref_base_var;
719 tree addr_base, addr_expr;
721 tree base_offset = unshare_expr (DR_OFFSET (dr));
722 tree init = unshare_expr (DR_INIT (dr));
723 tree vect_ptr_type, addr_expr2;
726 /* Create data_ref_base */
727 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base_expr), "batmp");
728 add_referenced_var (data_ref_base_var);
729 data_ref_base = force_gimple_operand (data_ref_base_expr, &new_base_stmt,
730 true, data_ref_base_var);
731 append_to_statement_list_force(new_base_stmt, new_stmt_list);
733 /* Create base_offset */
734 base_offset = size_binop (PLUS_EXPR, base_offset, init);
735 base_offset = fold_convert (sizetype, base_offset);
736 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
737 add_referenced_var (dest);
738 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
739 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Optional OFFSET handling: scale OFFSET by the access step and add it
   into base_offset.  */
743 tree tmp = create_tmp_var (sizetype, "offset");
746 /* For interleaved access step we divide STEP by the size of the
747 interleaving group. */
748 if (DR_GROUP_SIZE (stmt_info))
749 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
750 build_int_cst (TREE_TYPE (offset),
751 DR_GROUP_SIZE (stmt_info)));
755 add_referenced_var (tmp);
756 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
757 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
758 base_offset, offset);
759 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
760 append_to_statement_list_force (new_stmt, new_stmt_list);
763 /* base + base_offset */
764 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
767 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
769 /* addr_expr = addr_base */
770 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
771 get_name (base_name));
772 add_referenced_var (addr_expr);
773 vec_stmt = fold_convert (vect_ptr_type, addr_base);
774 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
775 get_name (base_name));
776 add_referenced_var (addr_expr2);
777 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
778 append_to_statement_list_force (new_stmt, new_stmt_list);
780 if (vect_print_dump_info (REPORT_DETAILS))
782 fprintf (vect_dump, "created ");
783 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
/* NOTE(review): block contains extraction line-number residue and elided
   lines (declarations of vect_ptr/vect_ptr_init/tag/base_name/new_temp/
   new_bb/vec_stmt/incr/insert_after, the `if (type)' / else around the
   two build_pointer_type calls, the `if (tag)' guards in section (2),
   brace lines, and the closing brace); restore from the pristine revision
   before compiling.  */
789 /* Function vect_create_data_ref_ptr.
791 Create a new pointer to vector type (vp), that points to the first location
792 accessed in the loop by STMT, along with the def-use update chain to
793 appropriately advance the pointer through the loop iterations. Also set
794 aliasing information for the pointer. This vector pointer is used by the
795 callers to this function to create a memory reference expression for vector
799 1. STMT: a stmt that references memory. Expected to be of the form
800 GIMPLE_MODIFY_STMT <name, data-ref> or
801 GIMPLE_MODIFY_STMT <data-ref, name>.
802 2. BSI: block_stmt_iterator where new stmts can be added.
803 3. OFFSET (optional): an offset to be added to the initial address accessed
804 by the data-ref in STMT.
805 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
806 pointing to the initial address.
807 5. TYPE: if not NULL indicates the required type of the data-ref
810 1. Declare a new ptr to vector_type, and have it point to the base of the
811 data reference (initial addressed accessed by the data reference).
812 For example, for vector of type V8HI, the following code is generated:
815 vp = (v8hi *)initial_address;
817 if OFFSET is not supplied:
818 initial_address = &a[init];
819 if OFFSET is supplied:
820 initial_address = &a[init + OFFSET];
822 Return the initial_address in INITIAL_ADDRESS.
824 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
825 update the pointer in each iteration of the loop.
827 Return the increment stmt that updates the pointer in PTR_INCR.
829 3. Return the pointer. */
832 vect_create_data_ref_ptr (tree stmt,
833 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
834 tree offset, tree *initial_address, tree *ptr_incr,
835 bool only_init, tree type)
838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
840 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
841 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
847 tree new_stmt_list = NULL_TREE;
848 edge pe = loop_preheader_edge (loop);
851 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
853 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
855 if (vect_print_dump_info (REPORT_DETAILS))
857 tree data_ref_base = base_name;
858 fprintf (vect_dump, "create vector-pointer variable to type: ");
859 print_generic_expr (vect_dump, vectype, TDF_SLIM);
860 if (TREE_CODE (data_ref_base) == VAR_DECL)
861 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
862 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
863 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
864 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
865 fprintf (vect_dump, " vectorizing a record based array ref: ");
866 else if (TREE_CODE (data_ref_base) == SSA_NAME)
867 fprintf (vect_dump, " vectorizing a pointer ref: ");
868 print_generic_expr (vect_dump, base_name, TDF_SLIM);
871 /** (1) Create the new vector-pointer variable: **/
873 vect_ptr_type = build_pointer_type (type);
875 vect_ptr_type = build_pointer_type (vectype);
876 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
877 get_name (base_name));
878 add_referenced_var (vect_ptr);
880 /** (2) Add aliasing information to the new vector-pointer:
881 (The points-to info (DR_PTR_INFO) may be defined later.) **/
883 tag = DR_SYMBOL_TAG (dr);
886 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
887 tag must be created with tag added to its may alias list. */
889 new_type_alias (vect_ptr, tag, DR_REF (dr));
891 set_symbol_mem_tag (vect_ptr, tag);
893 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
895 /** (3) Calculate the initial address the vector-pointer, and set
896 the vector-pointer to point to it before the loop: **/
898 /* Create: (&(base[init_val+offset]) in the loop preheader. */
899 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
901 pe = loop_preheader_edge (loop);
902 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
903 gcc_assert (!new_bb);
904 *initial_address = new_temp;
906 /* Create: p = (vectype *) initial_base */
907 vec_stmt = fold_convert (vect_ptr_type, new_temp);
908 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
909 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
910 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
911 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
912 gcc_assert (!new_bb);
915 /** (4) Handle the updating of the vector-pointer inside the loop: **/
917 if (only_init) /* No update in loop is required. */
919 /* Copy the points-to information if it exists. */
920 if (DR_PTR_INFO (dr))
921 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
922 return vect_ptr_init;
/* Otherwise create an IV that advances the pointer by one vector per
   iteration; PTR_INCR is set from the generated increment stmt.  */
926 block_stmt_iterator incr_bsi;
928 tree indx_before_incr, indx_after_incr;
931 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
932 create_iv (vect_ptr_init,
933 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
934 NULL_TREE, loop, &incr_bsi, insert_after,
935 &indx_before_incr, &indx_after_incr);
936 incr = bsi_stmt (incr_bsi);
937 set_stmt_info (stmt_ann (incr),
938 new_stmt_vec_info (incr, loop_vinfo));
940 /* Copy the points-to information if it exists. */
941 if (DR_PTR_INFO (dr))
943 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
944 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
946 merge_alias_info (vect_ptr_init, indx_before_incr);
947 merge_alias_info (vect_ptr_init, indx_after_incr);
951 return indx_before_incr;
956 /* Function bump_vector_ptr
958 Increment a pointer (to a vector type) by vector-size. Connect the new
959 increment stmt to the existing def-use update-chain of the pointer.
961 The pointer def-use update-chain before this function:
962 DATAREF_PTR = phi (p_0, p_2)
964 PTR_INCR: p_2 = DATAREF_PTR + step
966 The pointer def-use update-chain after this function:
967 DATAREF_PTR = phi (p_0, p_2)
969 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
971 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
974 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
976 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
977 The increment amount across iterations is also expected to be
979 BSI - location where the new update stmt is to be placed.
980 STMT - the original scalar memory-access stmt that is being vectorized.
982 Output: Return NEW_DATAREF_PTR as illustrated above.
987 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
991 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
993 tree vptr_type = TREE_TYPE (dataref_ptr);
994 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
995 tree update = TYPE_SIZE_UNIT (vectype);
999 tree new_dataref_ptr;
1001 incr_stmt = build_gimple_modify_stmt (ptr_var,
1002 build2 (POINTER_PLUS_EXPR, vptr_type,
1003 dataref_ptr, update));
1004 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1005 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1006 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1008 /* Update the vector-pointer's cross-iteration increment. */
1009 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1011 tree use = USE_FROM_PTR (use_p);
1013 if (use == dataref_ptr)
1014 SET_USE (use_p, new_dataref_ptr);
1016 gcc_assert (tree_int_cst_compare (use, update) == 0);
1019 /* Copy the points-to information if it exists. */
1020 if (DR_PTR_INFO (dr))
1021 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1022 merge_alias_info (new_dataref_ptr, dataref_ptr);
1024 return new_dataref_ptr;
1028 /* Function vect_create_destination_var.
1030 Create a new temporary of type VECTYPE. */
1033 vect_create_destination_var (tree scalar_dest, tree vectype)
1036 const char *new_name;
1038 enum vect_var_kind kind;
1040 kind = vectype ? vect_simple_var : vect_scalar_var;
1041 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1043 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1045 new_name = get_name (scalar_dest);
1048 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1049 add_referenced_var (vec_dest);
1055 /* Function vect_init_vector.
1057 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1058 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1059 used in the vectorization of STMT. */
1062 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
1064 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1065 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   /* Create a fresh vector variable to hold the initialization value.  */
1074 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1075 add_referenced_var (new_var);
1077 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1078 new_temp = make_ssa_name (new_var, init_stmt);
1079 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
   /* Insert INIT_STMT on the loop-preheader edge so it executes once,
      before the loop.  The insertion must not split the edge (the assert
      checks that no new basic block was created).  */
1081 pe = loop_preheader_edge (loop);
1082 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1083 gcc_assert (!new_bb);
1085 if (vect_print_dump_info (REPORT_DETAILS))
1087 fprintf (vect_dump, "created new init_stmt: ");
1088 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
   /* Return the SSA name defined by INIT_STMT.  */
1091 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1096 /* Function get_initial_def_for_induction
1099 IV_PHI - the initial value of the induction variable
1102 Return a vector variable, initialized with the first VF values of
1103 the induction variable. E.g., for an iv with IV_PHI='X' and
1104 evolution S, for a vector of 4 units, we want to return:
1105 [X, X + S, X + 2*S, X + 3*S]. */
1108 get_initial_def_for_induction (tree iv_phi)
1110 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1111 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1112 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1113 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1114 tree vectype = get_vectype_for_scalar_type (scalar_type);
1115 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1116 edge pe = loop_preheader_edge (loop);
1118 block_stmt_iterator bsi;
1119 tree vec, vec_init, vec_step, t;
1124 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1125 tree init_expr, step_expr;
1126 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   /* Number of vector stmts needed when VF > nunits (see the unrolling
      comment further below).  */
1129 int ncopies = vf / nunits;
1131 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1133 tree stmt = NULL_TREE;
1134 block_stmt_iterator si;
1135 basic_block bb = bb_for_stmt (iv_phi);
1137 gcc_assert (phi_info);
1138 gcc_assert (ncopies >= 1);
1140 /* Find the first insertion point in the BB. */
1141 si = bsi_after_labels (bb);
1142 stmt = bsi_stmt (si);
   /* Extract the initial value and step of the IV from its scalar
      evolution; vect_is_simple_iv_evolution fills INIT_EXPR/STEP_EXPR.  */
1144 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1145 gcc_assert (access_fn);
1146 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1147 &init_expr, &step_expr);
1150 /* Create the vector that holds the initial_value of the induction. */
1151 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1152 add_referenced_var (new_var);
   /* Gimplify INIT_EXPR; any stmts it needs go on the preheader edge.  */
1154 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1157 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1158 gcc_assert (!new_bb);
   /* Build the element list [X, X+S, X+2*S, ...] one element at a time,
      each new element computed from the previous one in the preheader.  */
1162 t = tree_cons (NULL_TREE, new_name, t);
1163 for (i = 1; i < nunits; i++)
1167 /* Create: new_name = new_name + step_expr */
1168 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1169 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1170 new_name = make_ssa_name (new_var, init_stmt);
1171 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1173 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1174 gcc_assert (!new_bb);
1176 if (vect_print_dump_info (REPORT_DETAILS))
1178 fprintf (vect_dump, "created new init_stmt: ");
1179 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1181 t = tree_cons (NULL_TREE, new_name, t);
   /* The list was built back-to-front; nreverse puts element 0 first.  */
1183 vec = build_constructor_from_list (vectype, nreverse (t));
1184 vec_init = vect_init_vector (stmt, vec, vectype);
1187 /* Create the vector that holds the step of the induction. */
   /* Each lane advances by VF*S per iteration: [VF*S, VF*S, ..., VF*S].  */
1188 expr = build_int_cst (scalar_type, vf);
1189 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1191 for (i = 0; i < nunits; i++)
1192 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1193 vec = build_constructor_from_list (vectype, t);
1194 vec_step = vect_init_vector (stmt, vec, vectype);
1197 /* Create the following def-use cycle:
1199 vec_init = [X, X+S, X+2*S, X+3*S]
1200 vec_step = [VF*S, VF*S, VF*S, VF*S]
1202 vec_iv = PHI <vec_init, vec_loop>
1206 vec_loop = vec_iv + vec_step; */
1208 /* Create the induction-phi that defines the induction-operand. */
1209 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1210 add_referenced_var (vec_dest);
1211 induction_phi = create_phi_node (vec_dest, loop->header);
1212 set_stmt_info (get_stmt_ann (induction_phi),
1213 new_stmt_vec_info (induction_phi, loop_vinfo));
1214 induc_def = PHI_RESULT (induction_phi);
1216 /* Create the iv update inside the loop */
1217 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1218 build2 (PLUS_EXPR, vectype,
1219 induc_def, vec_step));
1220 vec_def = make_ssa_name (vec_dest, new_stmt);
1221 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1222 bsi = bsi_for_stmt (stmt);
1223 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1225 /* Set the arguments of the phi node: */
   /* Preheader edge carries the initial vector; latch edge carries the
      updated value, closing the def-use cycle.  */
1226 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1227 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1230 /* In case the vectorization factor (VF) is bigger than the number
1231 of elements that we can fit in a vectype (nunits), we have to generate
1232 more than one vector stmt - i.e - we need to "unroll" the
1233 vector stmt by a factor VF/nunits. For more details see documentation
1234 in vectorizable_operation. */
1238 stmt_vec_info prev_stmt_vinfo;
1240 /* Create the vector that holds the step of the induction. */
   /* For the unrolled copies each step is nunits*S (not VF*S), since
      consecutive copies are only nunits lanes apart.  */
1241 expr = build_int_cst (scalar_type, nunits);
1242 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1244 for (i = 0; i < nunits; i++)
1245 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1246 vec = build_constructor_from_list (vectype, t);
1247 vec_step = vect_init_vector (stmt, vec, vectype);
1249 vec_def = induc_def;
1250 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1251 for (i = 1; i < ncopies; i++)
1255 /* vec_i = vec_prev + vec_{step*nunits} */
1256 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1257 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1258 vec_def = make_ssa_name (vec_dest, new_stmt);
1259 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1260 bsi = bsi_for_stmt (stmt);
1261 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
   /* Chain the copies via STMT_VINFO_RELATED_STMT so that
      vect_get_vec_def_for_stmt_copy can walk from one copy to the next.  */
1263 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1264 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1268 if (vect_print_dump_info (REPORT_DETAILS))
1270 fprintf (vect_dump, "transform induction: created def-use cycle:");
1271 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1272 fprintf (vect_dump, "\n");
1273 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
   /* Record the vectorized form of IV_PHI.  */
1276 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1281 /* Function vect_get_vec_def_for_operand.
1283 OP is an operand in STMT. This function returns a (vector) def that will be
1284 used in the vectorized stmt for STMT.
1286 In the case that OP is an SSA_NAME which is defined in the loop, then
1287 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1289 In case OP is an invariant or constant, a new stmt that creates a vector def
1290 needs to be introduced.

   SCALAR_DEF, if non-NULL, receives the scalar def of OP where one is
   meaningful (see the per-case handling below).  */
1293 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1298 stmt_vec_info def_stmt_info = NULL;
1299 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1300 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1301 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1302 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1303 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1309 enum vect_def_type dt;
1313 if (vect_print_dump_info (REPORT_DETAILS))
1315 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1316 print_generic_expr (vect_dump, op, TDF_SLIM);
   /* Classify OP (constant / invariant / loop def / reduction / induction);
      analysis already guaranteed this succeeds, hence the assert.  */
1319 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1320 gcc_assert (is_simple_use);
1321 if (vect_print_dump_info (REPORT_DETAILS))
1325 fprintf (vect_dump, "def = ");
1326 print_generic_expr (vect_dump, def, TDF_SLIM);
1330 fprintf (vect_dump, " def_stmt = ");
1331 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1337 /* Case 1: operand is a constant. */
1338 case vect_constant_def:
1343 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1344 if (vect_print_dump_info (REPORT_DETAILS))
1345 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1347 for (i = nunits - 1; i >= 0; --i)
1349 t = tree_cons (NULL_TREE, op, t);
   /* All elements are constants, so a VECTOR_CST can be used.  */
1351 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1352 vec_cst = build_vector (vector_type, t);
1354 return vect_init_vector (stmt, vec_cst, vector_type);
1357 /* Case 2: operand is defined outside the loop - loop invariant. */
1358 case vect_invariant_def:
1363 /* Create 'vec_inv = {inv,inv,..,inv}' */
1364 if (vect_print_dump_info (REPORT_DETAILS))
1365 fprintf (vect_dump, "Create vector_inv.")
1367 for (i = nunits - 1; i >= 0; --i)
1369 t = tree_cons (NULL_TREE, def, t);
1372 /* FIXME: use build_constructor directly. */
1373 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1374 vec_inv = build_constructor_from_list (vector_type, t);
1376 return vect_init_vector (stmt, vec_inv, vector_type);
1379 /* Case 3: operand is defined inside the loop. */
1383 *scalar_def = def_stmt;
1385 /* Get the def from the vectorized stmt. */
1386 def_stmt_info = vinfo_for_stmt (def_stmt);
1387 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1388 gcc_assert (vec_stmt);
1389 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1393 /* Case 4: operand is defined by a loop header phi - reduction */
1394 case vect_reduction_def:
1396 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1398 /* Get the def before the loop */
1399 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1400 return get_initial_def_for_reduction (stmt, op, scalar_def);
1403 /* Case 5: operand is defined by loop-header phi - induction. */
1404 case vect_induction_def:
1406 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1408 /* Get the def before the loop */
1409 return get_initial_def_for_induction (def_stmt);
1418 /* Function vect_get_vec_def_for_stmt_copy
1420 Return a vector-def for an operand. This function is used when the
1421 vectorized stmt to be created (by the caller to this function) is a "copy"
1422 created in case the vectorized result cannot fit in one vector, and several
1423 copies of the vector-stmt are required. In this case the vector-def is
1424 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1425 of the stmt that defines VEC_OPRND.
1426 DT is the type of the vector def VEC_OPRND.
1429 In case the vectorization factor (VF) is bigger than the number
1430 of elements that can fit in a vectype (nunits), we have to generate
1431 more than one vector stmt to vectorize the scalar stmt. This situation
1432 arises when there are multiple data-types operated upon in the loop; the
1433 smallest data-type determines the VF, and as a result, when vectorizing
1434 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1435 vector stmt (each computing a vector of 'nunits' results, and together
1436 computing 'VF' results in each iteration). This function is called when
1437 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1438 which VF=16 and nunits=4, so the number of copies required is 4):
1440 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1442 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1443 VS1.1: vx.1 = memref1 VS1.2
1444 VS1.2: vx.2 = memref2 VS1.3
1445 VS1.3: vx.3 = memref3
1447 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1448 VSnew.1: vz1 = vx.1 + ... VSnew.2
1449 VSnew.2: vz2 = vx.2 + ... VSnew.3
1450 VSnew.3: vz3 = vx.3 + ...
1452 The vectorization of S1 is explained in vectorizable_load.
1453 The vectorization of S2:
1454 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1455 the function 'vect_get_vec_def_for_operand' is called to
1456 get the relevant vector-def for each operand of S2. For operand x it
1457 returns the vector-def 'vx.0'.
1459 To create the remaining copies of the vector-stmt (VSnew.j), this
1460 function is called to get the relevant vector-def for each operand. It is
1461 obtained from the respective VS1.j stmt, which is recorded in the
1462 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1464 For example, to obtain the vector-def 'vx.1' in order to create the
1465 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1466 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1467 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1468 and return its def ('vx.1').
1469 Overall, to create the above sequence this function will be called 3 times:
1470 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1471 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1472 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1475 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1477 tree vec_stmt_for_operand;
1478 stmt_vec_info def_stmt_info;
1480 /* Do nothing; can reuse same def. */
   /* Invariants and constants are the same in every copy, so VEC_OPRND
      itself is returned unchanged.  */
1481 if (dt == vect_invariant_def || dt == vect_constant_def )
   /* Otherwise: follow the defining stmt's RELATED_STMT link to the next
      copy in the chain, and return its def.  */
1484 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1485 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1486 gcc_assert (def_stmt_info);
1487 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1488 gcc_assert (vec_stmt_for_operand);
1489 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1495 /* Function vect_finish_stmt_generation.
1497 Insert a new stmt. 

   VEC_STMT is inserted immediately before STMT (the scalar stmt being
   vectorized) at iterator BSI, gets a fresh stmt_vec_info, and inherits
   STMT's source location.  */
1500 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1501 block_stmt_iterator *bsi)
1503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1504 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1506 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
   /* Attach vectorizer bookkeeping to the new stmt.  */
1507 set_stmt_info (get_stmt_ann (vec_stmt),
1508 new_stmt_vec_info (vec_stmt, loop_vinfo));
1510 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "add new stmt: ");
1513 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1516 /* Make sure bsi points to the stmt that is being vectorized. */
1517 gcc_assert (stmt == bsi_stmt (*bsi));
   /* Propagate the scalar stmt's location for debug info; the API differs
      depending on whether mapped locations are in use.  */
1519 #ifdef USE_MAPPED_LOCATION
1520 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1522 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1527 /* Function get_initial_def_for_reduction
1530 STMT - a stmt that performs a reduction operation in the loop.
1531 INIT_VAL - the initial value of the reduction variable
1534 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1535 of the reduction (used for adjusting the epilog - see below).
1536 Return a vector variable, initialized according to the operation that STMT
1537 performs. This vector will be used as the initial value of the
1538 vector of partial results.
1540 Option1 (adjust in epilog): Initialize the vector as follows:
1543 min/max: [init_val,init_val,..,init_val,init_val]
1544 bit and/or: [init_val,init_val,..,init_val,init_val]
1545 and when necessary (e.g. add/mult case) let the caller know
1546 that it needs to adjust the result by init_val.
1548 Option2: Initialize the vector as follows:
1549 add: [0,0,...,0,init_val]
1550 mult: [1,1,...,1,init_val]
1551 min/max: [init_val,init_val,...,init_val]
1552 bit and/or: [init_val,init_val,...,init_val]
1553 and no adjustments are needed.
1555 For example, for the following code:
1561 STMT is 's = s + a[i]', and the reduction variable is 's'.
1562 For a vector of 4 units, we want to return either [0,0,0,init_val],
1563 or [0,0,0,0] and let the caller know that it needs to adjust
1564 the result at the end by 'init_val'.
1566 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1567 initialization vector is simpler (same element in all entries).
1568 A cost model should help decide between these two schemes. */
1571 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1573 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1574 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1575 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1577 tree type = TREE_TYPE (init_val);
   /* Only integral and scalar-float reductions are handled here.  */
1585 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1586 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
1590 case WIDEN_SUM_EXPR:
   /* Additive reductions: start from a vector of zeros and let the caller
      add INIT_VAL back in the epilog ('adjust in epilog' scheme above).  */
1593 *adjustment_def = init_val;
1594 /* Create a vector of zeros for init_def. */
1595 if (INTEGRAL_TYPE_P (type))
1596 def_for_init = build_int_cst (type, 0);
1598 def_for_init = build_real (type, dconst0);
1599 for (i = nunits - 1; i >= 0; --i)
1600 t = tree_cons (NULL_TREE, def_for_init, t);
1601 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1602 init_def = build_vector (vector_type, t);
   /* Other codes: no epilog adjustment is needed.  */
1607 *adjustment_def = NULL_TREE;
1619 /* Function vect_create_epilog_for_reduction
1621 Create code at the loop-epilog to finalize the result of a reduction
1624 VECT_DEF is a vector of partial results.
1625 REDUC_CODE is the tree-code for the epilog reduction.
1626 STMT is the scalar reduction stmt that is being vectorized.
1627 REDUCTION_PHI is the phi-node that carries the reduction computation.
1630 1. Creates the reduction def-use cycle: sets the arguments for
1632 The loop-entry argument is the vectorized initial-value of the reduction.
1633 The loop-latch argument is VECT_DEF - the vector of partial sums.
1634 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1635 by applying the operation specified by REDUC_CODE if available, or by
1636 other means (whole-vector shifts or a scalar loop).
1637 The function also creates a new phi node at the loop exit to preserve
1638 loop-closed form, as illustrated below.
1640 The flow at the entry to this function:
1643 vec_def = phi <null, null> # REDUCTION_PHI
1644 VECT_DEF = vector_stmt # vectorized form of STMT
1645 s_loop = scalar_stmt # (scalar) STMT
1647 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1651 The above is transformed by this function into:
1654 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1655 VECT_DEF = vector_stmt # vectorized form of STMT
1656 s_loop = scalar_stmt # (scalar) STMT
1658 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1659 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1660 v_out2 = reduce <v_out1>
1661 s_out3 = extract_field <v_out2, 0>
1662 s_out4 = adjust_result <s_out3>
1668 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1669 enum tree_code reduc_code, tree reduction_phi)
1671 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1673 enum machine_mode mode;
1674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1675 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1676 basic_block exit_bb;
1680 block_stmt_iterator exit_bsi;
1685 tree new_scalar_dest, exit_phi;
1686 tree bitsize, bitpos, bytesize;
1687 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1688 tree scalar_initial_def;
1689 tree vec_initial_def;
1691 imm_use_iterator imm_iter;
1692 use_operand_p use_p;
1693 bool extract_scalar_result;
1697 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
   /* The reduction variable is by convention the last operand of the
      scalar reduction operation.  */
1700 op_type = TREE_OPERAND_LENGTH (operation);
1701 reduction_op = TREE_OPERAND (operation, op_type-1);
1702 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1703 mode = TYPE_MODE (vectype);
1705 /*** 1. Create the reduction def-use cycle ***/
1707 /* 1.1 set the loop-entry arg of the reduction-phi: */
1708 /* For the case of reduction, vect_get_vec_def_for_operand returns
1709 the scalar def before the loop, that defines the initial value
1710 of the reduction variable. */
1711 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1712 &scalar_initial_def);
1713 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1715 /* 1.2 set the loop-latch arg for the reduction-phi: */
1716 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1718 if (vect_print_dump_info (REPORT_DETAILS))
1720 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1721 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1722 fprintf (vect_dump, "\n");
1723 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1727 /*** 2. Create epilog code
1728 The reduction epilog code operates across the elements of the vector
1729 of partial results computed by the vectorized loop.
1730 The reduction epilog code consists of:
1731 step 1: compute the scalar result in a vector (v_out2)
1732 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1733 step 3: adjust the scalar result (s_out3) if needed.
1735 Step 1 can be accomplished using one the following three schemes:
1736 (scheme 1) using reduc_code, if available.
1737 (scheme 2) using whole-vector shifts, if available.
1738 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1741 The overall epilog code looks like this:
1743 s_out0 = phi <s_loop> # original EXIT_PHI
1744 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1745 v_out2 = reduce <v_out1> # step 1
1746 s_out3 = extract_field <v_out2, 0> # step 2
1747 s_out4 = adjust_result <s_out3> # step 3
1749 (step 3 is optional, and step2 1 and 2 may be combined).
1750 Lastly, the uses of s_out0 are replaced by s_out4.
1754 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1755 v_out1 = phi <v_loop> */
1757 exit_bb = single_exit (loop)->dest;
1758 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1759 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1760 exit_bsi = bsi_after_labels (exit_bb);
1762 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1763 (i.e. when reduc_code is not available) and in the final adjustment
1764 code (if needed). Also get the original scalar reduction variable as
1765 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1766 represents a reduction pattern), the tree-code and scalar-def are
1767 taken from the original stmt that the pattern-stmt (STMT) replaces.
1768 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1769 are taken from STMT. */
1771 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1774 /* Regular reduction */
1779 /* Reduction pattern */
1780 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1781 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1782 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1784 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1785 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1786 scalar_type = TREE_TYPE (scalar_dest);
1787 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1788 bitsize = TYPE_SIZE (scalar_type);
1789 bytesize = TYPE_SIZE_UNIT (scalar_type);
1791 /* 2.3 Create the reduction code, using one of the three schemes described
   above.  A REDUC_CODE below NUM_TREE_CODES means a real reduction
   tree-code was supplied (scheme 1); otherwise fall back to shifts or a
   scalar loop.  */
1794 if (reduc_code < NUM_TREE_CODES)
1798 /*** Case 1: Create:
1799 v_out2 = reduc_expr <v_out1> */
1801 if (vect_print_dump_info (REPORT_DETAILS))
1802 fprintf (vect_dump, "Reduce using direct vector reduction.");
1804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1805 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1806 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1807 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1808 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1809 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1811 extract_scalar_result = true;
1815 enum tree_code shift_code = 0;
1816 bool have_whole_vector_shift = true;
1818 int element_bitsize = tree_low_cst (bitsize, 1);
1819 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
   /* Scheme 2 requires the target to support a whole-vector shift.  */
1822 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
1823 shift_code = VEC_RSHIFT_EXPR;
1825 have_whole_vector_shift = false;
1827 /* Regardless of whether we have a whole vector shift, if we're
1828 emulating the operation via tree-vect-generic, we don't want
1829 to use it. Only the first round of the reduction is likely
1830 to still be profitable via emulation. */
1831 /* ??? It might be better to emit a reduction tree code here, so that
1832 tree-vect-generic can expand the first round via bit tricks. */
1833 if (!VECTOR_MODE_P (mode))
1834 have_whole_vector_shift = false;
1837 optab optab = optab_for_tree_code (code, vectype);
1838 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
1839 have_whole_vector_shift = false;
1842 if (have_whole_vector_shift)
1844 /*** Case 2: Create:
1845 for (offset = VS/2; offset >= element_size; offset/=2)
1847 Create: va' = vec_shift <va, offset>
1848 Create: va = vop <va, va'>
   (log2(nunits) shift+op rounds; the result ends up in element 0
   modulo endianness, extracted in step 2.4 below.)
1851 if (vect_print_dump_info (REPORT_DETAILS))
1852 fprintf (vect_dump, "Reduce using vector shifts");
1854 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1855 new_temp = PHI_RESULT (new_phi);
1857 for (bit_offset = vec_size_in_bits/2;
1858 bit_offset >= element_bitsize;
1861 tree bitpos = size_int (bit_offset);
1862 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1863 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1864 new_name = make_ssa_name (vec_dest, epilog_stmt);
1865 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1866 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1868 tmp = build2 (code, vectype, new_name, new_temp);
1869 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1870 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1871 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1872 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1875 extract_scalar_result = true;
1881 /*** Case 3: Create:
1882 s = extract_field <v_out2, 0>
1883 for (offset = element_size;
1884 offset < vector_size;
1885 offset += element_size;)
1887 Create: s' = extract_field <v_out2, offset>
1888 Create: s = op <s, s'>
   (scalar fallback: extract each element with BIT_FIELD_REF and
   accumulate; combines steps 1 and 2.)
1891 if (vect_print_dump_info (REPORT_DETAILS))
1892 fprintf (vect_dump, "Reduce using scalar code. ");
1894 vec_temp = PHI_RESULT (new_phi);
1895 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1896 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1898 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1899 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1900 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1901 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1902 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1904 for (bit_offset = element_bitsize;
1905 bit_offset < vec_size_in_bits;
1906 bit_offset += element_bitsize)
1909 tree bitpos = bitsize_int (bit_offset);
1910 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1913 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1914 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1915 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1916 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1917 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1919 tmp = build2 (code, scalar_type, new_name, new_temp);
1920 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1921 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1922 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1923 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1926 extract_scalar_result = false;
1930 /* 2.4 Extract the final scalar result. Create:
1931 s_out3 = extract_field <v_out2, bitpos> */
1933 if (extract_scalar_result)
1937 if (vect_print_dump_info (REPORT_DETAILS))
1938 fprintf (vect_dump, "extract scalar result");
   /* On big-endian targets the result lands in the last element,
      so compute its bit position; on little-endian it is element 0.  */
1940 if (BYTES_BIG_ENDIAN)
1941 bitpos = size_binop (MULT_EXPR,
1942 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1943 TYPE_SIZE (scalar_type));
1945 bitpos = bitsize_zero_node;
1947 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1948 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1949 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1950 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1951 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1952 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1955 /* 2.4 Adjust the final result by the initial value of the reduction
1956 variable. (When such adjustment is not needed, then
1957 'scalar_initial_def' is zero).
1960 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1962 if (scalar_initial_def)
1964 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1965 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1966 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1967 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1968 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1971 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1973 /* Find the loop-closed-use at the loop exit of the original scalar result.
1974 (The reduction result is expected to have two immediate uses - one at the
1975 latch block, and one at the loop exit). */
1977 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1979 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1981 exit_phi = USE_STMT (use_p);
1985 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1986 gcc_assert (exit_phi);
1987 /* Replace the uses: */
1988 orig_name = PHI_RESULT (exit_phi);
1989 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1990 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1991 SET_USE (use_p, new_temp);
1995 /* Function vectorizable_reduction.
1997 Check if STMT performs a reduction operation that can be vectorized.
1998 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1999 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2000 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2002 This function also handles reduction idioms (patterns) that have been
2003 recognized in advance during vect_pattern_recog. In this case, STMT may be
2005 X = pattern_expr (arg0, arg1, ..., X)
2006 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
2007 sequence that had been detected and replaced by the pattern-stmt (STMT).
2009 In some cases of reduction patterns, the type of the reduction variable X is
2010 different than the type of the other arguments of STMT.
2011 In such cases, the vectype that is used when transforming STMT into a vector
2012 stmt is different than the vectype that is used to determine the
2013 vectorization factor, because it consists of a different number of elements
2014 than the actual number of elements that are being operated upon in parallel.
2016 For example, consider an accumulation of shorts into an int accumulator.
2017 On some targets it's possible to vectorize this pattern operating on 8
2018 shorts at a time (hence, the vectype for purposes of determining the
2019 vectorization factor should be V8HI); on the other hand, the vectype that
2020 is used to create the vector form is actually V4SI (the type of the result).
2022 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2023 indicates what is the actual level of parallelism (V8HI in the example), so
2024 that the right vectorization factor would be derived. This vectype
2025 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2026 be used to create the vectorized stmt. The right vectype for the vectorized
2027 stmt is obtained from the type of the result X:
2028 get_vectype_for_scalar_type (TREE_TYPE (X))
2030 This means that, contrary to "regular" reductions (or "regular" stmts in
2031 general), the following equation:
2032 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2033 does *NOT* necessarily hold for reduction patterns. */
2036 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2041 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2044 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2045 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2047 enum tree_code code, orig_code, epilog_reduc_code = 0;
2048 enum machine_mode vec_mode;
2050 optab optab, reduc_optab;
2051 tree new_temp = NULL_TREE;
2053 enum vect_def_type dt;
2058 stmt_vec_info orig_stmt_info;
2059 tree expr = NULL_TREE;
2061 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2062 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2063 stmt_vec_info prev_stmt_info;
2065 tree new_stmt = NULL_TREE;
2068 gcc_assert (ncopies >= 1);
2070 /* 1. Is vectorizable reduction? */
2072 /* Not supportable if the reduction variable is used in the loop. */
2073 if (STMT_VINFO_RELEVANT_P (stmt_info))
2076 if (!STMT_VINFO_LIVE_P (stmt_info))
2079 /* Make sure it was already recognized as a reduction computation. */
2080 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2083 /* 2. Has this been recognized as a reduction pattern?
2085 Check if STMT represents a pattern that has been recognized
2086 in earlier analysis stages. For stmts that represent a pattern,
2087 the STMT_VINFO_RELATED_STMT field records the last stmt in
2088 the original sequence that constitutes the pattern. */
2090 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2093 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2094 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2095 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2096 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2099 /* 3. Check the operands of the operation. The first operands are defined
2100 inside the loop body. The last operand is the reduction variable,
2101 which is defined by the loop-header-phi. */
2103 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2105 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2106 code = TREE_CODE (operation);
2107 op_type = TREE_OPERAND_LENGTH (operation);
2108 if (op_type != binary_op && op_type != ternary_op)
2110 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2111 scalar_type = TREE_TYPE (scalar_dest);
2113 /* All uses but the last are expected to be defined in the loop.
2114 The last use is the reduction variable. */
2115 for (i = 0; i < op_type-1; i++)
2117 op = TREE_OPERAND (operation, i);
2118 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2119 gcc_assert (is_simple_use);
2120 if (dt != vect_loop_def
2121 && dt != vect_invariant_def
2122 && dt != vect_constant_def
2123 && dt != vect_induction_def)
2127 op = TREE_OPERAND (operation, i);
2128 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2129 gcc_assert (is_simple_use);
2130 gcc_assert (dt == vect_reduction_def);
2131 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2133 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2135 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2137 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2140 /* 4. Supportable by target? */
2142 /* 4.1. check support for the operation in the loop */
2143 optab = optab_for_tree_code (code, vectype);
2146 if (vect_print_dump_info (REPORT_DETAILS))
2147 fprintf (vect_dump, "no optab.");
2150 vec_mode = TYPE_MODE (vectype);
2151 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "op not supported by target.");
2155 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2156 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2157 < vect_min_worthwhile_factor (code))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "proceeding using word mode.");
2163 /* Worthwhile without SIMD support? */
2164 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2165 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2166 < vect_min_worthwhile_factor (code))
2168 if (vect_print_dump_info (REPORT_DETAILS))
2169 fprintf (vect_dump, "not worthwhile without SIMD support.");
2173 /* 4.2. Check support for the epilog operation.
2175 If STMT represents a reduction pattern, then the type of the
2176 reduction variable may be different than the type of the rest
2177 of the arguments. For example, consider the case of accumulation
2178 of shorts into an int accumulator; The original code:
2179 S1: int_a = (int) short_a;
2180 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2183 STMT: int_acc = widen_sum <short_a, int_acc>
2186 1. The tree-code that is used to create the vector operation in the
2187 epilog code (that reduces the partial results) is not the
2188 tree-code of STMT, but is rather the tree-code of the original
2189 stmt from the pattern that STMT is replacing. I.e, in the example
2190 above we want to use 'widen_sum' in the loop, but 'plus' in the
2192 2. The type (mode) we use to check available target support
2193 for the vector operation to be created in the *epilog*, is
2194 determined by the type of the reduction variable (in the example
2195 above we'd check this: plus_optab[vect_int_mode]).
2196 However the type (mode) we use to check available target support
2197 for the vector operation to be created *inside the loop*, is
2198 determined by the type of the other arguments to STMT (in the
2199 example we'd check this: widen_sum_optab[vect_short_mode]).
2201 This is contrary to "regular" reductions, in which the types of all
2202 the arguments are the same as the type of the reduction variable.
2203 For "regular" reductions we can therefore use the same vector type
2204 (and also the same tree-code) when generating the epilog code and
2205 when generating the code inside the loop. */
2209 /* This is a reduction pattern: get the vectype from the type of the
2210 reduction variable, and get the tree-code from orig_stmt. */
2211 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2212 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2213 vec_mode = TYPE_MODE (vectype);
2217 /* Regular reduction: use the same vectype and tree-code as used for
2218 the vector code inside the loop can be used for the epilog code. */
2222 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2224 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2227 if (vect_print_dump_info (REPORT_DETAILS))
2228 fprintf (vect_dump, "no optab for reduction.");
2229 epilog_reduc_code = NUM_TREE_CODES;
2231 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2233 if (vect_print_dump_info (REPORT_DETAILS))
2234 fprintf (vect_dump, "reduc op not supported by target.");
2235 epilog_reduc_code = NUM_TREE_CODES;
2238 if (!vec_stmt) /* transformation not required. */
2240 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2241 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2247 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "transform reduction.");
2250 /* Create the destination vector */
2251 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2253 /* Create the reduction-phi that defines the reduction-operand. */
2254 new_phi = create_phi_node (vec_dest, loop->header);
2256 /* In case the vectorization factor (VF) is bigger than the number
2257 of elements that we can fit in a vectype (nunits), we have to generate
2258 more than one vector stmt - i.e - we need to "unroll" the
2259 vector stmt by a factor VF/nunits. For more details see documentation
2260 in vectorizable_operation. */
2262 prev_stmt_info = NULL;
2263 for (j = 0; j < ncopies; j++)
2268 op = TREE_OPERAND (operation, 0);
2269 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2270 if (op_type == ternary_op)
2272 op = TREE_OPERAND (operation, 1);
2273 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2276 /* Get the vector def for the reduction variable from the phi node */
2277 reduc_def = PHI_RESULT (new_phi);
2281 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2282 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2283 if (op_type == ternary_op)
2284 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2286 /* Get the vector def for the reduction variable from the vectorized
2287 reduction operation generated in the previous iteration (j-1) */
2288 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2291 /* Arguments are ready. create the new vector stmt. */
2292 if (op_type == binary_op)
2293 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2295 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2297 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2298 new_temp = make_ssa_name (vec_dest, new_stmt);
2299 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2300 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2303 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2305 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2306 prev_stmt_info = vinfo_for_stmt (new_stmt);
2309 /* Finalize the reduction-phi (set it's arguments) and create the
2310 epilog reduction code. */
2311 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2315 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2316 a function declaration if the target has a vectorized version
2317 of the function, or NULL_TREE if the function cannot be vectorized. */
2320 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2322 tree fndecl = get_callee_fndecl (call);
2323 enum built_in_function code;
2325 /* We only handle functions that do not read or clobber memory -- i.e.
2326 const or novops ones. */
2327 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2331 || TREE_CODE (fndecl) != FUNCTION_DECL
2332 || !DECL_BUILT_IN (fndecl))
2335 code = DECL_FUNCTION_CODE (fndecl);
2336 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2340 /* Function vectorizable_call.
2342 Check if STMT performs a function call that can be vectorized.
2343 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2344 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2345 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2348 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2354 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2355 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2356 tree vectype_out, vectype_in;
2359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2360 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2361 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2363 int ncopies, j, nargs;
2364 call_expr_arg_iterator iter;
2366 enum { NARROW, NONE, WIDEN } modifier;
2368 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2371 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2374 /* FORNOW: not yet supported. */
2375 if (STMT_VINFO_LIVE_P (stmt_info))
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 fprintf (vect_dump, "value used after loop.");
2382 /* Is STMT a vectorizable call? */
2383 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2386 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2389 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2390 if (TREE_CODE (operation) != CALL_EXPR)
2393 /* Process function arguments. */
2394 rhs_type = NULL_TREE;
2396 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2398 /* Bail out if the function has more than two arguments, we
2399 do not have interesting builtin functions to vectorize with
2400 more than two arguments. */
2404 /* We can only handle calls with arguments of the same type. */
2406 && rhs_type != TREE_TYPE (op))
2408 if (vect_print_dump_info (REPORT_DETAILS))
2409 fprintf (vect_dump, "argument types differ.");
2412 rhs_type = TREE_TYPE (op);
2414 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2416 if (vect_print_dump_info (REPORT_DETAILS))
2417 fprintf (vect_dump, "use not simple.");
2424 /* No arguments is also not good. */
2428 vectype_in = get_vectype_for_scalar_type (rhs_type);
2429 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2431 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2432 vectype_out = get_vectype_for_scalar_type (lhs_type);
2433 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2436 if (nunits_in == nunits_out / 2)
2438 else if (nunits_out == nunits_in)
2440 else if (nunits_out == nunits_in / 2)
2445 /* For now, we only vectorize functions if a target specific builtin
2446 is available. TODO -- in some cases, it might be profitable to
2447 insert the calls for pieces of the vector, in order to be able
2448 to vectorize other operations in the loop. */
2449 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2450 if (fndecl == NULL_TREE)
2452 if (vect_print_dump_info (REPORT_DETAILS))
2453 fprintf (vect_dump, "function is not vectorizable.");
2458 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2460 if (modifier == NARROW)
2461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2463 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2465 /* Sanity check: make sure that at least one copy of the vectorized stmt
2466 needs to be generated. */
2467 gcc_assert (ncopies >= 1);
2469 if (!vec_stmt) /* transformation not required. */
2471 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2472 if (vect_print_dump_info (REPORT_DETAILS))
2473 fprintf (vect_dump, "=== vectorizable_call ===");
2474 vect_model_simple_cost (stmt_info, ncopies, dt);
2480 if (vect_print_dump_info (REPORT_DETAILS))
2481 fprintf (vect_dump, "transform operation.");
2484 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2485 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2487 prev_stmt_info = NULL;
2491 for (j = 0; j < ncopies; ++j)
2493 /* Build argument list for the vectorized call. */
2494 /* FIXME: Rewrite this so that it doesn't
2495 construct a temporary list. */
2498 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2502 = vect_get_vec_def_for_operand (op, stmt, NULL);
2505 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2507 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2511 vargs = nreverse (vargs);
2513 rhs = build_function_call_expr (fndecl, vargs);
2514 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2515 new_temp = make_ssa_name (vec_dest, new_stmt);
2516 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2518 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2521 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2523 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2525 prev_stmt_info = vinfo_for_stmt (new_stmt);
2531 for (j = 0; j < ncopies; ++j)
2533 /* Build argument list for the vectorized call. */
2534 /* FIXME: Rewrite this so that it doesn't
2535 construct a temporary list. */
2538 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2543 = vect_get_vec_def_for_operand (op, stmt, NULL);
2545 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2550 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
2552 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2555 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2556 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
2560 vargs = nreverse (vargs);
2562 rhs = build_function_call_expr (fndecl, vargs);
2563 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2564 new_temp = make_ssa_name (vec_dest, new_stmt);
2565 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2567 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2570 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2574 prev_stmt_info = vinfo_for_stmt (new_stmt);
2577 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2582 /* No current target implements this case. */
2586 /* The call in STMT might prevent it from being removed in dce.
2587 We however cannot remove it here, due to the way the ssa name
2588 it defines is mapped to the new definition. So just replace
2589 rhs of the statement with something harmless. */
2590 type = TREE_TYPE (scalar_dest);
2591 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2598 /* Function vect_gen_widened_results_half
2600 Create a vector stmt whose code, type, number of arguments, and result
2601 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2602 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2603 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2604 needs to be created (DECL is a function-decl of a target-builtin).
2605 STMT is the original scalar stmt that we are vectorizing. */
2608 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2609 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2610 tree vec_dest, block_stmt_iterator *bsi,
2619 /* Generate half of the widened result: */
2620 if (code == CALL_EXPR)
2622 /* Target specific support */
2623 if (op_type == binary_op)
2624 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2626 expr = build_call_expr (decl, 1, vec_oprnd0);
2630 /* Generic support */
2631 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2632 if (op_type == binary_op)
2633 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2635 expr = build1 (code, vectype, vec_oprnd0);
2637 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2638 new_temp = make_ssa_name (vec_dest, new_stmt);
2639 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2640 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2642 if (code == CALL_EXPR)
2644 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2646 if (TREE_CODE (sym) == SSA_NAME)
2647 sym = SSA_NAME_VAR (sym);
2648 mark_sym_for_renaming (sym);
2656 /* Function vectorizable_conversion.
2658 Check if STMT performs a conversion operation, that can be vectorized.
2659 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2660 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2661 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2664 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2671 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2673 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2674 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2675 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2678 enum vect_def_type dt0;
2680 stmt_vec_info prev_stmt_info;
2683 tree vectype_out, vectype_in;
2686 tree rhs_type, lhs_type;
2688 enum { NARROW, NONE, WIDEN } modifier;
2690 /* Is STMT a vectorizable conversion? */
2692 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2695 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2698 if (STMT_VINFO_LIVE_P (stmt_info))
2700 /* FORNOW: not yet supported. */
2701 if (vect_print_dump_info (REPORT_DETAILS))
2702 fprintf (vect_dump, "value used after loop.");
2706 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2709 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2712 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2713 code = TREE_CODE (operation);
2714 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2717 /* Check types of lhs and rhs */
2718 op0 = TREE_OPERAND (operation, 0);
2719 rhs_type = TREE_TYPE (op0);
2720 vectype_in = get_vectype_for_scalar_type (rhs_type);
2721 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2723 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2724 lhs_type = TREE_TYPE (scalar_dest);
2725 vectype_out = get_vectype_for_scalar_type (lhs_type);
2726 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2729 if (nunits_in == nunits_out / 2)
2731 else if (nunits_out == nunits_in)
2733 else if (nunits_out == nunits_in / 2)
2738 if (modifier == NONE)
2739 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2741 /* Bail out if the types are both integral or non-integral */
2742 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2743 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2746 if (modifier == NARROW)
2747 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2749 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2751 /* Sanity check: make sure that at least one copy of the vectorized stmt
2752 needs to be generated. */
2753 gcc_assert (ncopies >= 1);
2755 /* Check the operands of the operation. */
2756 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2758 if (vect_print_dump_info (REPORT_DETAILS))
2759 fprintf (vect_dump, "use not simple.");
2763 /* Supportable by target? */
2764 if ((modifier == NONE
2765 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2766 || (modifier == WIDEN
2767 && !supportable_widening_operation (code, stmt, vectype_in,
2770 || (modifier == NARROW
2771 && !supportable_narrowing_operation (code, stmt, vectype_in,
2774 if (vect_print_dump_info (REPORT_DETAILS))
2775 fprintf (vect_dump, "op not supported by target.");
2779 if (modifier != NONE)
2780 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2782 if (!vec_stmt) /* transformation not required. */
2784 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "transform conversion.");
2793 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2795 prev_stmt_info = NULL;
2799 for (j = 0; j < ncopies; j++)
2805 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2807 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2810 targetm.vectorize.builtin_conversion (code, vectype_in);
2811 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2813 /* Arguments are ready. create the new vector stmt. */
2814 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2815 new_temp = make_ssa_name (vec_dest, new_stmt);
2816 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2817 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2818 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2820 if (TREE_CODE (sym) == SSA_NAME)
2821 sym = SSA_NAME_VAR (sym);
2822 mark_sym_for_renaming (sym);
2826 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2828 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2829 prev_stmt_info = vinfo_for_stmt (new_stmt);
2834 /* In case the vectorization factor (VF) is bigger than the number
2835 of elements that we can fit in a vectype (nunits), we have to
2836 generate more than one vector stmt - i.e - we need to "unroll"
2837 the vector stmt by a factor VF/nunits. */
2838 for (j = 0; j < ncopies; j++)
2841 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2843 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2845 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2847 /* Generate first half of the widened result: */
2849 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2850 vec_oprnd0, vec_oprnd1,
2851 unary_op, vec_dest, bsi, stmt);
2853 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2855 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2856 prev_stmt_info = vinfo_for_stmt (new_stmt);
2858 /* Generate second half of the widened result: */
2860 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2861 vec_oprnd0, vec_oprnd1,
2862 unary_op, vec_dest, bsi, stmt);
2863 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2864 prev_stmt_info = vinfo_for_stmt (new_stmt);
2869 /* In case the vectorization factor (VF) is bigger than the number
2870 of elements that we can fit in a vectype (nunits), we have to
2871 generate more than one vector stmt - i.e - we need to "unroll"
2872 the vector stmt by a factor VF/nunits. */
2873 for (j = 0; j < ncopies; j++)
2878 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2879 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2883 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2884 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2887 /* Arguments are ready. Create the new vector stmt. */
2888 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2889 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2890 new_temp = make_ssa_name (vec_dest, new_stmt);
2891 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2892 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2895 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2897 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2899 prev_stmt_info = vinfo_for_stmt (new_stmt);
2902 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2908 /* Function vectorizable_assignment.
2910 Check if STMT performs an assignment (copy) that can be vectorized.
2911 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2912 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2913 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2916 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2923 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2924 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2927 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2928 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2929 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2931 gcc_assert (ncopies >= 1);
2933 return false; /* FORNOW */
2935 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2938 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2941 /* FORNOW: not yet supported. */
2942 if (STMT_VINFO_LIVE_P (stmt_info))
2944 if (vect_print_dump_info (REPORT_DETAILS))
2945 fprintf (vect_dump, "value used after loop.");
2949 /* Is vectorizable assignment? */
2950 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2953 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2954 if (TREE_CODE (scalar_dest) != SSA_NAME)
2957 op = GIMPLE_STMT_OPERAND (stmt, 1);
2958 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
2960 if (vect_print_dump_info (REPORT_DETAILS))
2961 fprintf (vect_dump, "use not simple.");
2965 if (!vec_stmt) /* transformation not required. */
2967 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2968 if (vect_print_dump_info (REPORT_DETAILS))
2969 fprintf (vect_dump, "=== vectorizable_assignment ===");
2970 vect_model_simple_cost (stmt_info, ncopies, dt);
2975 if (vect_print_dump_info (REPORT_DETAILS))
2976 fprintf (vect_dump, "transform assignment.");
2979 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2982 op = GIMPLE_STMT_OPERAND (stmt, 1);
2983 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2985 /* Arguments are ready. create the new vector stmt. */
2986 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2987 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2988 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2989 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2995 /* Function vect_min_worthwhile_factor.
2997 For a loop where we could vectorize the operation indicated by CODE,
2998 return the minimum vectorization factor that makes it worthwhile
2999 to use generic vectors. */
3001 vect_min_worthwhile_factor (enum tree_code code)
3022 /* Function vectorizable_induction
3024 Check if PHI performs an induction computation that can be vectorized.
3025 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3026 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3027 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3030 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3033 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3034 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3035 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3036 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3037 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3040 gcc_assert (ncopies >= 1);
3042 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3045 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3047 if (STMT_VINFO_LIVE_P (stmt_info))
3049 /* FORNOW: not yet supported. */
3050 if (vect_print_dump_info (REPORT_DETAILS))
3051 fprintf (vect_dump, "value used after loop.");
3055 if (TREE_CODE (phi) != PHI_NODE)
3058 if (!vec_stmt) /* transformation not required. */
3060 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3061 if (vect_print_dump_info (REPORT_DETAILS))
3062 fprintf (vect_dump, "=== vectorizable_induction ===");
3063 vect_model_induction_cost (stmt_info, ncopies);
3069 if (vect_print_dump_info (REPORT_DETAILS))
3070 fprintf (vect_dump, "transform induction phi.");
3072 vec_def = get_initial_def_for_induction (phi);
3073 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3078 /* Function vectorizable_operation.
3080 Check if STMT performs a binary or unary operation that can be vectorized.
3081 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3082 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3083 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3086 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3091 tree op0, op1 = NULL;
3092 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3094 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3096 enum tree_code code;
3097 enum machine_mode vec_mode;
3102 enum machine_mode optab_op2_mode;
3104 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3106 stmt_vec_info prev_stmt_info;
3107 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3110 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3113 gcc_assert (ncopies >= 1);
3115 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3118 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3121 /* FORNOW: not yet supported. */
3122 if (STMT_VINFO_LIVE_P (stmt_info))
3124 if (vect_print_dump_info (REPORT_DETAILS))
3125 fprintf (vect_dump, "value used after loop.");
3129 /* Is STMT a vectorizable binary/unary operation? */
3130 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3133 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3136 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3137 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3138 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3139 if (nunits_out != nunits_in)
3142 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3143 code = TREE_CODE (operation);
3145 /* For pointer addition, we should use the normal plus for
3146 the vector addition. */
3147 if (code == POINTER_PLUS_EXPR)
3150 optab = optab_for_tree_code (code, vectype);
3152 /* Support only unary or binary operations. */
3153 op_type = TREE_OPERAND_LENGTH (operation);
3154 if (op_type != unary_op && op_type != binary_op)
3156 if (vect_print_dump_info (REPORT_DETAILS))
3157 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3161 op0 = TREE_OPERAND (operation, 0);
3162 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3164 if (vect_print_dump_info (REPORT_DETAILS))
3165 fprintf (vect_dump, "use not simple.");
3169 if (op_type == binary_op)
3171 op1 = TREE_OPERAND (operation, 1);
3172 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3174 if (vect_print_dump_info (REPORT_DETAILS))
3175 fprintf (vect_dump, "use not simple.");
3180 /* Supportable by target? */
3183 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (vect_dump, "no optab.");
3187 vec_mode = TYPE_MODE (vectype);
3188 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3189 if (icode == CODE_FOR_nothing)
3191 if (vect_print_dump_info (REPORT_DETAILS))
3192 fprintf (vect_dump, "op not supported by target.");
3193 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3194 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3195 < vect_min_worthwhile_factor (code))
3197 if (vect_print_dump_info (REPORT_DETAILS))
3198 fprintf (vect_dump, "proceeding using word mode.");
3201 /* Worthwhile without SIMD support? */
3202 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3203 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3204 < vect_min_worthwhile_factor (code))
3206 if (vect_print_dump_info (REPORT_DETAILS))
3207 fprintf (vect_dump, "not worthwhile without SIMD support.");
3211 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3213 /* FORNOW: not yet supported. */
3214 if (!VECTOR_MODE_P (vec_mode))
3217 /* Invariant argument is needed for a vector shift
3218 by a scalar shift operand. */
3219 optab_op2_mode = insn_data[icode].operand[2].mode;
3220 if (! (VECTOR_MODE_P (optab_op2_mode)
3221 || dt[1] == vect_constant_def
3222 || dt[1] == vect_invariant_def))
3224 if (vect_print_dump_info (REPORT_DETAILS))
3225 fprintf (vect_dump, "operand mode requires invariant argument.");
3230 if (!vec_stmt) /* transformation not required. */
3232 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3233 if (vect_print_dump_info (REPORT_DETAILS))
3234 fprintf (vect_dump, "=== vectorizable_operation ===");
3235 vect_model_simple_cost (stmt_info, ncopies, dt);
3241 if (vect_print_dump_info (REPORT_DETAILS))
3242 fprintf (vect_dump, "transform binary/unary operation.");
3245 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3247 /* In case the vectorization factor (VF) is bigger than the number
3248 of elements that we can fit in a vectype (nunits), we have to generate
3249 more than one vector stmt - i.e - we need to "unroll" the
3250 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3251 from one copy of the vector stmt to the next, in the field
3252 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3253 stages to find the correct vector defs to be used when vectorizing
3254 stmts that use the defs of the current stmt. The example below illustrates
3255 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3256 4 vectorized stmts):
3258 before vectorization:
3259 RELATED_STMT VEC_STMT
3263 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3265 RELATED_STMT VEC_STMT
3266 VS1_0: vx0 = memref0 VS1_1 -
3267 VS1_1: vx1 = memref1 VS1_2 -
3268 VS1_2: vx2 = memref2 VS1_3 -
3269 VS1_3: vx3 = memref3 - -
3270 S1: x = load - VS1_0
3273 step2: vectorize stmt S2 (done here):
3274 To vectorize stmt S2 we first need to find the relevant vector
3275 def for the first operand 'x'. This is, as usual, obtained from
3276 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3277 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3278 relevant vector def 'vx0'. Having found 'vx0' we can generate
3279 the vector stmt VS2_0, and as usual, record it in the
3280 STMT_VINFO_VEC_STMT of stmt S2.
3281 When creating the second copy (VS2_1), we obtain the relevant vector
3282 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3283 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3284 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3285 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3286 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3287 chain of stmts and pointers:
3288 RELATED_STMT VEC_STMT
3289 VS1_0: vx0 = memref0 VS1_1 -
3290 VS1_1: vx1 = memref1 VS1_2 -
3291 VS1_2: vx2 = memref2 VS1_3 -
3292 VS1_3: vx3 = memref3 - -
3293 S1: x = load - VS1_0
3294 VS2_0: vz0 = vx0 + v1 VS2_1 -
3295 VS2_1: vz1 = vx1 + v1 VS2_2 -
3296 VS2_2: vz2 = vx2 + v1 VS2_3 -
3297 VS2_3: vz3 = vx3 + v1 - -
3298 S2: z = x + 1 - VS2_0 */
3300 prev_stmt_info = NULL;
3301 for (j = 0; j < ncopies; j++)
3306 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3307 if (op_type == binary_op)
3309 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3311 /* Vector shl and shr insn patterns can be defined with
3312 scalar operand 2 (shift operand). In this case, use
3313 constant or loop invariant op1 directly, without
3314 extending it to vector mode first. */
3315 optab_op2_mode = insn_data[icode].operand[2].mode;
3316 if (!VECTOR_MODE_P (optab_op2_mode))
3318 if (vect_print_dump_info (REPORT_DETAILS))
3319 fprintf (vect_dump, "operand 1 using scalar mode.");
3324 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3329 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3330 if (op_type == binary_op)
3331 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3334 /* Arguments are ready. create the new vector stmt. */
3336 if (op_type == binary_op)
3337 new_stmt = build_gimple_modify_stmt (vec_dest,
3338 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3340 new_stmt = build_gimple_modify_stmt (vec_dest,
3341 build1 (code, vectype, vec_oprnd0));
3342 new_temp = make_ssa_name (vec_dest, new_stmt);
3343 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3344 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3347 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3349 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3350 prev_stmt_info = vinfo_for_stmt (new_stmt);
3357 /* Function vectorizable_type_demotion
3359 Check if STMT performs a binary or unary operation that involves
3360 type demotion, and if it can be vectorized.
3361 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3362 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3363 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3366 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3373 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3375 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3376 enum tree_code code, code1 = ERROR_MARK;
3379 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3381 stmt_vec_info prev_stmt_info;
/* Only stmts that the analysis phase marked as relevant loop
   computations are handled here.  */
3390 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3393 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3396 /* FORNOW: not yet supported. */
3397 if (STMT_VINFO_LIVE_P (stmt_info))
3399 if (vect_print_dump_info (REPORT_DETAILS))
3400 fprintf (vect_dump, "value used after loop.");
3404 /* Is STMT a vectorizable type-demotion operation? */
3405 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3408 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3411 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3412 code = TREE_CODE (operation);
3413 if (code != NOP_EXPR && code != CONVERT_EXPR)
3416 op0 = TREE_OPERAND (operation, 0);
3417 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3418 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3420 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3421 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3422 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW: only narrowing by exactly a factor of two is supported
   (each output vector holds twice as many elements as an input one).  */
3423 if (nunits_in != nunits_out / 2) /* FORNOW */
3426 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3427 gcc_assert (ncopies >= 1);
/* Accept only integer->integer or float->float conversions.  */
3429 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3430 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3431 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3432 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3433 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3436 /* Check the operands of the operation. */
3437 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3439 if (vect_print_dump_info (REPORT_DETAILS))
3440 fprintf (vect_dump, "use not simple.");
3444 /* Supportable by target? */
3445 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3448 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3450 if (!vec_stmt) /* transformation not required. */
3452 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3453 if (vect_print_dump_info (REPORT_DETAILS))
3454 fprintf (vect_dump, "=== vectorizable_demotion ===");
3455 vect_model_simple_cost (stmt_info, ncopies, dt);
3460 if (vect_print_dump_info (REPORT_DETAILS))
3461 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3465 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3467 /* In case the vectorization factor (VF) is bigger than the number
3468 of elements that we can fit in a vectype (nunits), we have to generate
3469 more than one vector stmt - i.e - we need to "unroll" the
3470 vector stmt by a factor VF/nunits. */
3471 prev_stmt_info = NULL;
3472 for (j = 0; j < ncopies; j++)
/* Each narrowed result vector consumes TWO input vectors, so a pair
   of vector defs is obtained per copy.  */
3477 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3478 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
/* For copies after the first, continue the def-chain starting from
   the second operand of the previous copy.  */
3482 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3483 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3486 /* Arguments are ready. Create the new vector stmt. */
3487 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3488 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3489 new_temp = make_ssa_name (vec_dest, new_stmt);
3490 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3491 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Record the first copy in VEC_STMT; later copies are chained through
   STMT_VINFO_RELATED_STMT of the previous copy.  */
3494 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3496 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3498 prev_stmt_info = vinfo_for_stmt (new_stmt);
3501 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3506 /* Function vectorizable_type_promotion
3508 Check if STMT performs a binary or unary operation that involves
3509 type promotion, and if it can be vectorized.
3510 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3511 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3512 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3515 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3521 tree op0, op1 = NULL;
3522 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3524 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3525 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3526 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3529 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3531 stmt_vec_info prev_stmt_info;
/* Only stmts that the analysis phase marked as relevant loop
   computations are handled here.  */
3539 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3542 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3545 /* FORNOW: not yet supported. */
3546 if (STMT_VINFO_LIVE_P (stmt_info))
3548 if (vect_print_dump_info (REPORT_DETAILS))
3549 fprintf (vect_dump, "value used after loop.");
3553 /* Is STMT a vectorizable type-promotion operation? */
3554 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3557 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3560 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3561 code = TREE_CODE (operation);
/* Conversions and widening multiplication are the only promotions
   handled here.  */
3562 if (code != NOP_EXPR && code != CONVERT_EXPR
3563 && code != WIDEN_MULT_EXPR)
3566 op0 = TREE_OPERAND (operation, 0);
3567 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3568 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3570 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3571 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3572 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW: only widening by exactly a factor of two is supported
   (each input vector yields two output vectors).  */
3573 if (nunits_out != nunits_in / 2) /* FORNOW */
3576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577 gcc_assert (ncopies >= 1);
/* Accept only integer->integer or float->float conversions.  */
3579 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3580 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3581 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3582 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3583 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3586 /* Check the operands of the operation. */
3587 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3589 if (vect_print_dump_info (REPORT_DETAILS))
3590 fprintf (vect_dump, "use not simple.");
3594 op_type = TREE_CODE_LENGTH (code);
3595 if (op_type == binary_op)
3597 op1 = TREE_OPERAND (operation, 1);
3598 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3600 if (vect_print_dump_info (REPORT_DETAILS))
3601 fprintf (vect_dump, "use not simple.");
3606 /* Supportable by target? */
3607 if (!supportable_widening_operation (code, stmt, vectype_in,
3608 &decl1, &decl2, &code1, &code2))
3611 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3613 if (!vec_stmt) /* transformation not required. */
3615 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3616 if (vect_print_dump_info (REPORT_DETAILS))
3617 fprintf (vect_dump, "=== vectorizable_promotion ===");
/* Cost 2*ncopies because every copy produces two widened halves.  */
3618 vect_model_simple_cost (stmt_info, 2*ncopies, dt);
3624 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3629 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3631 /* In case the vectorization factor (VF) is bigger than the number
3632 of elements that we can fit in a vectype (nunits), we have to generate
3633 more than one vector stmt - i.e - we need to "unroll" the
3634 vector stmt by a factor VF/nunits. */
3636 prev_stmt_info = NULL;
3637 for (j = 0; j < ncopies; j++)
3642 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3643 if (op_type == binary_op)
3644 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3648 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3649 if (op_type == binary_op)
3650 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3653 /* Arguments are ready. Create the new vector stmt. We are creating
3654 two vector defs because the widened result does not fit in one vector.
3655 The vectorized stmt can be expressed as a call to a target builtin,
3656 or using a tree-code. */
3657 /* Generate first half of the widened result: */
3658 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3659 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
/* Record the very first half in VEC_STMT; every later half is chained
   through STMT_VINFO_RELATED_STMT of the previous one.  */
3661 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3663 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3664 prev_stmt_info = vinfo_for_stmt (new_stmt);
3666 /* Generate second half of the widened result: */
3667 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3668 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3669 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3670 prev_stmt_info = vinfo_for_stmt (new_stmt);
3674 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3679 /* Function vect_strided_store_supported.
3681 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3682 and FALSE otherwise. */
3685 vect_strided_store_supported (tree vectype)
3687 optab interleave_high_optab, interleave_low_optab;
3690 mode = (int) TYPE_MODE (vectype);
3692 /* Check that the operation is supported. */
/* First, that the tree codes map to optabs at all ...  */
3693 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3695 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3697 if (!interleave_high_optab || !interleave_low_optab)
3699 if (vect_print_dump_info (REPORT_DETAILS))
3700 fprintf (vect_dump, "no optab for interleave.");
/* ... then, that the target implements them for this vector mode.  */
3704 if (optab_handler (interleave_high_optab, mode)->insn_code
3706 || optab_handler (interleave_low_optab, mode)->insn_code
3707 == CODE_FOR_nothing
3709 if (vect_print_dump_info (REPORT_DETAILS))
3710 fprintf (vect_dump, "interleave op not supported by target.");
3717 /* Function vect_permute_store_chain.
3719 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3720 a power of 2, generate interleave_high/low stmts to reorder the data
3721 correctly for the stores. Return the final references for stores in
3724 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3725 The input is 4 vectors each containing 8 elements. We assign a number to each
3726 element, the input sequence is:
3728 1st vec: 0 1 2 3 4 5 6 7
3729 2nd vec: 8 9 10 11 12 13 14 15
3730 3rd vec: 16 17 18 19 20 21 22 23
3731 4th vec: 24 25 26 27 28 29 30 31
3733 The output sequence should be:
3735 1st vec: 0 8 16 24 1 9 17 25
3736 2nd vec: 2 10 18 26 3 11 19 27
3737 3rd vec: 4 12 20 28 5 13 21 29
3738 4th vec: 6 14 22 30 7 15 23 31
3740 i.e., we interleave the contents of the four vectors in their order.
3742 We use interleave_high/low instructions to create such output. The input of
3743 each interleave_high/low operation is two vectors:
3746 the even elements of the result vector are obtained left-to-right from the
3747 high/low elements of the first vector. The odd elements of the result are
3748 obtained left-to-right from the high/low elements of the second vector.
3749 The output of interleave_high will be: 0 4 1 5
3750 and of interleave_low: 2 6 3 7
3753 The permutation is done in log LENGTH stages. In each stage interleave_high
3754 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3755 where the first argument is taken from the first half of DR_CHAIN and the
3756 second argument from its second half.
3759 I1: interleave_high (1st vec, 3rd vec)
3760 I2: interleave_low (1st vec, 3rd vec)
3761 I3: interleave_high (2nd vec, 4th vec)
3762 I4: interleave_low (2nd vec, 4th vec)
3764 The output for the first stage is:
3766 I1: 0 16 1 17 2 18 3 19
3767 I2: 4 20 5 21 6 22 7 23
3768 I3: 8 24 9 25 10 26 11 27
3769 I4: 12 28 13 29 14 30 15 31
3771 The output of the second stage, i.e. the final result is:
3773 I1: 0 8 16 24 1 9 17 25
3774 I2: 2 10 18 26 3 11 19 27
3775 I3: 4 12 20 28 5 13 21 29
3776 I4: 6 14 22 30 7 15 23 31. */
3779 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3780 unsigned int length,
3782 block_stmt_iterator *bsi,
3783 VEC(tree,heap) **result_chain)
3785 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3786 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3787 tree scalar_dest, tmp;
3790 VEC(tree,heap) *first, *second;
3792 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3793 first = VEC_alloc (tree, heap, length/2);
3794 second = VEC_alloc (tree, heap, length/2);
3796 /* Check that the operation is supported. */
3797 if (!vect_strided_store_supported (vectype))
3800 *result_chain = VEC_copy (tree, heap, dr_chain);
/* log2(LENGTH) stages; each stage pairs vector J with vector
   J + LENGTH/2 and emits one interleave-high and one interleave-low.  */
3802 for (i = 0; i < exact_log2 (length); i++)
3804 for (j = 0; j < length/2; j++)
3806 vect1 = VEC_index (tree, dr_chain, j);
3807 vect2 = VEC_index (tree, dr_chain, j+length/2);
3809 /* Create interleaving stmt:
3810 in the case of big endian:
3811 high = interleave_high (vect1, vect2)
3812 and in the case of little endian:
3813 high = interleave_low (vect1, vect2). */
3814 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3815 DECL_GIMPLE_REG_P (perm_dest) = 1;
3816 add_referenced_var (perm_dest);
3817 if (BYTES_BIG_ENDIAN)
3818 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3820 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3821 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3822 high = make_ssa_name (perm_dest, perm_stmt);
3823 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3824 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3825 VEC_replace (tree, *result_chain, 2*j, high);
3827 /* Create interleaving stmt:
3828 in the case of big endian:
3829 low = interleave_low (vect1, vect2)
3830 and in the case of little endian:
3831 low = interleave_high (vect1, vect2). */
3832 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3833 DECL_GIMPLE_REG_P (perm_dest) = 1;
3834 add_referenced_var (perm_dest);
3835 if (BYTES_BIG_ENDIAN)
3836 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3838 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3839 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3840 low = make_ssa_name (perm_dest, perm_stmt);
3841 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3842 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3843 VEC_replace (tree, *result_chain, 2*j+1, low);
/* This stage's output becomes the next stage's input.  */
3845 dr_chain = VEC_copy (tree, heap, *result_chain);
3851 /* Function vectorizable_store.
3853 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3855 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3856 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3857 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3860 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3865 tree vec_oprnd = NULL_TREE;
3866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3867 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3870 enum machine_mode vec_mode;
3872 enum dr_alignment_support alignment_support_cheme;
3874 enum vect_def_type dt;
3875 stmt_vec_info prev_stmt_info = NULL;
3876 tree dataref_ptr = NULL_TREE;
3877 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3878 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3880 tree next_stmt, first_stmt;
3881 bool strided_store = false;
3882 unsigned int group_size, i;
3883 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3884 gcc_assert (ncopies >= 1);
/* Only stmts that the analysis phase marked as relevant loop
   computations are handled here.  */
3886 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3892 if (STMT_VINFO_LIVE_P (stmt_info))
3894 if (vect_print_dump_info (REPORT_DETAILS))
3895 fprintf (vect_dump, "value used after loop.");
3899 /* Is vectorizable store? */
3901 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3904 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3905 if (TREE_CODE (scalar_dest) != ARRAY_REF
3906 && TREE_CODE (scalar_dest) != INDIRECT_REF
3907 && !DR_GROUP_FIRST_DR (stmt_info))
3910 op = GIMPLE_STMT_OPERAND (stmt, 1);
3911 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3913 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "use not simple.");
3918 vec_mode = TYPE_MODE (vectype);
3919 /* FORNOW. In some cases can vectorize even if data-type not supported
3920 (e.g. - array initialization with 0). */
3921 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
3924 if (!STMT_VINFO_DATA_REF (stmt_info))
/* A non-NULL DR_GROUP_FIRST_DR marks this store as part of an
   interleaving (strided) group.  */
3927 if (DR_GROUP_FIRST_DR (stmt_info))
3929 strided_store = true;
3930 if (!vect_strided_store_supported (vectype))
3934 if (!vec_stmt) /* transformation not required. */
3936 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3937 vect_model_store_cost (stmt_info, ncopies, dt);
3945 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3946 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3947 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3949 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3951 /* We vectorize all the stmts of the interleaving group when we
3952 reach the last stmt in the group. */
3953 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3954 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
/* Not the last store of the group yet - defer code generation.  */
3956 *vec_stmt = NULL_TREE;
3967 if (vect_print_dump_info (REPORT_DETAILS))
3968 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3970 dr_chain = VEC_alloc (tree, heap, group_size);
3971 oprnds = VEC_alloc (tree, heap, group_size);
3973 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3974 gcc_assert (alignment_support_cheme);
3975 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
3977 /* In case the vectorization factor (VF) is bigger than the number
3978 of elements that we can fit in a vectype (nunits), we have to generate
3979 more than one vector stmt - i.e - we need to "unroll" the
3980 vector stmt by a factor VF/nunits. For more details see documentation in
3981 vect_get_vec_def_for_copy_stmt. */
3983 /* In case of interleaving (non-unit strided access):
3990 We create vectorized stores starting from base address (the access of the
3991 first stmt in the chain (S2 in the above example), when the last store stmt
3992 of the chain (S4) is reached:
3995 VS2: &base + vec_size*1 = vx0
3996 VS3: &base + vec_size*2 = vx1
3997 VS4: &base + vec_size*3 = vx3
3999 Then permutation statements are generated:
4001 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4002 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4005 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4006 (the order of the data-refs in the output of vect_permute_store_chain
4007 corresponds to the order of scalar stmts in the interleaving chain - see
4008 the documentation of vect_permute_store_chain()).
4010 In case of both multiple types and interleaving, above vector stores and
4011 permutation stmts are created for every copy. The result vector stmts are
4012 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4013 STMT_VINFO_RELATED_STMT for the next copies.
4016 prev_stmt_info = NULL;
4017 for (j = 0; j < ncopies; j++)
4024 /* For interleaved stores we collect vectorized defs for all the
4025 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
4026 as an input to vect_permute_store_chain(), and OPRNDS as an input
4027 to vect_get_vec_def_for_stmt_copy() for the next copy.
4028 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4029 OPRNDS are of size 1. */
4030 next_stmt = first_stmt;
4031 for (i = 0; i < group_size; i++)
4033 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
4034 is the exact number of stmts in the chain. Therefore, NEXT_STMT
4035 can't be NULL_TREE. In case that there is no interleaving,
4036 GROUP_SIZE is 1, and only one iteration of the loop will be
4038 gcc_assert (next_stmt);
4039 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4040 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
4041 VEC_quick_push(tree, dr_chain, vec_oprnd);
4042 VEC_quick_push(tree, oprnds, vec_oprnd);
4043 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4045 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
4046 &dummy, &ptr_incr, false,
4047 TREE_TYPE (vec_oprnd));
4051 /* For interleaved stores we created vectorized defs for all the
4052 defs stored in OPRNDS in the previous iteration (previous copy).
4053 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4054 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4056 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4057 OPRNDS are of size 1. */
4058 for (i = 0; i < group_size; i++)
4060 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4061 VEC_index (tree, oprnds, i));
4062 VEC_replace(tree, dr_chain, i, vec_oprnd);
4063 VEC_replace(tree, oprnds, i, vec_oprnd);
4065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4070 result_chain = VEC_alloc (tree, heap, group_size);
/* Permute the collected defs so element order matches memory order.  */
4072 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4077 next_stmt = first_stmt;
4078 for (i = 0; i < group_size; i++)
4080 /* For strided stores vectorized defs are interleaved in
4081 vect_permute_store_chain(). */
4083 vec_oprnd = VEC_index(tree, result_chain, i);
4085 data_ref = build_fold_indirect_ref (dataref_ptr);
4086 /* Arguments are ready. Create the new vector stmt. */
4087 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4088 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4089 mark_symbols_for_renaming (new_stmt);
/* Record the first copy in VEC_STMT; chain later copies through
   STMT_VINFO_RELATED_STMT.  */
4092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4096 prev_stmt_info = vinfo_for_stmt (new_stmt);
4097 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4100 /* Bump the vector pointer. */
4101 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4109 /* Function vect_setup_realignment
4111 This function is called when vectorizing an unaligned load using
4112 the dr_unaligned_software_pipeline scheme.
4113 This function generates the following code at the loop prolog:
4116 msq_init = *(floor(p)); # prolog load
4117 realignment_token = call target_builtin;
4119 msq = phi (msq_init, ---)
4121 The code above sets up a new (vector) pointer, pointing to the first
4122 location accessed by STMT, and a "floor-aligned" load using that pointer.
4123 It also generates code to compute the "realignment-token" (if the relevant
4124 target hook was defined), and creates a phi-node at the loop-header bb
4125 whose arguments are the result of the prolog-load (created by this
4126 function) and the result of a load that takes place in the loop (to be
4127 created by the caller to this function).
4128 The caller to this function uses the phi-result (msq) to create the
4129 realignment code inside the loop, and sets up the missing phi argument,
4133 msq = phi (msq_init, lsq)
4134 lsq = *(floor(p')); # load in loop
4135 result = realign_load (msq, lsq, realignment_token);
4138 STMT - (scalar) load stmt to be vectorized. This load accesses
4139 a memory location that may be unaligned.
4140 BSI - place where new code is to be inserted.
4143 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4144 target hook, if defined.
4145 Return value - the result of the loop-header phi node. */
4148 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4149 tree *realignment_token)
4151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4153 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4155 edge pe = loop_preheader_edge (loop);
4156 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4169 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4170 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4171 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
/* ALIGN_INDIRECT_REF expresses the floor-aligned dereference.  */
4173 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4174 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4175 new_temp = make_ssa_name (vec_dest, new_stmt);
4176 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
/* Insertion on the preheader edge must not split it into a new bb.  */
4177 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4178 gcc_assert (!new_bb);
4179 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4181 /* 2. Create permutation mask, if required, in loop preheader. */
4182 if (targetm.vectorize.builtin_mask_for_load)
4186 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4187 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4188 vec_dest = vect_create_destination_var (scalar_dest,
4189 TREE_TYPE (new_stmt));
4190 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4191 new_temp = make_ssa_name (vec_dest, new_stmt);
4192 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4193 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4194 gcc_assert (!new_bb);
4195 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4197 /* The result of the CALL_EXPR to this builtin is determined from
4198 the value of the parameter and no global variables are touched
4199 which makes the builtin a "const" function. Requiring the
4200 builtin to have the "const" attribute makes it unnecessary
4201 to call mark_call_clobbered. */
4202 gcc_assert (TREE_READONLY (builtin_decl));
4205 /* 3. Create msq = phi <msq_init, lsq> in loop */
/* Only the preheader argument is set here; the caller adds the
   latch argument (lsq) once the in-loop load is generated.  */
4206 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4207 msq = make_ssa_name (vec_dest, NULL_TREE);
4208 phi_stmt = create_phi_node (msq, loop->header);
4209 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4210 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4216 /* Function vect_strided_load_supported.
4218 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4219 and FALSE otherwise. */
4222 vect_strided_load_supported (tree vectype)
4224 optab perm_even_optab, perm_odd_optab;
4227 mode = (int) TYPE_MODE (vectype);
/* Check both that an optab exists for each extract tree code, and
   that the target implements it for this vector mode.  */
4229 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4230 if (!perm_even_optab)
4232 if (vect_print_dump_info (REPORT_DETAILS))
4233 fprintf (vect_dump, "no optab for perm_even.");
4237 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
4239 if (vect_print_dump_info (REPORT_DETAILS))
4240 fprintf (vect_dump, "perm_even op not supported by target.");
4244 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4245 if (!perm_odd_optab)
4247 if (vect_print_dump_info (REPORT_DETAILS))
4248 fprintf (vect_dump, "no optab for perm_odd.");
4252 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "perm_odd op not supported by target.");
4262 /* Function vect_permute_load_chain.
4264 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4265 a power of 2, generate extract_even/odd stmts to reorder the input data
4266 correctly. Return the final references for loads in RESULT_CHAIN.
4268 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4269 The input is 4 vectors each containing 8 elements. We assign a number to each
4270 element, the input sequence is:
4272 1st vec: 0 1 2 3 4 5 6 7
4273 2nd vec: 8 9 10 11 12 13 14 15
4274 3rd vec: 16 17 18 19 20 21 22 23
4275 4th vec: 24 25 26 27 28 29 30 31
4277 The output sequence should be:
4279 1st vec: 0 4 8 12 16 20 24 28
4280 2nd vec: 1 5 9 13 17 21 25 29
4281 3rd vec: 2 6 10 14 18 22 26 30
4282 4th vec: 3 7 11 15 19 23 27 31
4284 i.e., the first output vector should contain the first elements of each
4285 interleaving group, etc.
4287 We use extract_even/odd instructions to create such output. The input of each
4288 extract_even/odd operation is two vectors
4292 and the output is the vector of extracted even/odd elements. The output of
4293 extract_even will be: 0 2 4 6
4294 and of extract_odd: 1 3 5 7
4297 The permutation is done in log LENGTH stages. In each stage extract_even and
4298 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4299 order. In our example,
4301 E1: extract_even (1st vec, 2nd vec)
4302 E2: extract_odd (1st vec, 2nd vec)
4303 E3: extract_even (3rd vec, 4th vec)
4304 E4: extract_odd (3rd vec, 4th vec)
4306 The output for the first stage will be:
4308 E1: 0 2 4 6 8 10 12 14
4309 E2: 1 3 5 7 9 11 13 15
4310 E3: 16 18 20 22 24 26 28 30
4311 E4: 17 19 21 23 25 27 29 31
4313 In order to proceed and create the correct sequence for the next stage (or
4314 for the correct output, if the second stage is the last one, as in our
4315 example), we first put the output of extract_even operation and then the
4316 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4317 The input for the second stage is:
4319 1st vec (E1): 0 2 4 6 8 10 12 14
4320 2nd vec (E3): 16 18 20 22 24 26 28 30
4321 3rd vec (E2): 1 3 5 7 9 11 13 15
4322 4th vec (E4): 17 19 21 23 25 27 29 31
4324 The output of the second stage:
4326 E1: 0 4 8 12 16 20 24 28
4327 E2: 2 6 10 14 18 22 26 30
4328 E3: 1 5 9 13 17 21 25 29
4329 E4: 3 7 11 15 19 23 27 31
4331 And RESULT_CHAIN after reordering:
4333 1st vec (E1): 0 4 8 12 16 20 24 28
4334 2nd vec (E3): 1 5 9 13 17 21 25 29
4335 3rd vec (E2): 2 6 10 14 18 22 26 30
4336 4th vec (E4): 3 7 11 15 19 23 27 31. */
4339 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4340 unsigned int length,
4342 block_stmt_iterator *bsi,
4343 VEC(tree,heap) **result_chain)
4345 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4346 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4351 /* Check that the operation is supported. */
4352 if (!vect_strided_load_supported (vectype))
4355 *result_chain = VEC_copy (tree, heap, dr_chain);
  /* LENGTH is a power of 2 (see the function comment above), so
     exact_log2 (length) is the number of permutation stages.  */
4356 for (i = 0; i < exact_log2 (length); i++)
4358 for (j = 0; j < length; j +=2)
4360 first_vect = VEC_index (tree, dr_chain, j);
4361 second_vect = VEC_index (tree, dr_chain, j+1);
4363 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4364 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4365 DECL_GIMPLE_REG_P (perm_dest) = 1;
4366 add_referenced_var (perm_dest);
4368 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4369 first_vect, second_vect);
4370 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4372 data_ref = make_ssa_name (perm_dest, perm_stmt);
4373 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4374 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4375 mark_symbols_for_renaming (perm_stmt);
  /* Even-extracted results fill the first half of RESULT_CHAIN...  */
4377 VEC_replace (tree, *result_chain, j/2, data_ref);
4379 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4380 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4381 DECL_GIMPLE_REG_P (perm_dest) = 1;
4382 add_referenced_var (perm_dest);
4384 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4385 first_vect, second_vect);
4386 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4387 data_ref = make_ssa_name (perm_dest, perm_stmt);
4388 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4389 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4390 mark_symbols_for_renaming (perm_stmt);
  /* ...and odd-extracted results fill the second half.  */
4392 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
  /* This stage's output becomes the next stage's input.  */
4394 dr_chain = VEC_copy (tree, heap, *result_chain);
4400 /* Function vect_transform_strided_load.
4402 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4403 to perform their permutation and ascribe the result vectorized statements to
4404 the scalar statements.
4408 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4409 block_stmt_iterator *bsi)
4411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4412 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4413 tree next_stmt, new_stmt;
4414 VEC(tree,heap) *result_chain = NULL;
4415 unsigned int i, gap_count;
4418 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4419 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4420 vectors, that are ready for vector computation. */
4421 result_chain = VEC_alloc (tree, heap, size);
4423 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4426 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4427 Since we scan the chain starting from its first node, their order
4428 corresponds to the order of data-refs in RESULT_CHAIN. */
4429 next_stmt = first_stmt;
4431 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
4436 /* Skip the gaps. Loads created for the gaps will be removed by dead
4437 code elimination pass later.
4438 DR_GROUP_GAP is the number of steps in elements from the previous
4439 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4440 correspond to the gaps.
4442 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4450 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4451 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4452 copies, and we put the new vector statement in the first available
4454 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4455 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
  /* Otherwise walk to the end of the RELATED_STMT chain of the already
     recorded copy and append NEW_STMT there.  */
4458 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4459 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4460 vinfo_for_stmt (prev_stmt));
4463 prev_stmt = rel_stmt;
4464 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4466 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4468 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4470 /* If NEXT_STMT accesses the same DR as the previous statement,
4471 put the same TMP_DATA_REF as its vectorized statement; otherwise
4472 get the next data-ref from RESULT_CHAIN. */
4473 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4481 /* vectorizable_load.
4483 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4485 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4486 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4487 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4490 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4493 tree vec_dest = NULL;
4494 tree data_ref = NULL;
4496 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4497 stmt_vec_info prev_stmt_info;
4498 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4499 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4500 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4501 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4504 tree new_stmt = NULL_TREE;
4506 enum dr_alignment_support alignment_support_cheme;
4507 tree dataref_ptr = NULL_TREE;
4509 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4510 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4511 int i, j, group_size;
4512 tree msq = NULL_TREE, lsq;
4513 tree offset = NULL_TREE;
4514 tree realignment_token = NULL_TREE;
4515 tree phi_stmt = NULL_TREE;
4516 VEC(tree,heap) *dr_chain = NULL;
4517 bool strided_load = false;
4520 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4523 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4526 /* FORNOW: not yet supported. */
4527 if (STMT_VINFO_LIVE_P (stmt_info))
4529 if (vect_print_dump_info (REPORT_DETAILS))
4530 fprintf (vect_dump, "value used after loop.");
4534 /* Is vectorizable load? */
4535 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4538 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0)
4539 if (TREE_CODE (scalar_dest) != SSA_NAME)
4542 op = GIMPLE_STMT_OPERAND (stmt, 1);
4543 if (TREE_CODE (op) != ARRAY_REF
4544 && TREE_CODE (op) != INDIRECT_REF
4545 && !DR_GROUP_FIRST_DR (stmt_info))
4548 if (!STMT_VINFO_DATA_REF (stmt_info))
4551 mode = (int) TYPE_MODE (vectype);
4553 /* FORNOW. In some cases can vectorize even if data-type not supported
4554 (e.g. - data copies). */
4555 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
4557 if (vect_print_dump_info (REPORT_DETAILS))
4558 fprintf (vect_dump, "Aligned load, but unsupported type.");
4562 /* Check if the load is a part of an interleaving chain. */
4563 if (DR_GROUP_FIRST_DR (stmt_info))
4565 strided_load = true;
4567 /* Check if interleaving is supported. */
4568 if (!vect_strided_load_supported (vectype))
4572 if (!vec_stmt) /* transformation not required. */
4574 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4575 vect_model_load_cost (stmt_info, ncopies);
4579 if (vect_print_dump_info (REPORT_DETAILS))
4580 fprintf (vect_dump, "transform load.");
  /* For an interleaved access the whole chain is emitted when its first
     statement is reached; use the group's first stmt and data-ref.  */
4586 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4587 /* Check if the chain of loads is already vectorized. */
4588 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4590 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4593 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4594 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4595 dr_chain = VEC_alloc (tree, heap, group_size);
4604 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
4605 gcc_assert (alignment_support_cheme);
4608 /* In case the vectorization factor (VF) is bigger than the number
4609 of elements that we can fit in a vectype (nunits), we have to generate
4610 more than one vector stmt - i.e - we need to "unroll" the
4611 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4612 from one copy of the vector stmt to the next, in the field
4613 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4614 stages to find the correct vector defs to be used when vectorizing
4615 stmts that use the defs of the current stmt. The example below illustrates
4616 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4617 4 vectorized stmts):
4619 before vectorization:
4620 RELATED_STMT VEC_STMT
4624 step 1: vectorize stmt S1:
4625 We first create the vector stmt VS1_0, and, as usual, record a
4626 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4627 Next, we create the vector stmt VS1_1, and record a pointer to
4628 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4629 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4631 RELATED_STMT VEC_STMT
4632 VS1_0: vx0 = memref0 VS1_1 -
4633 VS1_1: vx1 = memref1 VS1_2 -
4634 VS1_2: vx2 = memref2 VS1_3 -
4635 VS1_3: vx3 = memref3 - -
4636 S1: x = load - VS1_0
4639 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4640 information we recorded in RELATED_STMT field is used to vectorize
4643 /* In case of interleaving (non-unit strided access):
4650 Vectorized loads are created in the order of memory accesses
4651 starting from the access of the first stmt of the chain:
4654 VS2: vx1 = &base + vec_size*1
4655 VS3: vx3 = &base + vec_size*2
4656 VS4: vx4 = &base + vec_size*3
4658 Then permutation statements are generated:
4660 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4661 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4664 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4665 (the order of the data-refs in the output of vect_permute_load_chain
4666 corresponds to the order of scalar stmts in the interleaving chain - see
4667 the documentation of vect_permute_load_chain()).
4668 The generation of permutation stmts and recording them in
4669 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4671 In case of both multiple types and interleaving, the vector loads and
4672 permutation stmts above are created for every copy. The result vector stmts
4673 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4674 STMT_VINFO_RELATED_STMT for the next copies. */
4676 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4677 on a target that supports unaligned accesses (dr_unaligned_supported)
4678 we generate the following code:
4682 p = p + indx * vectype_size;
4687 Otherwise, the data reference is potentially unaligned on a target that
4688 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4689 then generate the following code, in which the data in each iteration is
4690 obtained by two vector loads, one from the previous iteration, and one
4691 from the current iteration:
4693 msq_init = *(floor(p1))
4694 p2 = initial_addr + VS - 1;
4695 realignment_token = call target_builtin;
4698 p2 = p2 + indx * vectype_size
4700 vec_dest = realign_load (msq, lsq, realignment_token)
4705 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4707 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4708 phi_stmt = SSA_NAME_DEF_STMT (msq);
4709 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4712 prev_stmt_info = NULL;
4713 for (j = 0; j < ncopies; j++)
4715 /* 1. Create the vector pointer update chain. */
4717 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4718 &ptr_incr, false, NULL_TREE);
4720 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4722 for (i = 0; i < group_size; i++)
4724 /* 2. Create the vector-load in the loop. */
4725 switch (alignment_support_cheme)
4728 gcc_assert (aligned_access_p (first_dr));
4729 data_ref = build_fold_indirect_ref (dataref_ptr);
4731 case dr_unaligned_supported:
4733 int mis = DR_MISALIGNMENT (first_dr);
4734 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4736 gcc_assert (!aligned_access_p (first_dr));
4737 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
4739 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4742 case dr_unaligned_software_pipeline:
4743 gcc_assert (!aligned_access_p (first_dr));
4744 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4749 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4750 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4751 new_temp = make_ssa_name (vec_dest, new_stmt);
4752 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4753 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4754 mark_symbols_for_renaming (new_stmt);
4756 /* 3. Handle explicit realignment if necessary/supported. */
4757 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4760 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4761 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4762 if (!realignment_token)
4763 realignment_token = dataref_ptr;
4764 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4766 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4767 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4768 new_temp = make_ssa_name (vec_dest, new_stmt);
4769 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4770 vect_finish_stmt_generation (stmt, new_stmt, bsi);
  /* Only the very last load of the last copy feeds the loop-carried
     "msq" phi built by vect_setup_realignment.  */
4771 if (i == group_size - 1 && j == ncopies - 1)
4772 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
  /* Collect the loaded vectors; for strided loads they are permuted
     afterwards by vect_transform_strided_load.  */
4776 VEC_quick_push (tree, dr_chain, new_temp);
4777 if (i < group_size - 1)
4778 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4783 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4786 dr_chain = VEC_alloc (tree, heap, group_size);
4791 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4793 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4794 prev_stmt_info = vinfo_for_stmt (new_stmt);
4802 /* Function vectorizable_live_operation.
4804 STMT computes a value that is used outside the loop. Check if
4805 it can be supported. */
4808 vectorizable_live_operation (tree stmt,
4809 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4810 tree *vec_stmt ATTRIBUTE_UNUSED)
4813 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4814 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4819 enum vect_def_type dt;
4821 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
  /* Reduction live-outs are handled by the reduction epilogue code,
     not here.  */
4823 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4826 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4829 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4832 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4833 op_type = TREE_OPERAND_LENGTH (operation);
4835 /* FORNOW: support only if all uses are invariant. This means
4836 that the scalar operations can remain in place, unvectorized.
4837 The original last scalar value that they compute will be used. */
4839 for (i = 0; i < op_type; i++)
4841 op = TREE_OPERAND (operation, i);
4842 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4844 if (vect_print_dump_info (REPORT_DETAILS))
4845 fprintf (vect_dump, "use not simple.");
  /* Only constant or loop-invariant operands are supported FORNOW.  */
4849 if (dt != vect_invariant_def && dt != vect_constant_def)
4853 /* No transformation is required for the cases we currently support. */
4858 /* Function vect_is_simple_cond.
4861 LOOP - the loop that is being vectorized.
4862 COND - Condition that is checked for simple use.
4864 Returns whether a COND can be vectorized. Checks whether
4865 condition operands are supportable using vec_is_simple_use. */
4868 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4872 enum vect_def_type dt;
4874 if (!COMPARISON_CLASS_P (cond))
4877 lhs = TREE_OPERAND (cond, 0);
4878 rhs = TREE_OPERAND (cond, 1);
  /* Each side of the comparison must be either an SSA name with a simple
     use in the loop, or a literal constant (integer, real or
     fixed-point).  */
4880 if (TREE_CODE (lhs) == SSA_NAME)
4882 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4883 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4886 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4887 && TREE_CODE (lhs) != FIXED_CST)
4890 if (TREE_CODE (rhs) == SSA_NAME)
4892 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4893 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4896 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4897 && TREE_CODE (rhs) != FIXED_CST)
4903 /* vectorizable_condition.
4905 Check if STMT is conditional modify expression that can be vectorized.
4906 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4907 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4913 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4915 tree scalar_dest = NULL_TREE;
4916 tree vec_dest = NULL_TREE;
4917 tree op = NULL_TREE;
4918 tree cond_expr, then_clause, else_clause;
4919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4920 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4921 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4922 tree vec_compare, vec_cond_expr;
4924 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4925 enum machine_mode vec_mode;
4927 enum vect_def_type dt;
4928 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4929 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4931 gcc_assert (ncopies >= 1);
  /* FORNOW: multiple-copy (unrolled) vectorization of COND_EXPRs is not
     implemented -- presumably this bail-out is guarded on ncopies > 1 by
     an elided condition; TODO confirm against the full file.  */
4933 return false; /* FORNOW */
4935 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4938 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4941 /* FORNOW: not yet supported. */
4942 if (STMT_VINFO_LIVE_P (stmt_info))
4944 if (vect_print_dump_info (REPORT_DETAILS))
4945 fprintf (vect_dump, "value used after loop.");
4949 /* Is vectorizable conditional operation? */
4950 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4953 op = GIMPLE_STMT_OPERAND (stmt, 1);
4955 if (TREE_CODE (op) != COND_EXPR)
4958 cond_expr = TREE_OPERAND (op, 0);
4959 then_clause = TREE_OPERAND (op, 1);
4960 else_clause = TREE_OPERAND (op, 2);
4962 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4965 /* We do not handle two different vector types for the condition
4967 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4970 if (TREE_CODE (then_clause) == SSA_NAME)
4972 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4973 if (!vect_is_simple_use (then_clause, loop_vinfo,
4974 &then_def_stmt, &def, &dt))
4977 else if (TREE_CODE (then_clause) != INTEGER_CST
4978 && TREE_CODE (then_clause) != REAL_CST
4979 && TREE_CODE (then_clause) != FIXED_CST)
4982 if (TREE_CODE (else_clause) == SSA_NAME)
4984 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4985 if (!vect_is_simple_use (else_clause, loop_vinfo,
4986 &else_def_stmt, &def, &dt))
4989 else if (TREE_CODE (else_clause) != INTEGER_CST
4990 && TREE_CODE (else_clause) != REAL_CST
4991 && TREE_CODE (else_clause) != FIXED_CST)
4995 vec_mode = TYPE_MODE (vectype);
4999 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5000 return expand_vec_cond_expr_p (op, vec_mode);
  /* Transform: build VEC_COND_EXPR <cond, then, else> out of the
     vectorized condition operands and clauses.  */
5006 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5007 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5009 /* Handle cond expr. */
5011 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
5013 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
5014 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
5015 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
5017 /* Arguments are ready. create the new vector stmt. */
5018 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5019 vec_cond_lhs, vec_cond_rhs);
5020 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5021 vec_compare, vec_then_clause, vec_else_clause);
5023 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
5024 new_temp = make_ssa_name (vec_dest, *vec_stmt);
5025 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
5026 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
5031 /* Function vect_transform_stmt.
5033 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5036 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
5038 bool is_store = false;
5039 tree vec_stmt = NULL_TREE;
5040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5041 tree orig_stmt_in_pattern;
  /* Dispatch on the classification recorded during analysis.  */
5044 switch (STMT_VINFO_TYPE (stmt_info))
5046 case type_demotion_vec_info_type:
5047 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
5051 case type_promotion_vec_info_type:
5052 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
5056 case type_conversion_vec_info_type:
5057 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
5061 case induc_vec_info_type:
5062 done = vectorizable_induction (stmt, bsi, &vec_stmt);
5066 case op_vec_info_type:
5067 done = vectorizable_operation (stmt, bsi, &vec_stmt);
5071 case assignment_vec_info_type:
5072 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
5076 case load_vec_info_type:
5077 done = vectorizable_load (stmt, bsi, &vec_stmt);
5081 case store_vec_info_type:
5082 done = vectorizable_store (stmt, bsi, &vec_stmt);
5084 if (DR_GROUP_FIRST_DR (stmt_info))
5086 /* In case of interleaving, the whole chain is vectorized when the
5087 last store in the chain is reached. Store stmts before the last
5088 one are skipped, and there vec_stmt_info shouldn't be freed
5090 *strided_store = true;
5091 if (STMT_VINFO_VEC_STMT (stmt_info))
5098 case condition_vec_info_type:
5099 done = vectorizable_condition (stmt, bsi, &vec_stmt);
5103 case call_vec_info_type:
5104 done = vectorizable_call (stmt, bsi, &vec_stmt);
5107 case reduc_vec_info_type:
5108 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
5113 if (!STMT_VINFO_LIVE_P (stmt_info))
5115 if (vect_print_dump_info (REPORT_DETAILS))
5116 fprintf (vect_dump, "stmt not supported.");
  /* Stmts whose value is used after the loop (and that are not
     reductions) need the live-operation handling.  */
5121 if (STMT_VINFO_LIVE_P (stmt_info)
5122 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5124 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
5130 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5131 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5132 if (orig_stmt_in_pattern)
5134 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5135 /* STMT was inserted by the vectorizer to replace a computation idiom.
5136 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5137 computed this idiom. We need to record a pointer to VEC_STMT in
5138 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5139 documentation of vect_pattern_recog. */
5140 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5142 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
5143 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5152 /* This function builds ni_name = number of iterations loop executes
5153 on the loop preheader. */
5156 vect_build_loop_niters (loop_vec_info loop_vinfo)
5158 tree ni_name, stmt, var;
5160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5161 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5163 var = create_tmp_var (TREE_TYPE (ni), "niters");
5164 add_referenced_var (var);
  /* Gimplify NI into "niters"; any generated statements are inserted on
     the preheader edge, which must not require splitting (asserted via
     the NULL new_bb below).  */
5165 ni_name = force_gimple_operand (ni, &stmt, false, var);
5167 pe = loop_preheader_edge (loop);
5170 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5171 gcc_assert (!new_bb);
5178 /* This function generates the following statements:
5180 ni_name = number of iterations loop executes
5181 ratio = ni_name / vf
5182 ratio_mult_vf_name = ratio * vf
5184 and places them at the loop preheader edge. */
5187 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5189 tree *ratio_mult_vf_name_ptr,
5190 tree *ratio_name_ptr)
5198 tree ratio_mult_vf_name;
5199 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5200 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5201 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5204 pe = loop_preheader_edge (loop);
5206 /* Generate temporary variable that contains
5207 number of iterations loop executes. */
5209 ni_name = vect_build_loop_niters (loop_vinfo);
  /* VF is a power of two, so the division and multiplication are done
     as shifts by log2 (vf).  */
5210 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5212 /* Create: ratio = ni >> log2(vf) */
5214 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5215 if (!is_gimple_val (ratio_name))
5217 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5218 add_referenced_var (var);
5220 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5221 pe = loop_preheader_edge (loop);
5222 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5223 gcc_assert (!new_bb);
5226 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5228 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5229 ratio_name, log_vf);
5230 if (!is_gimple_val (ratio_mult_vf_name))
5232 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5233 add_referenced_var (var);
5235 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5237 pe = loop_preheader_edge (loop);
5238 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5239 gcc_assert (!new_bb);
5242 *ni_name_ptr = ni_name;
5243 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5244 *ratio_name_ptr = ratio_name;
5250 /* Function vect_update_ivs_after_vectorizer.
5252 "Advance" the induction variables of LOOP to the value they should take
5253 after the execution of LOOP. This is currently necessary because the
5254 vectorizer does not handle induction variables that are used after the
5255 loop. Such a situation occurs when the last iterations of LOOP are
5257 1. We introduced new uses after LOOP for IVs that were not originally used
5258 after LOOP: the IVs of LOOP are now used by an epilog loop.
5259 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5260 times, whereas the loop IVs should be bumped N times.
5263 - LOOP - a loop that is going to be vectorized. The last few iterations
5264 of LOOP were peeled.
5265 - NITERS - the number of iterations that LOOP executes (before it is
5266 vectorized). i.e, the number of times the ivs should be bumped.
5267 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5268 coming out from LOOP on which there are uses of the LOOP ivs
5269 (this is the path from LOOP->exit to epilog_loop->preheader).
5271 The new definitions of the ivs are placed in LOOP->exit.
5272 The phi args associated with the edge UPDATE_E in the bb
5273 UPDATE_E->dest are updated accordingly.
5275 Assumption 1: Like the rest of the vectorizer, this function assumes
5276 a single loop exit that has a single predecessor.
5278 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5279 organized in the same order.
5281 Assumption 3: The access function of the ivs is simple enough (see
5282 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5284 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5285 coming out of LOOP on which the ivs of LOOP are used (this is the path
5286 that leads to the epilog loop; other paths skip the epilog loop). This
5287 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5288 needs to have its phis updated.
5292 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5295 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5296 basic_block exit_bb = single_exit (loop)->dest;
5298 basic_block update_bb = update_e->dest;
5300 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5302 /* Make sure there exists a single-predecessor exit bb: */
5303 gcc_assert (single_pred_p (exit_bb));
  /* Walk the header phis and update_bb phis in lock-step (Assumption 2:
     same order).  */
5305 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5307 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5309 tree access_fn = NULL;
5310 tree evolution_part;
5313 tree var, ni, ni_name;
5314 block_stmt_iterator last_bsi;
5316 if (vect_print_dump_info (REPORT_DETAILS))
5318 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5319 print_generic_expr (vect_dump, phi, TDF_SLIM);
5322 /* Skip virtual phi's. */
5323 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5325 if (vect_print_dump_info (REPORT_DETAILS))
5326 fprintf (vect_dump, "virtual phi. skip.");
5330 /* Skip reduction phis. */
5331 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5333 if (vect_print_dump_info (REPORT_DETAILS))
5334 fprintf (vect_dump, "reduc phi. skip.");
5338 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5339 gcc_assert (access_fn);
5341 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5342 gcc_assert (evolution_part != NULL_TREE);
5344 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5345 of degree >= 2 or exponential. */
5346 gcc_assert (!tree_is_chrec (evolution_part));
5348 step_expr = evolution_part;
5349 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
  /* Compute ni = init + niters * step; pointer IVs use
     POINTER_PLUS_EXPR with the offset converted to sizetype.  */
5352 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5353 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5355 fold_convert (sizetype,
5356 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5357 niters, step_expr)));
5359 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5360 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5361 fold_convert (TREE_TYPE (init_expr),
5368 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5369 add_referenced_var (var);
  /* Insert the new IV value at the end of the exit bb.  */
5371 last_bsi = bsi_last (exit_bb);
5372 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
5373 true, BSI_SAME_STMT);
5375 /* Fix phi expressions in the successor bb. */
5376 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
5381 /* Function vect_do_peeling_for_loop_bound
5383 Peel the last iterations of the loop represented by LOOP_VINFO.
5384 The peeled iterations form a new epilog loop. Given that the loop now
5385 iterates NITERS times, the new epilog loop iterates
5386 NITERS % VECTORIZATION_FACTOR times.
5388 The original loop will later be made to iterate
5389 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
5392 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5394 tree ni_name, ratio_mult_vf_name;
5395 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5396 struct loop *new_loop;
5398 basic_block preheader;
5401 int min_scalar_loop_bound;
5402 int min_profitable_iters;
5404 if (vect_print_dump_info (REPORT_DETAILS))
5405 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
/* Peeling duplicates loop blocks; the original/copy tables record the
   mapping between originals and their duplicates for the duration.  */
5407 initialize_original_copy_tables ();
5409 /* Generate the following variables on the preheader of original loop:
5411 ni_name = number of iterations the original loop executes
5412 ratio = ni_name / vf
5413 ratio_mult_vf_name = ratio * vf */
5414 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5415 &ratio_mult_vf_name, ratio);
/* Remember the loop number so we can assert below that peeling left the
   original LOOP (not its copy) in place.  */
5417 loop_num = loop->num;
5419 /* Analyze cost to set threshold for vectorized loop. */
5420 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
5421 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5422 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5424 /* Use the cost model only if it is more conservative than user specified
5427 th = (unsigned) min_scalar_loop_bound;
5428 if (min_profitable_iters
5429 && (!min_scalar_loop_bound
5430 || min_profitable_iters > min_scalar_loop_bound))
5431 th = (unsigned) min_profitable_iters;
/* With a symbolic iteration count, profitability can only be decided at
   runtime via the threshold above; just note it in the dump file.  */
5433 if (min_profitable_iters
5434 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5435 && vect_print_dump_info (REPORT_DETAILS))
5436 fprintf (vect_dump, "vectorization may not be profitable.");
/* Peel the epilog (the NITERS % VF leftover iterations) off the exit edge
   of LOOP; LOOP itself will iterate RATIO times.  */
5438 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5439 ratio_mult_vf_name, ni_name, false,
5441 gcc_assert (new_loop);
5442 gcc_assert (loop_num == loop->num);
5443 #ifdef ENABLE_CHECKING
5444 slpeel_verify_cfg_after_peeling (loop, new_loop);
5447 /* A guard that controls whether the new_loop is to be executed or skipped
5448 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5449 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5450 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5451 is on the path where the LOOP IVs are used and need to be updated. */
5453 preheader = loop_preheader_edge (new_loop)->src;
5454 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5455 update_e = EDGE_PRED (preheader, 0)
5457 update_e = EDGE_PRED (preheader, 1);
5459 /* Update IVs of original loop as if they were advanced
5460 by ratio_mult_vf_name steps. */
5461 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5463 /* After peeling we have to reset scalar evolution analyzer. */
5466 free_original_copy_tables ();
5470 /* Function vect_gen_niters_for_prolog_loop
5472 Set the number of iterations for the loop represented by LOOP_VINFO
5473 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5474 and the misalignment of DR - the data reference recorded in
5475 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5476 this loop, the data reference DR will refer to an aligned location.
5478 The following computation is generated:
5480 If the misalignment of DR is known at compile time:
5481 addr_mis = int mis = DR_MISALIGNMENT (dr);
5482 Else, compute address misalignment in bytes:
5483 addr_mis = addr & (vectype_size - 1)
5485 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5487 (elem_size = element type size; an element is the scalar element
5488 whose type is the inner type of the vectype)
5492 prolog_niters = min ( LOOP_NITERS ,
5493 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5494 where group_size is the size of the interleaved group.
5496 The above formulas assume that VF == number of elements in the vector. This
5497 may not hold when there are multiple-types in the loop.
5498 In this case, for some data-references in the loop the VF does not represent
5499 the number of elements that fit in the vector. Therefore, instead of VF we
5500 use TYPE_VECTOR_SUBPARTS. */
5503 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5505 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5506 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5508 tree iters, iters_name;
5511 tree dr_stmt = DR_STMT (dr);
5512 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5513 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Vector alignment in bytes (TYPE_ALIGN is in bits).  */
5514 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5515 tree niters_type = TREE_TYPE (loop_niters);
/* Size in bytes of one scalar element of the data reference.  */
5517 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5518 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5520 if (DR_GROUP_FIRST_DR (stmt_info))
5522 /* For interleaved access element size must be multiplied by the size of
5523 the interleaved group. */
5524 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5525 DR_GROUP_FIRST_DR (stmt_info)));
5526 element_size *= group_size;
5529 pe = loop_preheader_edge (loop);
/* Misalignment known at compile time: the prolog iteration count is a
   constant, computed per the formula in the function comment.  */
5531 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5533 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5534 int elem_misalign = byte_misalign / element_size;
5536 if (vect_print_dump_info (REPORT_DETAILS))
5537 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
5538 iters = build_int_cst (niters_type,
5539 (nelements - elem_misalign)&(nelements/group_size-1));
/* Misalignment unknown: emit preheader code that computes it at runtime
   from the address of the first vector access.  */
5543 tree new_stmts = NULL_TREE;
5545 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
/* An unsigned integer type as wide as the pointer, for the bit tricks.  */
5546 tree ptr_type = TREE_TYPE (start_addr);
5547 tree size = TYPE_SIZE (ptr_type);
5548 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5549 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
/* Division by element size is done as a right shift; vectype_align must be
   a power-of-two multiple of nelements for exact_log2 to be valid here.  */
5550 tree elem_size_log =
5551 build_int_cst (type, exact_log2 (vectype_align/nelements));
5552 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5553 tree nelements_tree = build_int_cst (type, nelements);
/* The address computation must land on the preheader edge without
   splitting it, hence the assert that no new block was created.  */
5557 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5558 gcc_assert (!new_bb);
5560 /* Create: byte_misalign = addr & (vectype_size - 1) */
5562 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5564 /* Create: elem_misalign = byte_misalign / element_size */
5566 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5568 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5569 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5570 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5571 iters = fold_convert (niters_type, iters);
5574 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5575 /* If the loop bound is known at compile time we already verified that it is
5576 greater than vf; since the misalignment ('iters') is at most vf, there's
5577 no need to generate the MIN_EXPR in this case. */
5578 if (TREE_CODE (loop_niters) != INTEGER_CST)
5579 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5581 if (vect_print_dump_info (REPORT_DETAILS))
5583 fprintf (vect_dump, "niters for prolog loop: ");
5584 print_generic_expr (vect_dump, iters, TDF_SLIM);
/* Gimplify the expression into a named SSA value usable by callers.  */
5587 var = create_tmp_var (niters_type, "prolog_loop_niters");
5588 add_referenced_var (var);
5589 iters_name = force_gimple_operand (iters, &stmt, false, var);
5591 /* Insert stmt on loop preheader edge. */
5594 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5595 gcc_assert (!new_bb);
5602 /* Function vect_update_init_of_dr
5604 NITERS iterations were peeled from LOOP. DR represents a data reference
5605 in LOOP. This function updates the information recorded in DR to
5606 account for the fact that the first NITERS iterations had already been
5607 executed. Specifically, it updates the OFFSET field of DR. */
5610 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5612 tree offset = DR_OFFSET (dr);
/* Advance the recorded offset by NITERS steps: offset += niters * step.  */
5614 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5615 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5616 DR_OFFSET (dr) = offset;
5620 /* Function vect_update_inits_of_drs
5622 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5623 This function updates the information recorded for the data references in
5624 the loop to account for the fact that the first NITERS iterations had
5625 already been executed. Specifically, it updates the initial_condition of
5626 the access_function of all the data_references in the loop. */
5629 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5632 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5633 struct data_reference *dr;
5635 if (vect_print_dump_info (REPORT_DETAILS))
5636 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
/* Delegate the per-DR offset adjustment to vect_update_init_of_dr.  */
5638 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5639 vect_update_init_of_dr (dr, niters);
5643 /* Function vect_do_peeling_for_alignment
5645 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5646 'niters' is set to the misalignment of one of the data references in the
5647 loop, thereby forcing it to refer to an aligned location at the beginning
5648 of the execution of this loop. The data reference for which we are
5649 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5652 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5655 tree niters_of_prolog_loop, ni_name;
5657 struct loop *new_loop;
5659 if (vect_print_dump_info (REPORT_DETAILS))
5660 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
/* Peeling duplicates loop blocks; set up the original/copy mapping.  */
5662 initialize_original_copy_tables ();
/* Build the total iteration count and, from it, the prolog iteration
   count that makes LOOP_VINFO_UNALIGNED_DR aligned afterwards.  */
5664 ni_name = vect_build_loop_niters (loop_vinfo);
5665 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5667 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
5669 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5670 niters_of_prolog_loop, ni_name, true, 0);
5671 gcc_assert (new_loop);
5672 #ifdef ENABLE_CHECKING
5673 slpeel_verify_cfg_after_peeling (new_loop, loop);
5676 /* Update number of times loop executes. */
5677 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5678 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5679 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5681 /* Update the init conditions of the access functions of all data refs. */
5682 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5684 /* After peeling we have to reset scalar evolution analyzer. */
5687 free_original_copy_tables ();
5691 /* Function vect_create_cond_for_align_checks.
5693 Create a conditional expression that represents the alignment checks for
5694 all of data references (array element references) whose alignment must be
5698 LOOP_VINFO - two fields of the loop information are used.
5699 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5700 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5703 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5705 The returned value is the conditional expression to be used in the if
5706 statement that controls which version of the loop gets executed at runtime.
5708 The algorithm makes two assumptions:
5709 1) The number of bytes "n" in a vector is a power of 2.
5710 2) An address "a" is aligned if a%n is zero and that this
5711 test can be done as a&(n-1) == 0. For example, for 16
5712 byte vectors the test is a&0xf == 0. */
5715 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5716 tree *cond_expr_stmt_list)
5718 VEC(tree,heap) *may_misalign_stmts
5719 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5721 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5725 tree int_ptrsize_type;
5727 tree or_tmp_name = NULL_TREE;
5728 tree and_tmp, and_tmp_name, and_stmt;
5731 /* Check that mask is one less than a power of 2, i.e., mask is
5732 all zeros followed by all ones. */
5733 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5735 /* CHECKME: what is the best integer or unsigned type to use to hold a
5736 cast from a pointer value? */
/* Build a signed integer type with the same bit width as a pointer.  */
5737 psize = TYPE_SIZE (ptr_type_node);
5739 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5741 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
5742 of the first vector of the i'th data reference. */
5744 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5746 tree new_stmt_list = NULL_TREE;
5748 tree addr_tmp, addr_tmp_name, addr_stmt;
5749 tree or_tmp, new_or_tmp_name, or_stmt;
5751 /* create: addr_tmp = (int)(address_of_first_vector) */
5752 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
/* Any statements needed to compute the address go on the cond list so
   they execute before the runtime test.  */
5756 if (new_stmt_list != NULL_TREE)
5757 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5759 sprintf (tmp_name, "%s%d", "addr2int", i);
5760 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5761 add_referenced_var (addr_tmp);
5762 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5763 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5764 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5765 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5766 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5768 /* The addresses are ORed together, so a single mask test below covers
   every data reference at once. */
5770 if (or_tmp_name != NULL_TREE)
5772 /* create: or_tmp = or_tmp | addr_tmp */
5773 sprintf (tmp_name, "%s%d", "orptrs", i);
5774 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5775 add_referenced_var (or_tmp);
5776 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5777 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5778 or_tmp_name, addr_tmp_name);
5779 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5780 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5781 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5782 or_tmp_name = new_or_tmp_name;
/* First iteration: nothing to OR with yet.  */
5785 or_tmp_name = addr_tmp_name;
5789 mask_cst = build_int_cst (int_ptrsize_type, mask);
5791 /* create: and_tmp = or_tmp & mask */
5792 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5793 add_referenced_var (and_tmp);
5794 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5796 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5797 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5798 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5799 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5801 /* Make and_tmp the left operand of the conditional test against zero.
5802 if and_tmp has a nonzero bit then some address is unaligned. */
5803 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5804 return build2 (EQ_EXPR, boolean_type_node,
5805 and_tmp_name, ptrsize_zero);
5808 /* Function vect_vfa_segment_size.
5810 Create an expression that computes the size of segment
5811 that will be accessed for a data reference. The function takes into
5812 account that realignment loads may access one more vector.
5815 DR: The data reference.
5816 VECT_FACTOR: vectorization factor.
5818 Return an expression whose value is the size of segment which will be
5822 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
5824 tree segment_length;
/* Realignment via software pipelining loads one extra vector beyond the
   VF * step span, so add the vector size in bytes to the segment.  */
5826 if (vect_supportable_dr_alignment (dr) == dr_unaligned_software_pipeline)
5829 build_int_cst (integer_type_node,
5830 GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
5831 (vinfo_for_stmt (DR_STMT (dr))))));
5834 fold_convert (sizetype,
5835 fold_build2 (PLUS_EXPR, integer_type_node,
5836 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
/* Otherwise the segment is simply DR_STEP * vectorization factor.  */
5845 fold_convert (sizetype,
5846 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
5850 return segment_length;
5853 /* Function vect_create_cond_for_alias_checks.
5855 Create a conditional expression that represents the run-time checks for
5856 overlapping of address ranges represented by a list of data references
5857 relations passed as input.
5860 COND_EXPR - input conditional expression. New conditions will be chained
5861 with logical and operation.
5862 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
5866 COND_EXPR - conditional expression.
5867 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5869 The returned value is the conditional expression to be used in the if
5870 statement that controls which version of the loop gets executed at runtime.
5874 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
5876 tree * cond_expr_stmt_list)
5878 VEC (ddr_p, heap) * may_alias_ddrs =
5879 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
5881 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
5885 tree part_cond_expr;
5887 /* Create expression
5888 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
5889 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
5893 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
5894 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
/* No unresolved dependence relations: nothing to check at runtime.  */
5896 if (VEC_empty (ddr_p, may_alias_ddrs))
/* For each DDR, build a "segments do not overlap" disjunction and AND it
   into the accumulated condition.  */
5899 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
5901 tree stmt_a = DR_STMT (DDR_A (ddr));
5902 tree stmt_b = DR_STMT (DDR_B (ddr));
/* Base addresses of the first vector access of each reference; any setup
   statements are appended to COND_EXPR_STMT_LIST.  */
5905 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
5908 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
5911 tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
5912 tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
5914 if (vect_print_dump_info (REPORT_DR_DETAILS))
5917 "create runtime check for data references ");
5918 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
5919 fprintf (vect_dump, " and ");
5920 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
/* part_cond_expr: (a + len_a < b) || (b + len_b < a), i.e. the two
   accessed segments are disjoint.  */
5925 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
5926 fold_build2 (LT_EXPR, boolean_type_node,
5927 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
5931 fold_build2 (LT_EXPR, boolean_type_node,
5932 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
/* Chain onto an existing condition with AND, or start a new one.  */
5938 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
5939 *cond_expr, part_cond_expr);
5941 *cond_expr = part_cond_expr;
5943 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
5944 fprintf (vect_dump, "created %u versioning for alias checks.\n",
5945 VEC_length (ddr_p, may_alias_ddrs));
5949 /* Function vect_transform_loop.
5951 The analysis phase has determined that the loop is vectorizable.
5952 Vectorize the loop - created vectorized stmts to replace the scalar
5953 stmts in the loop, and update the loop exit condition. */
5956 vect_transform_loop (loop_vec_info loop_vinfo)
5958 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5959 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5960 int nbbs = loop->num_nodes;
5961 block_stmt_iterator si, next_si;
5964 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5967 if (vect_print_dump_info (REPORT_DETAILS))
5968 fprintf (vect_dump, "=== vec_transform_loop ===");
5970 /* If the loop has data references that may or may not be aligned or/and
5971 has data reference relations whose independence was not proven then
5972 two versions of the loop need to be generated, one which is vectorized
5973 and one which isn't. A test is then generated to control which of the
5974 loops is executed. The test checks for the alignment of all of the
5975 data references that may or may not be aligned. An additional
5976 sequence of runtime tests is generated for each pairs of DDRs whose
5977 independence was not proven. The vectorized version of loop is
5978 executed only if both alias and alignment tests are passed. */
5980 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
5981 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
5984 tree cond_expr = NULL_TREE;
5985 tree cond_expr_stmt_list = NULL_TREE;
5986 basic_block condition_bb;
5987 block_stmt_iterator cond_exp_bsi;
5988 basic_block merge_bb;
5989 basic_block new_exit_bb;
5991 tree orig_phi, new_phi, arg;
5992 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5993 tree gimplify_stmt_list;
5995 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5997 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
5999 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
6000 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
6001 &cond_expr_stmt_list);
6004 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
6006 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
6008 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
6010 initialize_original_copy_tables ();
6011 nloop = loop_version (loop, cond_expr, &condition_bb,
6012 prob, prob, REG_BR_PROB_BASE - prob, true);
6013 free_original_copy_tables();
6015 /** Loop versioning violates an assumption we try to maintain during
6016 vectorization - that the loop exit block has a single predecessor.
6017 After versioning, the exit block of both loop versions is the same
6018 basic block (i.e. it has two predecessors). Just in order to simplify
6019 following transformations in the vectorizer, we fix this situation
6020 here by adding a new (empty) block on the exit-edge of the loop,
6021 with the proper loop-exit phis to maintain loop-closed-form. **/
6023 merge_bb = single_exit (loop)->dest;
6024 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
6025 new_exit_bb = split_edge (single_exit (loop));
6026 new_exit_e = single_exit (loop);
6027 e = EDGE_SUCC (new_exit_bb, 0);
6029 for (orig_phi = phi_nodes (merge_bb); orig_phi;
6030 orig_phi = PHI_CHAIN (orig_phi))
6032 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
6034 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
6035 add_phi_arg (new_phi, arg, new_exit_e);
6036 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
6039 /** end loop-exit-fixes after versioning **/
6041 update_ssa (TODO_update_ssa);
6042 cond_exp_bsi = bsi_last (condition_bb);
6043 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
6046 /* CHECKME: we wouldn't need this if we called update_ssa once
6048 bitmap_zero (vect_memsyms_to_rename);
6050 /* Peel the loop if there are data refs with unknown alignment.
6051 Only one data ref with unknown store is allowed. */
6053 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
6054 vect_do_peeling_for_alignment (loop_vinfo);
6056 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6057 compile time constant), or it is a constant that doesn't divide by the
6058 vectorization factor, then an epilog loop needs to be created.
6059 We therefore duplicate the loop: the original loop will be vectorized,
6060 and will compute the first (n/VF) iterations. The second copy of the loop
6061 will remain scalar and will compute the remaining (n%VF) iterations.
6062 (VF is the vectorization factor). */
6064 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
6065 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
6066 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
6067 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
6069 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6070 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6072 /* 1) Make sure the loop header has exactly two entries
6073 2) Make sure we have a preheader basic block. */
6075 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6077 split_edge (loop_preheader_edge (loop));
6079 /* FORNOW: the vectorizer supports only loops which body consist
6080 of one basic block (header + empty latch). When the vectorizer will
6081 support more involved loop forms, the order by which the BBs are
6082 traversed need to be reconsidered. */
6084 for (i = 0; i < nbbs; i++)
6086 basic_block bb = bbs[i];
6087 stmt_vec_info stmt_info;
6090 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
6092 if (vect_print_dump_info (REPORT_DETAILS))
6094 fprintf (vect_dump, "------>vectorizing phi: ");
6095 print_generic_expr (vect_dump, phi, TDF_SLIM);
6097 stmt_info = vinfo_for_stmt (phi);
6100 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6101 && !STMT_VINFO_LIVE_P (stmt_info))
6104 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6105 != (unsigned HOST_WIDE_INT) vectorization_factor)
6106 && vect_print_dump_info (REPORT_DETAILS))
6107 fprintf (vect_dump, "multiple-types.");
6109 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6111 if (vect_print_dump_info (REPORT_DETAILS))
6112 fprintf (vect_dump, "transform phi.");
6113 vect_transform_stmt (phi, NULL, NULL);
6117 for (si = bsi_start (bb); !bsi_end_p (si);)
6119 tree stmt = bsi_stmt (si);
6122 if (vect_print_dump_info (REPORT_DETAILS))
6124 fprintf (vect_dump, "------>vectorizing statement: ");
6125 print_generic_expr (vect_dump, stmt, TDF_SLIM);
6127 stmt_info = vinfo_for_stmt (stmt);
6128 gcc_assert (stmt_info);
6129 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6130 && !STMT_VINFO_LIVE_P (stmt_info))
6136 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
6137 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6138 != (unsigned HOST_WIDE_INT) vectorization_factor)
6139 && vect_print_dump_info (REPORT_DETAILS))
6140 fprintf (vect_dump, "multiple-types.");
6142 /* -------- vectorize statement ------------ */
6143 if (vect_print_dump_info (REPORT_DETAILS))
6144 fprintf (vect_dump, "transform statement.");
6146 strided_store = false;
6147 is_store = vect_transform_stmt (stmt, &si, &strided_store);
6151 if (DR_GROUP_FIRST_DR (stmt_info))
6153 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6154 interleaving chain was completed - free all the stores in
6156 tree next = DR_GROUP_FIRST_DR (stmt_info);
6158 stmt_vec_info next_stmt_info;
6162 next_si = bsi_for_stmt (next);
6163 next_stmt_info = vinfo_for_stmt (next);
6164 /* Free the attached stmt_vec_info and remove the stmt. */
6165 ann = stmt_ann (next);
6166 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6167 free (next_stmt_info);
6168 set_stmt_info (ann, NULL);
6169 bsi_remove (&next_si, true);
6172 bsi_remove (&si, true);
6177 /* Free the attached stmt_vec_info and remove the stmt. */
6178 ann = stmt_ann (stmt);
6180 set_stmt_info (ann, NULL);
6181 bsi_remove (&si, true);
6189 slpeel_make_loop_iterate_ntimes (loop, ratio);
6191 mark_set_for_renaming (vect_memsyms_to_rename);
6193 /* The memory tags and pointers in vectorized statements need to
6194 have their SSA forms updated. FIXME, why can't this be delayed
6195 until all the loops have been transformed? */
6196 update_ssa (TODO_update_ssa);
6198 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6199 fprintf (vect_dump, "LOOP VECTORIZED.");