1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
40 #include "tree-data-ref.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
45 #include "tree-pass.h"
49 /* Utility functions for the code transformation. */
50 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
58 static tree vect_init_vector (tree, tree, tree);
59 static void vect_finish_stmt_generation
60 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void update_vuses_to_preheader (tree, struct loop*);
63 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
64 static tree get_initial_def_for_reduction (tree, tree, tree *);
66 /* Utility functions dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
78 cost_for_stmt (tree stmt)
80 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
82 switch (STMT_VINFO_TYPE (stmt_info))
84 case load_vec_info_type:
85 return TARG_SCALAR_LOAD_COST;
86 case store_vec_info_type:
87 return TARG_SCALAR_STORE_COST;
88 case op_vec_info_type:
89 case condition_vec_info_type:
90 case assignment_vec_info_type:
91 case reduc_vec_info_type:
92 case induc_vec_info_type:
93 case type_promotion_vec_info_type:
94 case type_demotion_vec_info_type:
95 case type_conversion_vec_info_type:
96 case call_vec_info_type:
97 return TARG_SCALAR_STMT_COST;
98 case undef_vec_info_type:
105 /* Function vect_estimate_min_profitable_iters
107 Return the number of iterations required for the vector version of the
108 loop to be profitable relative to the cost of the scalar version of the loop.
111 TODO: Take profile info into account before making vectorization
112 decisions, if available. */
115 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
118 int min_profitable_iters;
119 int peel_iters_prologue;
120 int peel_iters_epilogue;
121 int vec_inside_cost = 0;
122 int vec_outside_cost = 0;
123 int scalar_single_iter_cost = 0;
124 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
125 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
126 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
127 int nbbs = loop->num_nodes;
130 /* Cost model disabled. */
131 if (!flag_vect_cost_model)
133 if (vect_print_dump_info (REPORT_DETAILS))
134 fprintf (vect_dump, "cost model disabled.");
138 /* Requires loop versioning tests to handle misalignment.
139 FIXME: Make cost depend on number of stmts in may_misalign list. */
141 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
143 vec_outside_cost += TARG_COND_BRANCH_COST;
144 if (vect_print_dump_info (REPORT_DETAILS))
145 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
149 /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now.
152 TODO: Add outer loop support.
154 TODO: Consider assigning different costs to different scalar statements. */
157 for (i = 0; i < nbbs; i++)
159 block_stmt_iterator si;
160 basic_block bb = bbs[i];
162 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
164 tree stmt = bsi_stmt (si);
165 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
166 if (!STMT_VINFO_RELEVANT_P (stmt_info)
167 && !STMT_VINFO_LIVE_P (stmt_info))
169 scalar_single_iter_cost += cost_for_stmt (stmt);
170 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
171 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
175 /* Add additional cost for the peeled instructions in prologue and epilogue
178 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
179 at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
181 TODO: Build an expression that represents peel_iters for prologue and
182 epilogue to be used in a run-time test. */
184 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
186 if (byte_misalign < 0)
188 peel_iters_prologue = (vf - 1)/2;
189 if (vect_print_dump_info (REPORT_DETAILS))
190 fprintf (vect_dump, "cost model: "
191 "prologue peel iters set to (vf-1)/2.");
193 /* If peeling for alignment is unknown, loop bound of main loop becomes unknown. */
195 peel_iters_epilogue = (vf - 1)/2;
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "cost model: "
198 "epilogue peel iters set to (vf-1)/2 because "
199 "peeling for alignment is unknown .");
205 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
206 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
207 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
208 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
210 peel_iters_prologue = nelements - (byte_misalign / element_size);
213 peel_iters_prologue = 0;
215 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
217 peel_iters_epilogue = (vf - 1)/2;
218 if (vect_print_dump_info (REPORT_DETAILS))
219 fprintf (vect_dump, "cost model: "
220 "epilogue peel iters set to (vf-1)/2 because "
221 "loop iterations are unknown .");
225 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
226 peel_iters_prologue = niters < peel_iters_prologue ?
227 niters : peel_iters_prologue;
228 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
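/* Illustrative example (hypothetical values, not taken from any target):
   with vf = 4, byte_misalign = 4, element_size = 4 and nelements = 4, the
   prologue peels
     peel_iters_prologue = 4 - (4 / 4) = 3
   iterations, and with a compile-time niters = 103 the epilogue handles
     peel_iters_epilogue = (103 - 3) % 4 = 0
   iterations, leaving (103 - 3) / 4 = 25 iterations for the vector loop.  */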
232 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
233 two guards, one for the peeled loop and one for the vector loop. */
235 if (peel_iters_prologue)
237 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
238 if (vect_print_dump_info (REPORT_DETAILS))
239 fprintf (vect_dump, "cost model: Adding cost of checks for "
243 /* Requires an epilogue loop to finish up remaining iterations after vector
244 loop. Add cost of two guards, one for the peeled loop and one for the vector loop. */
247 if (peel_iters_epilogue
248 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
249 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
251 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
252 if (vect_print_dump_info (REPORT_DETAILS))
253 fprintf (vect_dump, "cost model : Adding cost of checks for "
257 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
258 + (peel_iters_epilogue * scalar_single_iter_cost);
260 /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
261 information we provide for the target is whether testing against the
262 threshold involves a runtime test. */
263 if (targetm.vectorize.builtin_vectorization_cost)
265 bool runtime_test = false;
267 /* If the number of iterations is unknown, or the
268 peeling-for-misalignment amount is unknown, we will have to generate
269 a runtime test that compares the loop count against the threshold. */
270 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
271 || (byte_misalign < 0))
274 targetm.vectorize.builtin_vectorization_cost (runtime_test);
275 if (vect_print_dump_info (REPORT_DETAILS))
276 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
277 targetm.vectorize.builtin_vectorization_cost (runtime_test));
280 /* Calculate number of iterations required to make the vector version
281 profitable, relative to the loop bodies only. The following condition
282 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
283 SIC = scalar iteration cost, VIC = vector iteration cost,
284 VOC = vector outside cost and VF = vectorization factor. */
286 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
288 if (vec_outside_cost == 0)
289 min_profitable_iters = 1;
292 min_profitable_iters = (vec_outside_cost * vf)
293 / ((scalar_single_iter_cost * vf)
296 if ((scalar_single_iter_cost * vf * min_profitable_iters)
297 <= ((vec_inside_cost * min_profitable_iters)
298 + (vec_outside_cost * vf)))
299 min_profitable_iters++;
302 /* vector version will never be profitable. */
305 if (vect_print_dump_info (REPORT_DETAILS))
306 fprintf (vect_dump, "cost model: vector iteration cost = %d "
307 "is divisible by scalar iteration cost = %d by a factor "
308 "greater than or equal to the vectorization factor = %d .",
309 vec_inside_cost, scalar_single_iter_cost, vf);
313 if (vect_print_dump_info (REPORT_DETAILS))
315 fprintf (vect_dump, "Cost model analysis: \n");
316 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
318 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
320 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
321 fprintf (vect_dump, " prologue iterations: %d\n",
322 peel_iters_prologue);
323 fprintf (vect_dump, " epilogue iterations: %d\n",
324 peel_iters_epilogue);
325 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
326 min_profitable_iters);
327 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
328 min_profitable_iters < vf ? vf : min_profitable_iters);
331 min_profitable_iters =
332 min_profitable_iters < vf ? vf : min_profitable_iters;
334 /* Because the condition we create is:
335 if (niters <= min_profitable_iters)
336 then skip the vectorized loop. */
337 min_profitable_iters--;
338 return min_profitable_iters;
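/* Worked example of the condition above (hypothetical costs): with
   SIC = 4, VIC = 6, VOC = 14 and VF = 4,
     min_profitable_iters = (VOC * VF) / ((SIC * VF) - VIC)
                          = (14 * 4) / (16 - 6) = 56 / 10 = 5 (integer division).
   Because 4 * 4 * 5 = 80 <= (6 * 5) + (14 * 4) = 86, the value is bumped
   to 6, raised to VF if it were smaller, and finally decremented to 5,
   since the guard emitted later is "if (niters <= min_profitable_iters)
   skip the vectorized loop".  */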
342 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
343 functions. Design better to avoid maintenance issues. */
345 /* Function vect_model_reduction_cost.
347 Models cost for a reduction operation, including the vector ops
348 generated within the strip-mine loop, the initial definition before
349 the loop, and the epilogue code that must be generated. */
352 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
361 enum machine_mode mode;
362 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
363 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
365 /* Cost of reduction op inside loop. */
366 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
368 reduction_op = TREE_OPERAND (operation, op_type-1);
369 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
370 mode = TYPE_MODE (vectype);
371 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
374 orig_stmt = STMT_VINFO_STMT (stmt_info);
376 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
378 /* Add in cost for initial definition. */
379 outer_cost += TARG_SCALAR_TO_VEC_COST;
381 /* Determine cost of epilogue code.
383 We have a reduction operator that will reduce the vector in one statement.
384 Also requires scalar extract. */
386 if (reduc_code < NUM_TREE_CODES)
387 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
390 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
392 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
393 int element_bitsize = tree_low_cst (bitsize, 1);
394 int nelements = vec_size_in_bits / element_bitsize;
396 optab = optab_for_tree_code (code, vectype);
398 /* We have a whole vector shift available. */
399 if (VECTOR_MODE_P (mode)
400 && optab->handlers[mode].insn_code != CODE_FOR_nothing
401 && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
402 /* Final reduction via vector shifts and the reduction operator. Also
403 requires scalar extract. */
404 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
405 + TARG_VEC_TO_SCALAR_COST);
407 /* Use extracts and reduction op for final reduction. For N elements,
408 we have N extracts and N-1 reduction ops. */
409 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
412 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
414 if (vect_print_dump_info (REPORT_DETAILS))
415 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
416 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
417 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
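/* Illustrative numbers (hypothetical target costs): for a V4SI reduction
   with ncopies = 1, no direct reduc_code, but a whole-vector shift
   available, the model above charges
     inside  = 1 * TARG_VEC_STMT_COST
     outside = TARG_SCALAR_TO_VEC_COST                    (initial definition)
             + (exact_log2 (4) * 2) * TARG_VEC_STMT_COST  (shift + op pairs)
             + TARG_VEC_TO_SCALAR_COST                    (final extract).  */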
421 /* Function vect_model_induction_cost.
423 Models cost for induction operations. */
426 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
428 /* loop cost for vec_loop. */
429 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
430 /* prologue cost for vec_init and vec_step. */
431 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
433 if (vect_print_dump_info (REPORT_DETAILS))
434 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
435 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
436 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
440 /* Function vect_model_simple_cost.
442 Models cost for simple operations, i.e. those that only emit ncopies of a
443 single op. Right now, this does not account for multiple insns that could
444 be generated for the single vector op. We will handle that shortly. */
447 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt)
451 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
453 /* FORNOW: Assuming maximum 2 args per stmt. */
456 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
457 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST;
460 if (vect_print_dump_info (REPORT_DETAILS))
461 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
462 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
463 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
467 /* Function vect_cost_strided_group_size
469 For strided load or store, return the group_size only if it is the first
470 load or store of a group, else return 1. This ensures that group size is
471 only returned once per group. */
474 vect_cost_strided_group_size (stmt_vec_info stmt_info)
476 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
478 if (first_stmt == STMT_VINFO_STMT (stmt_info))
479 return DR_GROUP_SIZE (stmt_info);
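/* For example, for an interleaved group of four stores S1..S4 where S1 is
   DR_GROUP_FIRST_DR, this returns 4 for S1 and 1 for S2, S3 and S4, so the
   group-wide permute overhead is charged exactly once per group.  */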
485 /* Function vect_model_store_cost
487 Models cost for stores. In the case of strided accesses, one access
488 has the overhead of the strided access attributed to it. */
491 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt)
496 if (dt == vect_constant_def || dt == vect_invariant_def)
497 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST;
499 /* Strided access? */
500 if (DR_GROUP_FIRST_DR (stmt_info))
501 group_size = vect_cost_strided_group_size (stmt_info);
502 /* Not a strided access. */
506 /* Is this an access in a group of stores, which provide strided access?
507 If so, add in the cost of the permutes. */
510 /* Uses a high and low interleave operation for each needed permute. */
511 cost = ncopies * exact_log2(group_size) * group_size
512 * TARG_VEC_STMT_COST;
514 if (vect_print_dump_info (REPORT_DETAILS))
515 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
520 /* Costs of the stores. */
521 cost += ncopies * TARG_VEC_STORE_COST;
523 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
525 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
527 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
528 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
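/* Illustrative numbers (hypothetical target costs): a strided store group
   of size 4 with ncopies = 1 is charged
     1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST = 8 * TARG_VEC_STMT_COST
   for the high/low interleave permutes, plus 1 * TARG_VEC_STORE_COST for
   the store itself, all attributed to the first store of the group.  */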
532 /* Function vect_model_load_cost
534 Models cost for loads. In the case of strided accesses, the last access
535 has the overhead of the strided access attributed to it. Since unaligned
536 accesses are supported for loads, we also account for the costs of the
537 access scheme chosen. */
540 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
545 int alignment_support_cheme;
547 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
549 /* Strided accesses? */
550 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
553 group_size = vect_cost_strided_group_size (stmt_info);
554 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
556 /* Not a strided access. */
563 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
565 /* Is this an access in a group of loads providing strided access?
566 If so, add in the cost of the permutes. */
569 /* Uses even and odd extract operations for each needed permute. */
570 inner_cost = ncopies * exact_log2(group_size) * group_size
571 * TARG_VEC_STMT_COST;
573 if (vect_print_dump_info (REPORT_DETAILS))
574 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
579 /* The loads themselves. */
580 switch (alignment_support_cheme)
584 inner_cost += ncopies * TARG_VEC_LOAD_COST;
586 if (vect_print_dump_info (REPORT_DETAILS))
587 fprintf (vect_dump, "vect_model_load_cost: aligned.");
591 case dr_unaligned_supported:
593 /* Here, we assign an additional cost for the unaligned load. */
594 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
596 if (vect_print_dump_info (REPORT_DETAILS))
597 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
602 case dr_unaligned_software_pipeline:
606 if (vect_print_dump_info (REPORT_DETAILS))
607 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
610 /* Unaligned software pipeline has a load of an address, an initial
611 load, and possibly a mask operation to "prime" the loop. However,
612 if this is an access in a group of loads, which provide strided
613 access, then the above cost should only be considered for one
614 access in the group. Inside the loop, there is a load op
615 and a realignment op. */
617 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
619 outer_cost = 2*TARG_VEC_STMT_COST;
620 if (targetm.vectorize.builtin_mask_for_load)
621 outer_cost += TARG_VEC_STMT_COST;
624 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
626 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
635 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
637 if (vect_print_dump_info (REPORT_DETAILS))
638 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
639 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
640 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
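/* Illustrative numbers (hypothetical target costs): a non-strided load with
   ncopies = 2 using the dr_unaligned_software_pipeline scheme is charged
     outside = 2 * TARG_VEC_STMT_COST
               (+ TARG_VEC_STMT_COST if builtin_mask_for_load is provided)
     inside  = 2 * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST)
   i.e. one extra realignment statement per copy inside the loop.  */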
645 /* Function vect_get_new_vect_var.
647 Returns a name for a new variable. The current naming scheme prepends the
648 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
649 vectorizer-generated variable names, using NAME as a suffix if provided.
653 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
660 case vect_simple_var:
663 case vect_scalar_var:
666 case vect_pointer_var:
674 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
676 new_vect_var = create_tmp_var (type, prefix);
678 /* Mark vector typed variable as a gimple register variable. */
679 if (TREE_CODE (type) == VECTOR_TYPE)
680 DECL_GIMPLE_REG_P (new_vect_var) = true;
686 /* Function vect_create_addr_base_for_vector_ref.
688 Create an expression that computes the address of the first memory location
689 that will be accessed for a data reference.
692 STMT: The statement containing the data reference.
693 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
694 OFFSET: Optional. If supplied, it is added to the initial address.
697 1. Return an SSA_NAME whose value is the address of the memory location of
698 the first vector of the data reference.
699 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
700 these statement(s) which define the returned SSA_NAME.
702 FORNOW: We are only handling array accesses with step 1. */
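/* Schematic illustration (made-up names, not actual generated code): for a
   data-ref a[i] with DR_BASE_ADDRESS = &a, DR_OFFSET + DR_INIT = init and a
   supplied OFFSET, the statements appended to NEW_STMT_LIST look roughly like

     base_off_1 = init + OFFSET * step;
     addr_base_2 = (vectype *) (&a + base_off_1);

   and addr_base_2 is the SSA_NAME returned to the caller.  */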
705 vect_create_addr_base_for_vector_ref (tree stmt,
709 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
710 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
711 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
712 tree base_name = build_fold_indirect_ref (data_ref_base);
714 tree addr_base, addr_expr;
716 tree base_offset = unshare_expr (DR_OFFSET (dr));
717 tree init = unshare_expr (DR_INIT (dr));
718 tree vect_ptr_type, addr_expr2;
720 /* Create base_offset */
721 base_offset = size_binop (PLUS_EXPR, base_offset, init);
722 base_offset = fold_convert (sizetype, base_offset);
723 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
724 add_referenced_var (dest);
725 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
726 append_to_statement_list_force (new_stmt, new_stmt_list);
730 tree tmp = create_tmp_var (sizetype, "offset");
733 /* For interleaved accesses we divide STEP by the size of the
734 interleaving group. */
735 if (DR_GROUP_SIZE (stmt_info))
736 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
737 build_int_cst (TREE_TYPE (offset),
738 DR_GROUP_SIZE (stmt_info)));
742 add_referenced_var (tmp);
743 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
744 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
745 base_offset, offset);
746 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
747 append_to_statement_list_force (new_stmt, new_stmt_list);
750 /* base + base_offset */
751 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
754 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
756 /* addr_expr = addr_base */
757 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
758 get_name (base_name));
759 add_referenced_var (addr_expr);
760 vec_stmt = fold_convert (vect_ptr_type, addr_base);
761 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
762 get_name (base_name));
763 add_referenced_var (addr_expr2);
764 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
765 append_to_statement_list_force (new_stmt, new_stmt_list);
767 if (vect_print_dump_info (REPORT_DETAILS))
769 fprintf (vect_dump, "created ");
770 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
776 /* Function vect_create_data_ref_ptr.
778 Create a new pointer to vector type (vp), that points to the first location
779 accessed in the loop by STMT, along with the def-use update chain to
780 appropriately advance the pointer through the loop iterations. Also set
781 aliasing information for the pointer. This vector pointer is used by the
782 callers to this function to create a memory reference expression for vector
786 1. STMT: a stmt that references memory. Expected to be of the form
787 GIMPLE_MODIFY_STMT <name, data-ref> or
788 GIMPLE_MODIFY_STMT <data-ref, name>.
789 2. BSI: block_stmt_iterator where new stmts can be added.
790 3. OFFSET (optional): an offset to be added to the initial address accessed
791 by the data-ref in STMT.
792 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
793 pointing to the initial address.
794 5. TYPE: if not NULL indicates the required type of the data-ref
797 1. Declare a new ptr to vector_type, and have it point to the base of the
798 data reference (initial address accessed by the data reference).
799 For example, for vector of type V8HI, the following code is generated:
802 vp = (v8hi *)initial_address;
804 if OFFSET is not supplied:
805 initial_address = &a[init];
806 if OFFSET is supplied:
807 initial_address = &a[init + OFFSET];
809 Return the initial_address in INITIAL_ADDRESS.
811 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
812 update the pointer in each iteration of the loop.
814 Return the increment stmt that updates the pointer in PTR_INCR.
816 3. Return the pointer. */
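/* Schematic illustration (not actual dump output): for a V8HI access the
   code created by this function has roughly the shape

     preheader:
       vp_init = (v8hi *) initial_address;
     loop:
       vp = PHI <vp_init (preheader), vp_next (latch)>
       ... use of vp ...
       vp_next = vp + 16;      <-- the increment stmt returned in PTR_INCR

   and the PHI result vp is what gets returned (vp_init when ONLY_INIT).  */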
819 vect_create_data_ref_ptr (tree stmt,
820 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
821 tree offset, tree *initial_address, tree *ptr_incr,
822 bool only_init, tree type)
825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
828 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
834 tree new_stmt_list = NULL_TREE;
835 edge pe = loop_preheader_edge (loop);
838 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
840 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
842 if (vect_print_dump_info (REPORT_DETAILS))
844 tree data_ref_base = base_name;
845 fprintf (vect_dump, "create vector-pointer variable to type: ");
846 print_generic_expr (vect_dump, vectype, TDF_SLIM);
847 if (TREE_CODE (data_ref_base) == VAR_DECL)
848 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
849 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
850 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
851 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
852 fprintf (vect_dump, " vectorizing a record based array ref: ");
853 else if (TREE_CODE (data_ref_base) == SSA_NAME)
854 fprintf (vect_dump, " vectorizing a pointer ref: ");
855 print_generic_expr (vect_dump, base_name, TDF_SLIM);
858 /** (1) Create the new vector-pointer variable: **/
860 vect_ptr_type = build_pointer_type (type);
862 vect_ptr_type = build_pointer_type (vectype);
863 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
864 get_name (base_name));
865 add_referenced_var (vect_ptr);
867 /** (2) Add aliasing information to the new vector-pointer:
868 (The points-to info (DR_PTR_INFO) may be defined later.) **/
870 tag = DR_SYMBOL_TAG (dr);
873 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
874 tag must be created with tag added to its may alias list. */
876 new_type_alias (vect_ptr, tag, DR_REF (dr));
878 set_symbol_mem_tag (vect_ptr, tag);
880 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
882 /** (3) Calculate the initial address of the vector-pointer, and set
883 the vector-pointer to point to it before the loop: **/
885 /* Create &(base[init_val + offset]) in the loop preheader. */
886 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
888 pe = loop_preheader_edge (loop);
889 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
890 gcc_assert (!new_bb);
891 *initial_address = new_temp;
893 /* Create: p = (vectype *) initial_base */
894 vec_stmt = fold_convert (vect_ptr_type, new_temp);
895 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
896 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
897 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
898 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
899 gcc_assert (!new_bb);
902 /** (4) Handle the updating of the vector-pointer inside the loop: **/
904 if (only_init) /* No update in loop is required. */
906 /* Copy the points-to information if it exists. */
907 if (DR_PTR_INFO (dr))
908 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
909 return vect_ptr_init;
913 block_stmt_iterator incr_bsi;
915 tree indx_before_incr, indx_after_incr;
918 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
919 create_iv (vect_ptr_init,
920 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
921 NULL_TREE, loop, &incr_bsi, insert_after,
922 &indx_before_incr, &indx_after_incr);
923 incr = bsi_stmt (incr_bsi);
924 set_stmt_info (stmt_ann (incr),
925 new_stmt_vec_info (incr, loop_vinfo));
927 /* Copy the points-to information if it exists. */
928 if (DR_PTR_INFO (dr))
930 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
931 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
933 merge_alias_info (vect_ptr_init, indx_before_incr);
934 merge_alias_info (vect_ptr_init, indx_after_incr);
938 return indx_before_incr;
943 /* Function bump_vector_ptr
945 Increment a pointer (to a vector type) by vector-size. Connect the new
946 increment stmt to the existing def-use update-chain of the pointer.
948 The pointer def-use update-chain before this function:
949 DATAREF_PTR = phi (p_0, p_2)
951 PTR_INCR: p_2 = DATAREF_PTR + step
953 The pointer def-use update-chain after this function:
954 DATAREF_PTR = phi (p_0, p_2)
956 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
958 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
961 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
963 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
964 The increment amount across iterations is also expected to be vector_size.
966 BSI - location where the new update stmt is to be placed.
967 STMT - the original scalar memory-access stmt that is being vectorized.
969 Output: Return NEW_DATAREF_PTR as illustrated above.
974 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
977 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
978 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
980 tree vptr_type = TREE_TYPE (dataref_ptr);
981 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
982 tree update = TYPE_SIZE_UNIT (vectype);
986 tree new_dataref_ptr;
988 incr_stmt = build_gimple_modify_stmt (ptr_var,
989 build2 (POINTER_PLUS_EXPR, vptr_type,
990 dataref_ptr, update));
991 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
992 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
993 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
995 /* Update the vector-pointer's cross-iteration increment. */
996 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
998 tree use = USE_FROM_PTR (use_p);
1000 if (use == dataref_ptr)
1001 SET_USE (use_p, new_dataref_ptr);
1003 gcc_assert (tree_int_cst_compare (use, update) == 0);
1006 /* Copy the points-to information if it exists. */
1007 if (DR_PTR_INFO (dr))
1008 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1009 merge_alias_info (new_dataref_ptr, dataref_ptr);
1011 return new_dataref_ptr;
1015 /* Function vect_create_destination_var.
1017 Create a new temporary of type VECTYPE. */
1020 vect_create_destination_var (tree scalar_dest, tree vectype)
1023 const char *new_name;
1025 enum vect_var_kind kind;
1027 kind = vectype ? vect_simple_var : vect_scalar_var;
1028 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1030 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1032 new_name = get_name (scalar_dest);
1035 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1036 add_referenced_var (vec_dest);
1042 /* Function vect_init_vector.
1044 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1045 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1046 used in the vectorization of STMT. */
1049 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
1051 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1052 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1053 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1061 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1062 add_referenced_var (new_var);
1064 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1065 new_temp = make_ssa_name (new_var, init_stmt);
1066 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1068 pe = loop_preheader_edge (loop);
1069 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1070 gcc_assert (!new_bb);
1072 if (vect_print_dump_info (REPORT_DETAILS))
1074 fprintf (vect_dump, "created new init_stmt: ");
1075 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1078 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1083 /* Function get_initial_def_for_induction
1086 IV_PHI - the initial value of the induction variable
1089 Return a vector variable, initialized with the first VF values of
1090 the induction variable. E.g., for an iv with IV_PHI='X' and
1091 evolution S, for a vector of 4 units, we want to return:
1092 [X, X + S, X + 2*S, X + 3*S]. */
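/* For example (hypothetical values): with IV_PHI = 3, step S = 2 and
   nunits = 4 this builds the constructor {3, 5, 7, 9}; the matching step
   vector added on every iteration is {VF*S, VF*S, VF*S, VF*S}, i.e.
   {8, 8, 8, 8} when VF = nunits = 4.  */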
1095 get_initial_def_for_induction (tree iv_phi)
1097 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1099 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1100 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1101 tree vectype = get_vectype_for_scalar_type (scalar_type);
1102 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1103 edge pe = loop_preheader_edge (loop);
1105 block_stmt_iterator bsi;
1106 tree vec, vec_init, vec_step, t;
1111 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1112 tree init_expr, step_expr;
1113 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1116 int ncopies = vf / nunits;
1118 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1120 tree stmt = NULL_TREE;
1121 block_stmt_iterator si;
1122 basic_block bb = bb_for_stmt (iv_phi);
1124 gcc_assert (phi_info);
1125 gcc_assert (ncopies >= 1);
1127 /* Find the first insertion point in the BB. */
1128 si = bsi_after_labels (bb);
1129 stmt = bsi_stmt (si);
1131 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1132 gcc_assert (access_fn);
1133 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1134 &init_expr, &step_expr);
1137 /* Create the vector that holds the initial_value of the induction. */
1138 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1139 add_referenced_var (new_var);
1141 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1144 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1145 gcc_assert (!new_bb);
1149 t = tree_cons (NULL_TREE, new_name, t);
1150 for (i = 1; i < nunits; i++)
1154 /* Create: new_name = new_name + step_expr */
1155 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1156 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1157 new_name = make_ssa_name (new_var, init_stmt);
1158 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1160 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1161 gcc_assert (!new_bb);
1163 if (vect_print_dump_info (REPORT_DETAILS))
1165 fprintf (vect_dump, "created new init_stmt: ");
1166 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1168 t = tree_cons (NULL_TREE, new_name, t);
1170 vec = build_constructor_from_list (vectype, nreverse (t));
1171 vec_init = vect_init_vector (stmt, vec, vectype);
1174 /* Create the vector that holds the step of the induction. */
1175 expr = build_int_cst (scalar_type, vf);
1176 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1178 for (i = 0; i < nunits; i++)
1179 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1180 vec = build_constructor_from_list (vectype, t);
1181 vec_step = vect_init_vector (stmt, vec, vectype);
1184 /* Create the following def-use cycle:
1186 vec_init = [X, X+S, X+2*S, X+3*S]
1187 vec_step = [VF*S, VF*S, VF*S, VF*S]
1189 vec_iv = PHI <vec_init, vec_loop>
1193 vec_loop = vec_iv + vec_step; */
1195 /* Create the induction-phi that defines the induction-operand. */
1196 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1197 add_referenced_var (vec_dest);
1198 induction_phi = create_phi_node (vec_dest, loop->header);
1199 set_stmt_info (get_stmt_ann (induction_phi),
1200 new_stmt_vec_info (induction_phi, loop_vinfo));
1201 induc_def = PHI_RESULT (induction_phi);
1203 /* Create the iv update inside the loop */
1204 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1205 build2 (PLUS_EXPR, vectype,
1206 induc_def, vec_step));
1207 vec_def = make_ssa_name (vec_dest, new_stmt);
1208 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1209 bsi = bsi_for_stmt (stmt);
1210 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1212 /* Set the arguments of the phi node: */
1213 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1214 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1217 /* In case the vectorization factor (VF) is bigger than the number
1218 of elements that we can fit in a vectype (nunits), we have to generate
1219 more than one vector stmt - i.e - we need to "unroll" the
1220 vector stmt by a factor VF/nunits. For more details see documentation
1221 in vectorizable_operation. */
1225 stmt_vec_info prev_stmt_vinfo;
1227 /* Create the vector that holds the step of the induction. */
1228 expr = build_int_cst (scalar_type, nunits);
1229 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1231 for (i = 0; i < nunits; i++)
1232 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1233 vec = build_constructor_from_list (vectype, t);
1234 vec_step = vect_init_vector (stmt, vec, vectype);
1236 vec_def = induc_def;
1237 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1238 for (i = 1; i < ncopies; i++)
1242 /* vec_i = vec_prev + vec_{step*nunits} */
1243 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1244 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1245 vec_def = make_ssa_name (vec_dest, new_stmt);
1246 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1247 bsi = bsi_for_stmt (stmt);
1248 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1250 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1251 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1255 if (vect_print_dump_info (REPORT_DETAILS))
1257 fprintf (vect_dump, "transform induction: created def-use cycle:");
1258 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1259 fprintf (vect_dump, "\n");
1260 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1263 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1268 /* Function vect_get_vec_def_for_operand.
1270 OP is an operand in STMT. This function returns a (vector) def that will be
1271 used in the vectorized stmt for STMT.
1273 In the case that OP is an SSA_NAME which is defined in the loop, then
1274 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1276 In case OP is an invariant or constant, a new stmt that creates a vector def
1277 needs to be introduced. */
1280 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1285 stmt_vec_info def_stmt_info = NULL;
1286 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1287 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1288 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1296 enum vect_def_type dt;
1300 if (vect_print_dump_info (REPORT_DETAILS))
1302 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1303 print_generic_expr (vect_dump, op, TDF_SLIM);
1306 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1307 gcc_assert (is_simple_use);
1308 if (vect_print_dump_info (REPORT_DETAILS))
1312 fprintf (vect_dump, "def = ");
1313 print_generic_expr (vect_dump, def, TDF_SLIM);
1317 fprintf (vect_dump, " def_stmt = ");
1318 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1324 /* Case 1: operand is a constant. */
1325 case vect_constant_def:
1330 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1331 if (vect_print_dump_info (REPORT_DETAILS))
1332 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1334 for (i = nunits - 1; i >= 0; --i)
1336 t = tree_cons (NULL_TREE, op, t);
1338 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1339 vec_cst = build_vector (vector_type, t);
1341 return vect_init_vector (stmt, vec_cst, vector_type);
1344 /* Case 2: operand is defined outside the loop - loop invariant. */
1345 case vect_invariant_def:
1350 /* Create 'vec_inv = {inv,inv,..,inv}' */
1351 if (vect_print_dump_info (REPORT_DETAILS))
1352 fprintf (vect_dump, "Create vector_inv.");
1354 for (i = nunits - 1; i >= 0; --i)
1356 t = tree_cons (NULL_TREE, def, t);
1359 /* FIXME: use build_constructor directly. */
1360 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1361 vec_inv = build_constructor_from_list (vector_type, t);
1363 return vect_init_vector (stmt, vec_inv, vector_type);
1366 /* Case 3: operand is defined inside the loop. */
1370 *scalar_def = def_stmt;
1372 /* Get the def from the vectorized stmt. */
1373 def_stmt_info = vinfo_for_stmt (def_stmt);
1374 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1375 gcc_assert (vec_stmt);
1376 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1380 /* Case 4: operand is defined by a loop header phi - reduction */
1381 case vect_reduction_def:
1383 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1385 /* Get the def before the loop */
1386 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1387 return get_initial_def_for_reduction (stmt, op, scalar_def);
1390 /* Case 5: operand is defined by loop-header phi - induction. */
1391 case vect_induction_def:
1393 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1395 /* Get the def before the loop */
1396 return get_initial_def_for_induction (def_stmt);
1405 /* Function vect_get_vec_def_for_stmt_copy
1407 Return a vector-def for an operand. This function is used when the
1408 vectorized stmt to be created (by the caller to this function) is a "copy"
1409 created in case the vectorized result cannot fit in one vector, and several
1410 copies of the vector-stmt are required. In this case the vector-def is
1411 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1412 of the stmt that defines VEC_OPRND.
1413 DT is the type of the vector def VEC_OPRND.
1416 In case the vectorization factor (VF) is bigger than the number
1417 of elements that can fit in a vectype (nunits), we have to generate
1418 more than one vector stmt to vectorize the scalar stmt. This situation
1419 arises when there are multiple data-types operated upon in the loop; the
1420 smallest data-type determines the VF, and as a result, when vectorizing
1421 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1422 vector stmt (each computing a vector of 'nunits' results, and together
1423 computing 'VF' results in each iteration). This function is called when
1424 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1425 which VF=16 and nunits=4, so the number of copies required is 4):
1427 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1429 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1430 VS1.1: vx.1 = memref1 VS1.2
1431 VS1.2: vx.2 = memref2 VS1.3
1432 VS1.3: vx.3 = memref3
1434 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1435 VSnew.1: vz1 = vx.1 + ... VSnew.2
1436 VSnew.2: vz2 = vx.2 + ... VSnew.3
1437 VSnew.3: vz3 = vx.3 + ...
1439 The vectorization of S1 is explained in vectorizable_load.
1440 The vectorization of S2:
1441 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1442 the function 'vect_get_vec_def_for_operand' is called to
1443 get the relevant vector-def for each operand of S2. For operand x it
1444 returns the vector-def 'vx.0'.
1446 To create the remaining copies of the vector-stmt (VSnew.j), this
1447 function is called to get the relevant vector-def for each operand. It is
1448 obtained from the respective VS1.j stmt, which is recorded in the
1449 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1451 For example, to obtain the vector-def 'vx.1' in order to create the
1452 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1453 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1454 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1455 and return its def ('vx.1').
1456 Overall, to create the above sequence this function will be called 3 times:
1457 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1458 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1459 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1462 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1464 tree vec_stmt_for_operand;
1465 stmt_vec_info def_stmt_info;
1467 /* Do nothing; can reuse same def. */
1468 if (dt == vect_invariant_def || dt == vect_constant_def )
1471 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1472 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1473 gcc_assert (def_stmt_info);
1474 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1475 gcc_assert (vec_stmt_for_operand);
1476 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1482 /* Function vect_finish_stmt_generation.
1484 Insert a new stmt. */
1487 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1488 block_stmt_iterator *bsi)
1490 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1491 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1493 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1494 set_stmt_info (get_stmt_ann (vec_stmt),
1495 new_stmt_vec_info (vec_stmt, loop_vinfo));
1497 if (vect_print_dump_info (REPORT_DETAILS))
1499 fprintf (vect_dump, "add new stmt: ");
1500 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1503 /* Make sure bsi points to the stmt that is being vectorized. */
1504 gcc_assert (stmt == bsi_stmt (*bsi));
1506 #ifdef USE_MAPPED_LOCATION
1507 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1509 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1514 /* Function get_initial_def_for_reduction
1517 STMT - a stmt that performs a reduction operation in the loop.
1518 INIT_VAL - the initial value of the reduction variable
1521 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1522 of the reduction (used for adjusting the epilog - see below).
1523 Return a vector variable, initialized according to the operation that STMT
1524 performs. This vector will be used as the initial value of the
1525 vector of partial results.
1527 Option1 (adjust in epilog): Initialize the vector as follows:
1530 min/max: [init_val,init_val,..,init_val,init_val]
1531 bit and/or: [init_val,init_val,..,init_val,init_val]
1532 and when necessary (e.g. add/mult case) let the caller know
1533 that it needs to adjust the result by init_val.
1535 Option2: Initialize the vector as follows:
1536 add: [0,0,...,0,init_val]
1537 mult: [1,1,...,1,init_val]
1538 min/max: [init_val,init_val,...,init_val]
1539 bit and/or: [init_val,init_val,...,init_val]
1540 and no adjustments are needed.
1542 For example, for the following code:
1548 STMT is 's = s + a[i]', and the reduction variable is 's'.
1549 For a vector of 4 units, we want to return either [0,0,0,init_val],
1550 or [0,0,0,0] and let the caller know that it needs to adjust
1551 the result at the end by 'init_val'.
1553 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1554 initialization vector is simpler (same element in all entries).
1555 A cost model should help decide between these two schemes. */
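/* For example (illustrative): for a sum reduction with init_val = 10 and
   nunits = 4, the 'adjust in epilog' scheme returns the vector {0, 0, 0, 0}
   and sets ADJUSTMENT_DEF to 10, so the epilog later adds 10 to the reduced
   scalar; the alternative scheme would return {0, 0, 0, 10} with no
   adjustment needed.  */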
1558 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1560 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1561 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1562 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1563 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1564 tree type = TREE_TYPE (init_val);
1572 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1573 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
1577 case WIDEN_SUM_EXPR:
1580 *adjustment_def = init_val;
1581 /* Create a vector of zeros for init_def. */
1582 if (INTEGRAL_TYPE_P (type))
1583 def_for_init = build_int_cst (type, 0);
1585 def_for_init = build_real (type, dconst0);
1586 for (i = nunits - 1; i >= 0; --i)
1587 t = tree_cons (NULL_TREE, def_for_init, t);
1588 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1589 init_def = build_vector (vector_type, t);
1594 *adjustment_def = NULL_TREE;
1606 /* Function vect_create_epilog_for_reduction
1608 Create code at the loop-epilog to finalize the result of a reduction computation.
1611 VECT_DEF is a vector of partial results.
1612 REDUC_CODE is the tree-code for the epilog reduction.
1613 STMT is the scalar reduction stmt that is being vectorized.
1614 REDUCTION_PHI is the phi-node that carries the reduction computation.
1617 1. Creates the reduction def-use cycle: sets the arguments for the reduction-phi:
1619 The loop-entry argument is the vectorized initial-value of the reduction.
1620 The loop-latch argument is VECT_DEF - the vector of partial sums.
1621 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1622 by applying the operation specified by REDUC_CODE if available, or by
1623 other means (whole-vector shifts or a scalar loop).
1624 The function also creates a new phi node at the loop exit to preserve
1625 loop-closed form, as illustrated below.
1627 The flow at the entry to this function:
1630 vec_def = phi <null, null> # REDUCTION_PHI
1631 VECT_DEF = vector_stmt # vectorized form of STMT
1632 s_loop = scalar_stmt # (scalar) STMT
1634 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1638 The above is transformed by this function into:
1641 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1642 VECT_DEF = vector_stmt # vectorized form of STMT
1643 s_loop = scalar_stmt # (scalar) STMT
1645 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1646 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1647 v_out2 = reduce <v_out1>
1648 s_out3 = extract_field <v_out2, 0>
1649 s_out4 = adjust_result <s_out3>
1655 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1656 enum tree_code reduc_code, tree reduction_phi)
1658 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1660 enum machine_mode mode;
1661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1662 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1663 basic_block exit_bb;
1667 block_stmt_iterator exit_bsi;
1672 tree new_scalar_dest, exit_phi;
1673 tree bitsize, bitpos, bytesize;
1674 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1675 tree scalar_initial_def;
1676 tree vec_initial_def;
1678 imm_use_iterator imm_iter;
1679 use_operand_p use_p;
1680 bool extract_scalar_result;
1684 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
1687 op_type = TREE_OPERAND_LENGTH (operation);
1688 reduction_op = TREE_OPERAND (operation, op_type-1);
1689 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1690 mode = TYPE_MODE (vectype);
1692 /*** 1. Create the reduction def-use cycle ***/
1694 /* 1.1 set the loop-entry arg of the reduction-phi: */
1695 /* For the case of reduction, vect_get_vec_def_for_operand returns
1696 the scalar def before the loop, that defines the initial value
1697 of the reduction variable. */
1698 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1699 &scalar_initial_def);
1700 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1702 /* 1.2 set the loop-latch arg for the reduction-phi: */
1703 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1705 if (vect_print_dump_info (REPORT_DETAILS))
1707 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1708 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1709 fprintf (vect_dump, "\n");
1710 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1714 /*** 2. Create epilog code
1715 The reduction epilog code operates across the elements of the vector
1716 of partial results computed by the vectorized loop.
1717 The reduction epilog code consists of:
1718 step 1: compute the scalar result in a vector (v_out2)
1719 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1720 step 3: adjust the scalar result (s_out3) if needed.
1722 Step 1 can be accomplished using one of the following three schemes:
1723 (scheme 1) using reduc_code, if available.
1724 (scheme 2) using whole-vector shifts, if available.
1725 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1728 The overall epilog code looks like this:
1730 s_out0 = phi <s_loop> # original EXIT_PHI
1731 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1732 v_out2 = reduce <v_out1> # step 1
1733 s_out3 = extract_field <v_out2, 0> # step 2
1734 s_out4 = adjust_result <s_out3> # step 3
1736 (step 3 is optional, and steps 1 and 2 may be combined).
1737 Lastly, the uses of s_out0 are replaced by s_out4.
1741 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1742 v_out1 = phi <v_loop> */
1744 exit_bb = single_exit (loop)->dest;
1745 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1746 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1747 exit_bsi = bsi_after_labels (exit_bb);
1749 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1750 (i.e. when reduc_code is not available) and in the final adjustment
1751 code (if needed). Also get the original scalar reduction variable as
1752 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1753 represents a reduction pattern), the tree-code and scalar-def are
1754 taken from the original stmt that the pattern-stmt (STMT) replaces.
1755 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1756 are taken from STMT. */
1758 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1761 /* Regular reduction */
1766 /* Reduction pattern */
1767 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1768 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1769 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1771 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1772 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1773 scalar_type = TREE_TYPE (scalar_dest);
1774 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1775 bitsize = TYPE_SIZE (scalar_type);
1776 bytesize = TYPE_SIZE_UNIT (scalar_type);
1778 /* 2.3 Create the reduction code, using one of the three schemes described
1781 if (reduc_code < NUM_TREE_CODES)
1785 /*** Case 1: Create:
1786 v_out2 = reduc_expr <v_out1> */
1788 if (vect_print_dump_info (REPORT_DETAILS))
1789 fprintf (vect_dump, "Reduce using direct vector reduction.");
1791 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1792 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1793 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1794 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1795 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1796 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1798 extract_scalar_result = true;
1802 enum tree_code shift_code = 0;
1803 bool have_whole_vector_shift = true;
1805 int element_bitsize = tree_low_cst (bitsize, 1);
1806 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1809 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1810 shift_code = VEC_RSHIFT_EXPR;
1812 have_whole_vector_shift = false;
1814 /* Regardless of whether we have a whole vector shift, if we're
1815 emulating the operation via tree-vect-generic, we don't want
1816 to use it. Only the first round of the reduction is likely
1817 to still be profitable via emulation. */
1818 /* ??? It might be better to emit a reduction tree code here, so that
1819 tree-vect-generic can expand the first round via bit tricks. */
1820 if (!VECTOR_MODE_P (mode))
1821 have_whole_vector_shift = false;
1824 optab optab = optab_for_tree_code (code, vectype);
1825 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1826 have_whole_vector_shift = false;
1829 if (have_whole_vector_shift)
1831 /*** Case 2: Create:
1832 for (offset = VS/2; offset >= element_size; offset/=2)
1834 Create: va' = vec_shift <va, offset>
1835 Create: va = vop <va, va'>
1838 if (vect_print_dump_info (REPORT_DETAILS))
1839 fprintf (vect_dump, "Reduce using vector shifts");
1841 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1842 new_temp = PHI_RESULT (new_phi);
1844 for (bit_offset = vec_size_in_bits/2;
1845 bit_offset >= element_bitsize;
1848 tree bitpos = size_int (bit_offset);
1849 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1850 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1851 new_name = make_ssa_name (vec_dest, epilog_stmt);
1852 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1853 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1855 tmp = build2 (code, vectype, new_name, new_temp);
1856 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1857 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1858 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1859 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1862 extract_scalar_result = true;
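/* Worked example (illustrative sketch): for a 128-bit V4SI vector va,
   vec_size_in_bits is 128 and element_bitsize is 32, so the loop above
   iterates with bit_offset = 64 and then 32, emitting roughly
       va' = vec_shift <va, 64>;   va = vop <va, va'>;
       va' = vec_shift <va, 32>;   va = vop <va, va'>;
   after which the element extracted in section 2.4 below holds the
   complete reduction result.  */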
1868 /*** Case 3: Create:
1869 s = extract_field <v_out2, 0>
1870 for (offset = element_size;
1871 offset < vector_size;
1872 offset += element_size;)
1874 Create: s' = extract_field <v_out2, offset>
1875 Create: s = op <s, s'>
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "Reduce using scalar code. ");
1881 vec_temp = PHI_RESULT (new_phi);
1882 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1883 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1885 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1886 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1887 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1888 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1889 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1891 for (bit_offset = element_bitsize;
1892 bit_offset < vec_size_in_bits;
1893 bit_offset += element_bitsize)
1896 tree bitpos = bitsize_int (bit_offset);
1897 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1900 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1901 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1902 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1903 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1904 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1906 tmp = build2 (code, scalar_type, new_name, new_temp);
1907 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1908 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1909 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1910 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1913 extract_scalar_result = false;
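/* Worked example (illustrative sketch): for a V4SI vector v_out2 with
   32-bit elements, the code above emits roughly
       s  = extract_field <v_out2, 0>;
       s' = extract_field <v_out2, 32>;   s = op <s, s'>;
       s' = extract_field <v_out2, 64>;   s = op <s, s'>;
       s' = extract_field <v_out2, 96>;   s = op <s, s'>;
   so no separate extraction step is needed afterwards, and
   extract_scalar_result is therefore false.  */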
1917 /* 2.4 Extract the final scalar result. Create:
1918 s_out3 = extract_field <v_out2, bitpos> */
1920 if (extract_scalar_result)
1924 if (vect_print_dump_info (REPORT_DETAILS))
1925 fprintf (vect_dump, "extract scalar result");
1927 if (BYTES_BIG_ENDIAN)
1928 bitpos = size_binop (MULT_EXPR,
1929 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1930 TYPE_SIZE (scalar_type));
1932 bitpos = bitsize_zero_node;
1934 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1935 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1936 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1937 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1938 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1939 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1942 /* 2.5 Adjust the final result by the initial value of the reduction
1943 variable. (When such adjustment is not needed, then
1944 'scalar_initial_def' is zero).
1947 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
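/* For example (illustrative sketch): for a sum reduction whose initial
   scalar value was not folded into the vector initial def,
   scalar_initial_def holds that value and the statement created below is
       s_out4 = s_out3 + scalar_initial_def;
   with '+' standing for the reduction code of the scalar loop.  */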
1949 if (scalar_initial_def)
1951 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1952 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1953 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1954 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1955 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1958 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1960 /* Find the loop-closed-use at the loop exit of the original scalar result.
1961 (The reduction result is expected to have two immediate uses - one at the
1962 latch block, and one at the loop exit). */
1964 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1966 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1968 exit_phi = USE_STMT (use_p);
1972 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1973 gcc_assert (exit_phi);
1974 /* Replace the uses: */
1975 orig_name = PHI_RESULT (exit_phi);
1976 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1977 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1978 SET_USE (use_p, new_temp);
1982 /* Function vectorizable_reduction.
1984 Check if STMT performs a reduction operation that can be vectorized.
1985 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1986 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1987 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1989 This function also handles reduction idioms (patterns) that have been
1990 recognized in advance during vect_pattern_recog. In this case, STMT may be
1992 X = pattern_expr (arg0, arg1, ..., X)
1993 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1994 sequence that had been detected and replaced by the pattern-stmt (STMT).
1996 In some cases of reduction patterns, the type of the reduction variable X is
1997 different than the type of the other arguments of STMT.
1998 In such cases, the vectype that is used when transforming STMT into a vector
1999 stmt is different than the vectype that is used to determine the
2000 vectorization factor, because it consists of a different number of elements
2001 than the actual number of elements that are being operated upon in parallel.
2003 For example, consider an accumulation of shorts into an int accumulator.
2004 On some targets it's possible to vectorize this pattern operating on 8
2005 shorts at a time (hence, the vectype for purposes of determining the
2006 vectorization factor should be V8HI); on the other hand, the vectype that
2007 is used to create the vector form is actually V4SI (the type of the result).
2009 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2010 indicates the actual level of parallelism (V8HI in the example), so that
2011 the correct vectorization factor is derived. This vectype
2012 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2013 be used to create the vectorized stmt. The right vectype for the vectorized
2014 stmt is obtained from the type of the result X:
2015 get_vectype_for_scalar_type (TREE_TYPE (X))
2017 This means that, contrary to "regular" reductions (or "regular" stmts in
2018 general), the following equation:
2019 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2020 does *NOT* necessarily hold for reduction patterns. */
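/* Illustrative source-level sketch of such a reduction pattern (array and
   variable names are made up for illustration):

       short a[N];
       int sum = 0;
       for (i = 0; i < N; i++)
         sum += a[i];

   Pattern recognition may replace the implicit widening-add in the loop body
   by
       STMT: sum = widen_sum <a[i], sum>
   whose STMT_VINFO_VECTYPE is V8HI (so VF == 8), while the vectorized stmt
   itself produces a V4SI value obtained from
   get_vectype_for_scalar_type (TREE_TYPE (sum)).  */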
2023 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2028 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2034 enum tree_code code, orig_code, epilog_reduc_code = 0;
2035 enum machine_mode vec_mode;
2037 optab optab, reduc_optab;
2038 tree new_temp = NULL_TREE;
2040 enum vect_def_type dt;
2045 stmt_vec_info orig_stmt_info;
2046 tree expr = NULL_TREE;
2048 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2049 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2050 stmt_vec_info prev_stmt_info;
2052 tree new_stmt = NULL_TREE;
2055 gcc_assert (ncopies >= 1);
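/* For example (illustrative): with a vectorization factor of 8 and a V4SI
   vectype (nunits == 4), ncopies == 2 and two copies of the vector
   reduction stmt are generated by the transformation loop below.  */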
2057 /* 1. Is vectorizable reduction? */
2059 /* Not supportable if the reduction variable is used in the loop. */
2060 if (STMT_VINFO_RELEVANT_P (stmt_info))
2063 if (!STMT_VINFO_LIVE_P (stmt_info))
2066 /* Make sure it was already recognized as a reduction computation. */
2067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2070 /* 2. Has this been recognized as a reduction pattern?
2072 Check if STMT represents a pattern that has been recognized
2073 in earlier analysis stages. For stmts that represent a pattern,
2074 the STMT_VINFO_RELATED_STMT field records the last stmt in
2075 the original sequence that constitutes the pattern. */
2077 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2080 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2081 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2082 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2083 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2086 /* 3. Check the operands of the operation. The first operands are defined
2087 inside the loop body. The last operand is the reduction variable,
2088 which is defined by the loop-header-phi. */
2090 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2092 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2093 code = TREE_CODE (operation);
2094 op_type = TREE_OPERAND_LENGTH (operation);
2095 if (op_type != binary_op && op_type != ternary_op)
2097 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2098 scalar_type = TREE_TYPE (scalar_dest);
2100 /* All uses but the last are expected to be defined in the loop.
2101 The last use is the reduction variable. */
2102 for (i = 0; i < op_type-1; i++)
2104 op = TREE_OPERAND (operation, i);
2105 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2106 gcc_assert (is_simple_use);
2107 if (dt != vect_loop_def
2108 && dt != vect_invariant_def
2109 && dt != vect_constant_def
2110 && dt != vect_induction_def)
2114 op = TREE_OPERAND (operation, i);
2115 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2116 gcc_assert (is_simple_use);
2117 gcc_assert (dt == vect_reduction_def);
2118 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2120 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2122 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2124 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2127 /* 4. Supportable by target? */
2129 /* 4.1. check support for the operation in the loop */
2130 optab = optab_for_tree_code (code, vectype);
2133 if (vect_print_dump_info (REPORT_DETAILS))
2134 fprintf (vect_dump, "no optab.");
2137 vec_mode = TYPE_MODE (vectype);
2138 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2140 if (vect_print_dump_info (REPORT_DETAILS))
2141 fprintf (vect_dump, "op not supported by target.");
2142 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2143 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2144 < vect_min_worthwhile_factor (code))
2146 if (vect_print_dump_info (REPORT_DETAILS))
2147 fprintf (vect_dump, "proceeding using word mode.");
2150 /* Worthwhile without SIMD support? */
2151 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2152 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2153 < vect_min_worthwhile_factor (code))
2155 if (vect_print_dump_info (REPORT_DETAILS))
2156 fprintf (vect_dump, "not worthwhile without SIMD support.");
2160 /* 4.2. Check support for the epilog operation.
2162 If STMT represents a reduction pattern, then the type of the
2163 reduction variable may be different than the type of the rest
2164 of the arguments. For example, consider the case of accumulation
2165 of shorts into an int accumulator; the original code:
2166 S1: int_a = (int) short_a;
2167 orig_stmt-> S2: int_acc = plus <int_a, int_acc>;
2170 STMT: int_acc = widen_sum <short_a, int_acc>
2173 1. The tree-code that is used to create the vector operation in the
2174 epilog code (that reduces the partial results) is not the
2175 tree-code of STMT, but is rather the tree-code of the original
2176 stmt from the pattern that STMT is replacing. I.e., in the example
2177 above we want to use 'widen_sum' in the loop, but 'plus' in the epilog.
2179 2. The type (mode) we use to check available target support
2180 for the vector operation to be created in the *epilog*, is
2181 determined by the type of the reduction variable (in the example
2182 above we'd check this: plus_optab[vect_int_mode]).
2183 However the type (mode) we use to check available target support
2184 for the vector operation to be created *inside the loop*, is
2185 determined by the type of the other arguments to STMT (in the
2186 example we'd check this: widen_sum_optab[vect_short_mode]).
2188 This is contrary to "regular" reductions, in which the types of all
2189 the arguments are the same as the type of the reduction variable.
2190 For "regular" reductions we can therefore use the same vector type
2191 (and also the same tree-code) when generating the epilog code and
2192 when generating the code inside the loop. */
2196 /* This is a reduction pattern: get the vectype from the type of the
2197 reduction variable, and get the tree-code from orig_stmt. */
2198 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2199 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2200 vec_mode = TYPE_MODE (vectype);
2204 /* Regular reduction: the same vectype and tree-code that are used for
2205 the vector code inside the loop can also be used for the epilog code. */
2209 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2211 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2214 if (vect_print_dump_info (REPORT_DETAILS))
2215 fprintf (vect_dump, "no optab for reduction.");
2216 epilog_reduc_code = NUM_TREE_CODES;
2218 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2220 if (vect_print_dump_info (REPORT_DETAILS))
2221 fprintf (vect_dump, "reduc op not supported by target.");
2222 epilog_reduc_code = NUM_TREE_CODES;
2225 if (!vec_stmt) /* transformation not required. */
2227 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2228 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "transform reduction.");
2237 /* Create the destination vector */
2238 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2240 /* Create the reduction-phi that defines the reduction-operand. */
2241 new_phi = create_phi_node (vec_dest, loop->header);
2243 /* In case the vectorization factor (VF) is bigger than the number
2244 of elements that we can fit in a vectype (nunits), we have to generate
2245 more than one vector stmt - i.e - we need to "unroll" the
2246 vector stmt by a factor VF/nunits. For more details see documentation
2247 in vectorizable_operation. */
2249 prev_stmt_info = NULL;
2250 for (j = 0; j < ncopies; j++)
2255 op = TREE_OPERAND (operation, 0);
2256 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2257 if (op_type == ternary_op)
2259 op = TREE_OPERAND (operation, 1);
2260 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2263 /* Get the vector def for the reduction variable from the phi node */
2264 reduc_def = PHI_RESULT (new_phi);
2268 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2269 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2270 if (op_type == ternary_op)
2271 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2273 /* Get the vector def for the reduction variable from the vectorized
2274 reduction operation generated in the previous iteration (j-1) */
2275 reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
2278 /* Arguments are ready. create the new vector stmt. */
2279 if (op_type == binary_op)
2280 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2282 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2284 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2285 new_temp = make_ssa_name (vec_dest, new_stmt);
2286 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2287 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2290 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2292 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2293 prev_stmt_info = vinfo_for_stmt (new_stmt);
2296 /* Finalize the reduction-phi (set its arguments) and create the
2297 epilog reduction code. */
2298 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2302 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2303 a function declaration if the target has a vectorized version
2304 of the function, or NULL_TREE if the function cannot be vectorized. */
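/* Usage sketch (illustrative; the builtin chosen is hypothetical): for a
   call to the sqrt builtin with double operands, a target that provides a
   V2DF vector square root may have its
   targetm.vectorize.builtin_vectorized_function hook return the decl of
   that vector builtin when queried with vectype_out == vectype_in == V2DF;
   a target without such support returns NULL_TREE and the call is not
   vectorized.  */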
2307 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2309 tree fndecl = get_callee_fndecl (call);
2310 enum built_in_function code;
2312 /* We only handle functions that do not read or clobber memory -- i.e.
2313 const or novops ones. */
2314 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2318 || TREE_CODE (fndecl) != FUNCTION_DECL
2319 || !DECL_BUILT_IN (fndecl))
2322 code = DECL_FUNCTION_CODE (fndecl);
2323 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2327 /* Function vectorizable_call.
2329 Check if STMT performs a function call that can be vectorized.
2330 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2331 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2332 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2335 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2341 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2343 tree vectype_out, vectype_in;
2346 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2347 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2348 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2350 int ncopies, j, nargs;
2351 call_expr_arg_iterator iter;
2353 enum { NARROW, NONE, WIDEN } modifier;
2355 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2358 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2361 /* FORNOW: not yet supported. */
2362 if (STMT_VINFO_LIVE_P (stmt_info))
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "value used after loop.");
2369 /* Is STMT a vectorizable call? */
2370 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2373 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2376 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2377 if (TREE_CODE (operation) != CALL_EXPR)
2380 /* Process function arguments. */
2381 rhs_type = NULL_TREE;
2383 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2385 /* Bail out if the function has more than two arguments; we do not
2386 have interesting builtin functions to vectorize with more than
2387 two arguments. */
2391 /* We can only handle calls with arguments of the same type. */
2393 && rhs_type != TREE_TYPE (op))
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "argument types differ.");
2399 rhs_type = TREE_TYPE (op);
2401 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2403 if (vect_print_dump_info (REPORT_DETAILS))
2404 fprintf (vect_dump, "use not simple.");
2411 /* No arguments is also not good. */
2415 vectype_in = get_vectype_for_scalar_type (rhs_type);
2416 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2418 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2419 vectype_out = get_vectype_for_scalar_type (lhs_type);
2420 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2423 if (nunits_in == nunits_out / 2)
2425 else if (nunits_out == nunits_in)
2427 else if (nunits_out == nunits_in / 2)
2432 /* For now, we only vectorize functions if a target specific builtin
2433 is available. TODO -- in some cases, it might be profitable to
2434 insert the calls for pieces of the vector, in order to be able
2435 to vectorize other operations in the loop. */
2436 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2437 if (fndecl == NULL_TREE)
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "function is not vectorizable.");
2445 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2447 if (modifier == NARROW)
2448 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2452 /* Sanity check: make sure that at least one copy of the vectorized stmt
2453 needs to be generated. */
2454 gcc_assert (ncopies >= 1);
2456 if (!vec_stmt) /* transformation not required. */
2458 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2459 if (vect_print_dump_info (REPORT_DETAILS))
2460 fprintf (vect_dump, "=== vectorizable_call ===");
2461 vect_model_simple_cost (stmt_info, ncopies, dt);
2467 if (vect_print_dump_info (REPORT_DETAILS))
2468 fprintf (vect_dump, "transform operation.");
2471 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2472 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2474 prev_stmt_info = NULL;
2478 for (j = 0; j < ncopies; ++j)
2480 /* Build argument list for the vectorized call. */
2481 /* FIXME: Rewrite this so that it doesn't
2482 construct a temporary list. */
2485 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2489 = vect_get_vec_def_for_operand (op, stmt, NULL);
2492 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2494 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2498 vargs = nreverse (vargs);
2500 rhs = build_function_call_expr (fndecl, vargs);
2501 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2502 new_temp = make_ssa_name (vec_dest, new_stmt);
2503 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2505 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2508 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2510 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2512 prev_stmt_info = vinfo_for_stmt (new_stmt);
2518 for (j = 0; j < ncopies; ++j)
2520 /* Build argument list for the vectorized call. */
2521 /* FIXME: Rewrite this so that it doesn't
2522 construct a temporary list. */
2525 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2530 = vect_get_vec_def_for_operand (op, stmt, NULL);
2532 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2537 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
2539 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2542 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2543 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
2547 vargs = nreverse (vargs);
2549 rhs = build_function_call_expr (fndecl, vargs);
2550 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2551 new_temp = make_ssa_name (vec_dest, new_stmt);
2552 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2554 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2557 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2561 prev_stmt_info = vinfo_for_stmt (new_stmt);
2564 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2569 /* No current target implements this case. */
2573 /* The call in STMT might prevent it from being removed in dce.
2574 However, we cannot remove it here, due to the way the ssa name
2575 it defines is mapped to the new definition. So just replace the
2576 rhs of the statement with something harmless. */
2577 type = TREE_TYPE (scalar_dest);
2578 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2585 /* Function vect_gen_widened_results_half
2587 Create a vector stmt whose code, type, number of arguments, and result
2588 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2589 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2590 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2591 needs to be created (DECL is a function-decl of a target-builtin).
2592 STMT is the original scalar stmt that we are vectorizing. */
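/* Illustrative sketch: in the shorts-into-int example used elsewhere in
   this file, the caller invokes this function twice, once with CODE1/DECL1
   and once with CODE2/DECL2 as returned by supportable_widening_operation,
   so that a single V8HI operand yields two V4SI result vectors, roughly
       vhi = widen_op_hi <vx>;
       vlo = widen_op_lo <vx>;
   where widen_op_hi/lo stand for whatever tree-codes or target builtins the
   target supports.  */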
2595 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2596 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2597 tree vec_dest, block_stmt_iterator *bsi,
2606 /* Generate half of the widened result: */
2607 if (code == CALL_EXPR)
2609 /* Target specific support */
2610 if (op_type == binary_op)
2611 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2613 expr = build_call_expr (decl, 1, vec_oprnd0);
2617 /* Generic support */
2618 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2619 if (op_type == binary_op)
2620 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2622 expr = build1 (code, vectype, vec_oprnd0);
2624 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2625 new_temp = make_ssa_name (vec_dest, new_stmt);
2626 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2627 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2629 if (code == CALL_EXPR)
2631 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2633 if (TREE_CODE (sym) == SSA_NAME)
2634 sym = SSA_NAME_VAR (sym);
2635 mark_sym_for_renaming (sym);
2643 /* Function vectorizable_conversion.
2645 Check if STMT performs a conversion operation, that can be vectorized.
2646 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2647 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2648 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2651 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2658 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2659 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2660 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2661 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2662 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2665 enum vect_def_type dt0;
2667 stmt_vec_info prev_stmt_info;
2670 tree vectype_out, vectype_in;
2673 tree rhs_type, lhs_type;
2675 enum { NARROW, NONE, WIDEN } modifier;
2677 /* Is STMT a vectorizable conversion? */
2679 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2685 if (STMT_VINFO_LIVE_P (stmt_info))
2687 /* FORNOW: not yet supported. */
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "value used after loop.");
2693 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2696 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2699 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2700 code = TREE_CODE (operation);
2701 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2704 /* Check types of lhs and rhs */
2705 op0 = TREE_OPERAND (operation, 0);
2706 rhs_type = TREE_TYPE (op0);
2707 vectype_in = get_vectype_for_scalar_type (rhs_type);
2708 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2710 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2711 lhs_type = TREE_TYPE (scalar_dest);
2712 vectype_out = get_vectype_for_scalar_type (lhs_type);
2713 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2716 if (nunits_in == nunits_out / 2)
2718 else if (nunits_out == nunits_in)
2720 else if (nunits_out == nunits_in / 2)
2725 if (modifier == NONE)
2726 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2728 /* Bail out if the types are both integral or both non-integral. */
2729 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2730 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2733 if (modifier == NARROW)
2734 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2736 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2738 /* Sanity check: make sure that at least one copy of the vectorized stmt
2739 needs to be generated. */
2740 gcc_assert (ncopies >= 1);
2742 /* Check the operands of the operation. */
2743 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2745 if (vect_print_dump_info (REPORT_DETAILS))
2746 fprintf (vect_dump, "use not simple.");
2750 /* Supportable by target? */
2751 if ((modifier == NONE
2752 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2753 || (modifier == WIDEN
2754 && !supportable_widening_operation (code, stmt, vectype_in,
2757 || (modifier == NARROW
2758 && !supportable_narrowing_operation (code, stmt, vectype_in,
2761 if (vect_print_dump_info (REPORT_DETAILS))
2762 fprintf (vect_dump, "op not supported by target.");
2766 if (modifier != NONE)
2767 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2769 if (!vec_stmt) /* transformation not required. */
2771 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2776 if (vect_print_dump_info (REPORT_DETAILS))
2777 fprintf (vect_dump, "transform conversion.");
2780 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2782 prev_stmt_info = NULL;
2786 for (j = 0; j < ncopies; j++)
2792 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2794 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2797 targetm.vectorize.builtin_conversion (code, vectype_in);
2798 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2800 /* Arguments are ready. create the new vector stmt. */
2801 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2802 new_temp = make_ssa_name (vec_dest, new_stmt);
2803 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2804 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2805 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2807 if (TREE_CODE (sym) == SSA_NAME)
2808 sym = SSA_NAME_VAR (sym);
2809 mark_sym_for_renaming (sym);
2813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2816 prev_stmt_info = vinfo_for_stmt (new_stmt);
2821 /* In case the vectorization factor (VF) is bigger than the number
2822 of elements that we can fit in a vectype (nunits), we have to
2823 generate more than one vector stmt - i.e - we need to "unroll"
2824 the vector stmt by a factor VF/nunits. */
2825 for (j = 0; j < ncopies; j++)
2828 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2830 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2832 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2834 /* Generate first half of the widened result: */
2836 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2837 vec_oprnd0, vec_oprnd1,
2838 unary_op, vec_dest, bsi, stmt);
2840 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2842 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2843 prev_stmt_info = vinfo_for_stmt (new_stmt);
2845 /* Generate second half of the widened result: */
2847 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2848 vec_oprnd0, vec_oprnd1,
2849 unary_op, vec_dest, bsi, stmt);
2850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2851 prev_stmt_info = vinfo_for_stmt (new_stmt);
2856 /* In case the vectorization factor (VF) is bigger than the number
2857 of elements that we can fit in a vectype (nunits), we have to
2858 generate more than one vector stmt - i.e - we need to "unroll"
2859 the vector stmt by a factor VF/nunits. */
2860 for (j = 0; j < ncopies; j++)
2865 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2866 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2870 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2871 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2874 /* Arguments are ready. Create the new vector stmt. */
2875 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2876 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2877 new_temp = make_ssa_name (vec_dest, new_stmt);
2878 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2879 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2882 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2886 prev_stmt_info = vinfo_for_stmt (new_stmt);
2889 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2895 /* Function vectorizable_assignment.
2897 Check if STMT performs an assignment (copy) that can be vectorized.
2898 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2899 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2900 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2903 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2910 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2911 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2914 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2915 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2916 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2918 gcc_assert (ncopies >= 1);
2920 return false; /* FORNOW */
2922 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2925 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2928 /* FORNOW: not yet supported. */
2929 if (STMT_VINFO_LIVE_P (stmt_info))
2931 if (vect_print_dump_info (REPORT_DETAILS))
2932 fprintf (vect_dump, "value used after loop.");
2936 /* Is vectorizable assignment? */
2937 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2940 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2941 if (TREE_CODE (scalar_dest) != SSA_NAME)
2944 op = GIMPLE_STMT_OPERAND (stmt, 1);
2945 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
2947 if (vect_print_dump_info (REPORT_DETAILS))
2948 fprintf (vect_dump, "use not simple.");
2952 if (!vec_stmt) /* transformation not required. */
2954 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2955 if (vect_print_dump_info (REPORT_DETAILS))
2956 fprintf (vect_dump, "=== vectorizable_assignment ===");
2957 vect_model_simple_cost (stmt_info, ncopies, dt);
2962 if (vect_print_dump_info (REPORT_DETAILS))
2963 fprintf (vect_dump, "transform assignment.");
2966 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2969 op = GIMPLE_STMT_OPERAND (stmt, 1);
2970 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2972 /* Arguments are ready. create the new vector stmt. */
2973 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2974 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2975 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2976 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2982 /* Function vect_min_worthwhile_factor.
2984 For a loop where we could vectorize the operation indicated by CODE,
2985 return the minimum vectorization factor that makes it worthwhile
2986 to use generic vectors. */
2988 vect_min_worthwhile_factor (enum tree_code code)
3009 /* Function vectorizable_induction
3011 Check if PHI performs an induction computation that can be vectorized.
3012 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3013 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3014 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
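/* Illustrative sketch (values for illustration only): for an induction such
   as
       for (i = 0; i < N; i++)
         { ... use of j ...; j = j + 3; }
   with VF == 4, the vectorized phi created via get_initial_def_for_induction
   starts at {j0, j0 + 3, j0 + 6, j0 + 9} and is advanced by {12, 12, 12, 12}
   on each vector iteration.  */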
3017 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3020 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3022 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3023 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3024 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3027 gcc_assert (ncopies >= 1);
3029 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3032 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3034 if (STMT_VINFO_LIVE_P (stmt_info))
3036 /* FORNOW: not yet supported. */
3037 if (vect_print_dump_info (REPORT_DETAILS))
3038 fprintf (vect_dump, "value used after loop.");
3042 if (TREE_CODE (phi) != PHI_NODE)
3045 if (!vec_stmt) /* transformation not required. */
3047 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3048 if (vect_print_dump_info (REPORT_DETAILS))
3049 fprintf (vect_dump, "=== vectorizable_induction ===");
3050 vect_model_induction_cost (stmt_info, ncopies);
3056 if (vect_print_dump_info (REPORT_DETAILS))
3057 fprintf (vect_dump, "transform induction phi.");
3059 vec_def = get_initial_def_for_induction (phi);
3060 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3065 /* Function vectorizable_operation.
3067 Check if STMT performs a binary or unary operation that can be vectorized.
3068 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3069 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3070 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3073 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3078 tree op0, op1 = NULL;
3079 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3080 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3081 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3082 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3083 enum tree_code code;
3084 enum machine_mode vec_mode;
3089 enum machine_mode optab_op2_mode;
3091 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3093 stmt_vec_info prev_stmt_info;
3094 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3097 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3100 gcc_assert (ncopies >= 1);
3102 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3105 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3108 /* FORNOW: not yet supported. */
3109 if (STMT_VINFO_LIVE_P (stmt_info))
3111 if (vect_print_dump_info (REPORT_DETAILS))
3112 fprintf (vect_dump, "value used after loop.");
3116 /* Is STMT a vectorizable binary/unary operation? */
3117 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3120 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3123 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3124 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3125 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3126 if (nunits_out != nunits_in)
3129 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3130 code = TREE_CODE (operation);
3132 /* For pointer addition, we should use the normal plus for
3133 the vector addition. */
3134 if (code == POINTER_PLUS_EXPR)
3137 optab = optab_for_tree_code (code, vectype);
3139 /* Support only unary or binary operations. */
3140 op_type = TREE_OPERAND_LENGTH (operation);
3141 if (op_type != unary_op && op_type != binary_op)
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3148 op0 = TREE_OPERAND (operation, 0);
3149 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3151 if (vect_print_dump_info (REPORT_DETAILS))
3152 fprintf (vect_dump, "use not simple.");
3156 if (op_type == binary_op)
3158 op1 = TREE_OPERAND (operation, 1);
3159 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3161 if (vect_print_dump_info (REPORT_DETAILS))
3162 fprintf (vect_dump, "use not simple.");
3167 /* Supportable by target? */
3170 if (vect_print_dump_info (REPORT_DETAILS))
3171 fprintf (vect_dump, "no optab.");
3174 vec_mode = TYPE_MODE (vectype);
3175 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3176 if (icode == CODE_FOR_nothing)
3178 if (vect_print_dump_info (REPORT_DETAILS))
3179 fprintf (vect_dump, "op not supported by target.");
3180 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3181 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3182 < vect_min_worthwhile_factor (code))
3184 if (vect_print_dump_info (REPORT_DETAILS))
3185 fprintf (vect_dump, "proceeding using word mode.");
3188 /* Worthwhile without SIMD support? */
3189 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3190 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3191 < vect_min_worthwhile_factor (code))
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "not worthwhile without SIMD support.");
3198 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3200 /* FORNOW: not yet supported. */
3201 if (!VECTOR_MODE_P (vec_mode))
3204 /* Invariant argument is needed for a vector shift
3205 by a scalar shift operand. */
3206 optab_op2_mode = insn_data[icode].operand[2].mode;
3207 if (! (VECTOR_MODE_P (optab_op2_mode)
3208 || dt[1] == vect_constant_def
3209 || dt[1] == vect_invariant_def))
3211 if (vect_print_dump_info (REPORT_DETAILS))
3212 fprintf (vect_dump, "operand mode requires invariant argument.");
3217 if (!vec_stmt) /* transformation not required. */
3219 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "=== vectorizable_operation ===");
3222 vect_model_simple_cost (stmt_info, ncopies, dt);
3228 if (vect_print_dump_info (REPORT_DETAILS))
3229 fprintf (vect_dump, "transform binary/unary operation.");
3232 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3234 /* In case the vectorization factor (VF) is bigger than the number
3235 of elements that we can fit in a vectype (nunits), we have to generate
3236 more than one vector stmt - i.e - we need to "unroll" the
3237 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3238 from one copy of the vector stmt to the next, in the field
3239 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3240 stages to find the correct vector defs to be used when vectorizing
3241 stmts that use the defs of the current stmt. The example below illustrates
3242 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3243 4 vectorized stmts):
3245 before vectorization:
3246 RELATED_STMT VEC_STMT
3250 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3252 RELATED_STMT VEC_STMT
3253 VS1_0: vx0 = memref0 VS1_1 -
3254 VS1_1: vx1 = memref1 VS1_2 -
3255 VS1_2: vx2 = memref2 VS1_3 -
3256 VS1_3: vx3 = memref3 - -
3257 S1: x = load - VS1_0
3260 step2: vectorize stmt S2 (done here):
3261 To vectorize stmt S2 we first need to find the relevant vector
3262 def for the first operand 'x'. This is, as usual, obtained from
3263 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3264 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3265 relevant vector def 'vx0'. Having found 'vx0' we can generate
3266 the vector stmt VS2_0, and as usual, record it in the
3267 STMT_VINFO_VEC_STMT of stmt S2.
3268 When creating the second copy (VS2_1), we obtain the relevant vector
3269 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3270 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3271 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3272 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3273 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3274 chain of stmts and pointers:
3275 RELATED_STMT VEC_STMT
3276 VS1_0: vx0 = memref0 VS1_1 -
3277 VS1_1: vx1 = memref1 VS1_2 -
3278 VS1_2: vx2 = memref2 VS1_3 -
3279 VS1_3: vx3 = memref3 - -
3280 S1: x = load - VS1_0
3281 VS2_0: vz0 = vx0 + v1 VS2_1 -
3282 VS2_1: vz1 = vx1 + v1 VS2_2 -
3283 VS2_2: vz2 = vx2 + v1 VS2_3 -
3284 VS2_3: vz3 = vx3 + v1 - -
3285 S2: z = x + 1 - VS2_0 */
3287 prev_stmt_info = NULL;
3288 for (j = 0; j < ncopies; j++)
3293 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3294 if (op_type == binary_op)
3296 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3298 /* Vector shl and shr insn patterns can be defined with
3299 scalar operand 2 (shift operand). In this case, use
3300 constant or loop invariant op1 directly, without
3301 extending it to vector mode first. */
3302 optab_op2_mode = insn_data[icode].operand[2].mode;
3303 if (!VECTOR_MODE_P (optab_op2_mode))
3305 if (vect_print_dump_info (REPORT_DETAILS))
3306 fprintf (vect_dump, "operand 1 using scalar mode.");
3311 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3316 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3317 if (op_type == binary_op)
3318 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3321 /* Arguments are ready. create the new vector stmt. */
3323 if (op_type == binary_op)
3324 new_stmt = build_gimple_modify_stmt (vec_dest,
3325 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3327 new_stmt = build_gimple_modify_stmt (vec_dest,
3328 build1 (code, vectype, vec_oprnd0));
3329 new_temp = make_ssa_name (vec_dest, new_stmt);
3330 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3331 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3334 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3336 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3337 prev_stmt_info = vinfo_for_stmt (new_stmt);
3344 /* Function vectorizable_type_demotion
3346 Check if STMT performs a binary or unary operation that involves
3347 type demotion, and if it can be vectorized.
3348 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3349 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3350 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3353 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3360 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3362 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3363 enum tree_code code, code1 = ERROR_MARK;
3366 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3368 stmt_vec_info prev_stmt_info;
3377 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3380 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3383 /* FORNOW: not yet supported. */
3384 if (STMT_VINFO_LIVE_P (stmt_info))
3386 if (vect_print_dump_info (REPORT_DETAILS))
3387 fprintf (vect_dump, "value used after loop.");
3391 /* Is STMT a vectorizable type-demotion operation? */
3392 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3395 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3398 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3399 code = TREE_CODE (operation);
3400 if (code != NOP_EXPR && code != CONVERT_EXPR)
3403 op0 = TREE_OPERAND (operation, 0);
3404 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3405 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3407 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3408 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3409 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3410 if (nunits_in != nunits_out / 2) /* FORNOW */
3413 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3414 gcc_assert (ncopies >= 1);
3416 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3417 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3418 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3419 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3420 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3423 /* Check the operands of the operation. */
3424 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3426 if (vect_print_dump_info (REPORT_DETAILS))
3427 fprintf (vect_dump, "use not simple.");
3431 /* Supportable by target? */
3432 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3435 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3437 if (!vec_stmt) /* transformation not required. */
3439 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3440 if (vect_print_dump_info (REPORT_DETAILS))
3441 fprintf (vect_dump, "=== vectorizable_demotion ===");
3442 vect_model_simple_cost (stmt_info, ncopies, dt);
3447 if (vect_print_dump_info (REPORT_DETAILS))
3448 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3452 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3454 /* In case the vectorization factor (VF) is bigger than the number
3455 of elements that we can fit in a vectype (nunits), we have to generate
3456 more than one vector stmt - i.e - we need to "unroll" the
3457 vector stmt by a factor VF/nunits. */
3458 prev_stmt_info = NULL;
3459 for (j = 0; j < ncopies; j++)
3464 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3465 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3469 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3470 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3473 /* Arguments are ready. Create the new vector stmt. */
3474 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3475 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3476 new_temp = make_ssa_name (vec_dest, new_stmt);
3477 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3478 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3481 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3483 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3485 prev_stmt_info = vinfo_for_stmt (new_stmt);
3488 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3493 /* Function vectorizable_type_promotion
3495 Check if STMT performs a binary or unary operation that involves
3496 type promotion, and if it can be vectorized.
3497 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3498 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3499 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3502 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3508 tree op0, op1 = NULL;
3509 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3510 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3511 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3512 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3513 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3516 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3518 stmt_vec_info prev_stmt_info;
3526 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3529 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3532 /* FORNOW: not yet supported. */
3533 if (STMT_VINFO_LIVE_P (stmt_info))
3535 if (vect_print_dump_info (REPORT_DETAILS))
3536 fprintf (vect_dump, "value used after loop.");
3540 /* Is STMT a vectorizable type-promotion operation? */
3541 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3544 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3547 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3548 code = TREE_CODE (operation);
3549 if (code != NOP_EXPR && code != CONVERT_EXPR
3550 && code != WIDEN_MULT_EXPR)
3553 op0 = TREE_OPERAND (operation, 0);
3554 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3555 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3557 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3558 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3559 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3560 if (nunits_out != nunits_in / 2) /* FORNOW */
3563 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3564 gcc_assert (ncopies >= 1);
3566 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3567 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3568 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3569 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3570 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3573 /* Check the operands of the operation. */
3574 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3576 if (vect_print_dump_info (REPORT_DETAILS))
3577 fprintf (vect_dump, "use not simple.");
3581 op_type = TREE_CODE_LENGTH (code);
3582 if (op_type == binary_op)
3584 op1 = TREE_OPERAND (operation, 1);
3585 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3587 if (vect_print_dump_info (REPORT_DETAILS))
3588 fprintf (vect_dump, "use not simple.");
3593 /* Supportable by target? */
3594 if (!supportable_widening_operation (code, stmt, vectype_in,
3595 &decl1, &decl2, &code1, &code2))
3598 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3600 if (!vec_stmt) /* transformation not required. */
3602 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3603 if (vect_print_dump_info (REPORT_DETAILS))
3604 fprintf (vect_dump, "=== vectorizable_promotion ===");
3605 vect_model_simple_cost (stmt_info, 2*ncopies, dt);
3611 if (vect_print_dump_info (REPORT_DETAILS))
3612 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3616 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3618 /* In case the vectorization factor (VF) is bigger than the number
3619 of elements that we can fit in a vectype (nunits), we have to generate
3620 more than one vector stmt - i.e - we need to "unroll" the
3621 vector stmt by a factor VF/nunits. */
3623 prev_stmt_info = NULL;
3624 for (j = 0; j < ncopies; j++)
3629 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3630 if (op_type == binary_op)
3631 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3635 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3636 if (op_type == binary_op)
3637 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3640 /* Arguments are ready. Create the new vector stmt. We are creating
3641 two vector defs because the widened result does not fit in one vector.
3642 The vectorized stmt can be expressed as a call to a target builtin,
3643 or using a tree-code. */
3644 /* Generate first half of the widened result: */
3645 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3646 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3648 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3650 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3651 prev_stmt_info = vinfo_for_stmt (new_stmt);
3653 /* Generate second half of the widened result: */
3654 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3655 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3656 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3657 prev_stmt_info = vinfo_for_stmt (new_stmt);
3661 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3666 /* Function vect_strided_store_supported.
3668 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3669 and FALSE otherwise. */
3672 vect_strided_store_supported (tree vectype)
3674 optab interleave_high_optab, interleave_low_optab;
3677 mode = (int) TYPE_MODE (vectype);
3679 /* Check that the operation is supported. */
3680 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3682 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3684 if (!interleave_high_optab || !interleave_low_optab)
3686 if (vect_print_dump_info (REPORT_DETAILS))
3687 fprintf (vect_dump, "no optab for interleave.");
3691 if (interleave_high_optab->handlers[(int) mode].insn_code
3693 || interleave_low_optab->handlers[(int) mode].insn_code
3694 == CODE_FOR_nothing)
3696 if (vect_print_dump_info (REPORT_DETAILS))
3697 fprintf (vect_dump, "interleave op not supported by target.");
3704 /* Function vect_permute_store_chain.
3706 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3707 a power of 2, generate interleave_high/low stmts to reorder the data
3708 correctly for the stores. Return the final references for stores in
3711 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3712 The input is 4 vectors each containing 8 elements. We assign a number to each
3713 element, the input sequence is:
3715 1st vec: 0 1 2 3 4 5 6 7
3716 2nd vec: 8 9 10 11 12 13 14 15
3717 3rd vec: 16 17 18 19 20 21 22 23
3718 4th vec: 24 25 26 27 28 29 30 31
3720 The output sequence should be:
3722 1st vec: 0 8 16 24 1 9 17 25
3723 2nd vec: 2 10 18 26 3 11 19 27
3724 3rd vec: 4 12 20 28 5 13 21 29
3725 4th vec: 6 14 22 30 7 15 23 31
3727 i.e., we interleave the contents of the four vectors in their order.
3729 We use interleave_high/low instructions to create such output. The input of
3730 each interleave_high/low operation is two vectors:
3733 the even elements of the result vector are obtained left-to-right from the
3734 high/low elements of the first vector. The odd elements of the result are
3735 obtained left-to-right from the high/low elements of the second vector.
3736 The output of interleave_high will be: 0 4 1 5
3737 and of interleave_low: 2 6 3 7
3740 The permutation is done in log LENGTH stages. In each stage interleave_high
3741 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3742 where the first argument is taken from the first half of DR_CHAIN and the
3743 second argument from its second half.
3746 I1: interleave_high (1st vec, 3rd vec)
3747 I2: interleave_low (1st vec, 3rd vec)
3748 I3: interleave_high (2nd vec, 4th vec)
3749 I4: interleave_low (2nd vec, 4th vec)
3751 The output for the first stage is:
3753 I1: 0 16 1 17 2 18 3 19
3754 I2: 4 20 5 21 6 22 7 23
3755 I3: 8 24 9 25 10 26 11 27
3756 I4: 12 28 13 29 14 30 15 31
3758 The output of the second stage, i.e. the final result is:
3760 I1: 0 8 16 24 1 9 17 25
3761 I2: 2 10 18 26 3 11 19 27
3762 I3: 4 12 20 28 5 13 21 29
3763 I4: 6 14 22 30 7 15 23 31. */
3766 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3767 unsigned int length,
3769 block_stmt_iterator *bsi,
3770 VEC(tree,heap) **result_chain)
3772 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3773 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3774 tree scalar_dest, tmp;
3777 VEC(tree,heap) *first, *second;
3779 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3780 first = VEC_alloc (tree, heap, length/2);
3781 second = VEC_alloc (tree, heap, length/2);
3783 /* Check that the operation is supported. */
3784 if (!vect_strided_store_supported (vectype))
3787 *result_chain = VEC_copy (tree, heap, dr_chain);
3789 for (i = 0; i < exact_log2 (length); i++)
3791 for (j = 0; j < length/2; j++)
3793 vect1 = VEC_index (tree, dr_chain, j);
3794 vect2 = VEC_index (tree, dr_chain, j+length/2);
3796 /* Create interleaving stmt:
3797 in the case of big endian:
3798 high = interleave_high (vect1, vect2)
3799 and in the case of little endian:
3800 high = interleave_low (vect1, vect2). */
3801 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3802 DECL_GIMPLE_REG_P (perm_dest) = 1;
3803 add_referenced_var (perm_dest);
3804 if (BYTES_BIG_ENDIAN)
3805 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3807 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3808 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3809 high = make_ssa_name (perm_dest, perm_stmt);
3810 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3811 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3812 VEC_replace (tree, *result_chain, 2*j, high);
3814 /* Create interleaving stmt:
3815 in the case of big endian:
3816 low = interleave_low (vect1, vect2)
3817 and in the case of little endian:
3818 low = interleave_high (vect1, vect2). */
3819 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3820 DECL_GIMPLE_REG_P (perm_dest) = 1;
3821 add_referenced_var (perm_dest);
3822 if (BYTES_BIG_ENDIAN)
3823 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3825 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3826 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3827 low = make_ssa_name (perm_dest, perm_stmt);
3828 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3829 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3830 VEC_replace (tree, *result_chain, 2*j+1, low);
3832 dr_chain = VEC_copy (tree, heap, *result_chain);
3838 /* Function vectorizable_store.
3840 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that can be vectorized.
3842 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3843 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3844 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
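/* For example (illustration only; the loop and array names are hypothetical),
   a scalar loop such as

     for (i = 0; i < n; i++)
       {
         out[2 * i]     = a[i];   // S1
         out[2 * i + 1] = b[i];   // S2
       }

   contains an interleaved store group of size 2 (S1 being the first DR of
   the group).  Both stores are vectorized together when the last one (S2)
   is reached, using the permutation generated by vect_permute_store_chain.  */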
3847 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3852 tree vec_oprnd = NULL_TREE;
3853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3854 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3855 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3857 enum machine_mode vec_mode;
3859 enum dr_alignment_support alignment_support_cheme;
3861 def_operand_p def_p;
3863 enum vect_def_type dt;
3864 stmt_vec_info prev_stmt_info = NULL;
3865 tree dataref_ptr = NULL_TREE;
3866 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3867 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3869 tree next_stmt, first_stmt;
3870 bool strided_store = false;
3871 unsigned int group_size, i;
3872 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3873 gcc_assert (ncopies >= 1);
3875 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3881 if (STMT_VINFO_LIVE_P (stmt_info))
3883 if (vect_print_dump_info (REPORT_DETAILS))
3884 fprintf (vect_dump, "value used after loop.");
3888 /* Is vectorizable store? */
3890 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3893 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3894 if (TREE_CODE (scalar_dest) != ARRAY_REF
3895 && TREE_CODE (scalar_dest) != INDIRECT_REF
3896 && !DR_GROUP_FIRST_DR (stmt_info))
3899 op = GIMPLE_STMT_OPERAND (stmt, 1);
3900 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3902 if (vect_print_dump_info (REPORT_DETAILS))
3903 fprintf (vect_dump, "use not simple.");
3907 vec_mode = TYPE_MODE (vectype);
3908 /* FORNOW. In some cases we can vectorize even if the data-type is not
3909 supported (e.g. array initialization with 0). */
3910 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3913 if (!STMT_VINFO_DATA_REF (stmt_info))
3916 if (DR_GROUP_FIRST_DR (stmt_info))
3918 strided_store = true;
3919 if (!vect_strided_store_supported (vectype))
3923 if (!vec_stmt) /* transformation not required. */
3925 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3926 vect_model_store_cost (stmt_info, ncopies, dt);
3934 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3935 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3936 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3938 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3940 /* We vectorize all the stmts of the interleaving group when we
3941 reach the last stmt in the group. */
3942 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3943 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3945 *vec_stmt = NULL_TREE;
3956 if (vect_print_dump_info (REPORT_DETAILS))
3957 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3959 dr_chain = VEC_alloc (tree, heap, group_size);
3960 oprnds = VEC_alloc (tree, heap, group_size);
3962 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3963 gcc_assert (alignment_support_cheme);
3964 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
3966 /* In case the vectorization factor (VF) is bigger than the number
3967 of elements that we can fit in a vectype (nunits), we have to generate
3968 more than one vector stmt - i.e - we need to "unroll" the
3969 vector stmt by a factor VF/nunits. For more details see documentation in
3970 vect_get_vec_def_for_copy_stmt. */
3972 /* In case of interleaving (non-unit strided access):
3974 S1: &base + 2 = x2
3975 S2: &base = x0
3976 S3: &base + 1 = x1
3977 S4: &base + 3 = x3
3979 We create vectorized stores starting from the base address (the access of the
3980 first stmt in the chain (S2 in the above example)), when the last store stmt
3981 of the chain (S4) is reached:
3983 VS1: &base = vx2
3984 VS2: &base + vec_size*1 = vx0
3985 VS3: &base + vec_size*2 = vx1
3986 VS4: &base + vec_size*3 = vx3
3988 Then permutation statements are generated:
3990 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3991 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3994 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3995 (the order of the data-refs in the output of vect_permute_store_chain
3996 corresponds to the order of scalar stmts in the interleaving chain - see
3997 the documentation of vect_permute_store_chain()).
3999 In case of both multiple types and interleaving, above vector stores and
4000 permutation stmts are created for every copy. The result vector stmts are
4001 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4002 STMT_VINFO_RELATED_STMT for the next copies.
4005 prev_stmt_info = NULL;
4006 for (j = 0; j < ncopies; j++)
4013 /* For interleaved stores we collect vectorized defs for all the
4014 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
4015 as an input to vect_permute_store_chain(), and OPRNDS as an input
4016 to vect_get_vec_def_for_stmt_copy() for the next copy.
4017 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4018 OPRNDS are of size 1. */
4019 next_stmt = first_stmt;
4020 for (i = 0; i < group_size; i++)
4022 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
4023 is the exact number of stmts in the chain. Therefore, NEXT_STMT
4024 can't be NULL_TREE. If there is no interleaving,
4025 GROUP_SIZE is 1, and only one iteration of the loop will be executed. */
4027 gcc_assert (next_stmt);
4028 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4029 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
4030 VEC_quick_push(tree, dr_chain, vec_oprnd);
4031 VEC_quick_push(tree, oprnds, vec_oprnd);
4032 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4034 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
4035 &dummy, &ptr_incr, false,
4036 TREE_TYPE (vec_oprnd));
4040 /* For interleaved stores we created vectorized defs for all the
4041 defs stored in OPRNDS in the previous iteration (previous copy).
4042 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4043 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4045 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4046 OPRNDS are of size 1. */
4047 for (i = 0; i < group_size; i++)
4049 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4050 VEC_index (tree, oprnds, i));
4051 VEC_replace(tree, dr_chain, i, vec_oprnd);
4052 VEC_replace(tree, oprnds, i, vec_oprnd);
4054 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4059 result_chain = VEC_alloc (tree, heap, group_size);
4061 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4062 &result_chain))
4063 return false;
4066 next_stmt = first_stmt;
4067 for (i = 0; i < group_size; i++)
4069 /* For strided stores vectorized defs are interleaved in
4070 vect_permute_store_chain(). */
4072 vec_oprnd = VEC_index(tree, result_chain, i);
4074 data_ref = build_fold_indirect_ref (dataref_ptr);
4075 /* Arguments are ready. Create the new vector stmt. */
4076 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4077 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4079 /* Set the VDEFs for the vector pointer. If this virtual def
4080 has a use outside the loop and a loop peel is performed
4081 then the def may be renamed by the peel. Mark it for
4082 renaming so the later use will also be renamed. */
4083 copy_virtual_operands (new_stmt, next_stmt);
4086 /* The original store is deleted so the same SSA_NAMEs can be used. */
4088 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
4090 SSA_NAME_DEF_STMT (def) = new_stmt;
4091 mark_sym_for_renaming (SSA_NAME_VAR (def));
4094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4098 /* Create new names for all the definitions created by COPY and
4099 add replacement mappings for each new name. */
4100 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
4102 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
4103 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
4106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4109 prev_stmt_info = vinfo_for_stmt (new_stmt);
4110 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4113 /* Bump the vector pointer. */
4114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4122 /* Function vect_setup_realignment
4124 This function is called when vectorizing an unaligned load using
4125 the dr_unaligned_software_pipeline scheme.
4126 This function generates the following code at the loop prolog:
4129 msq_init = *(floor(p)); # prolog load
4130 realignment_token = call target_builtin;
4132 msq = phi (msq_init, ---)
4134 The code above sets up a new (vector) pointer, pointing to the first
4135 location accessed by STMT, and a "floor-aligned" load using that pointer.
4136 It also generates code to compute the "realignment-token" (if the relevant
4137 target hook was defined), and creates a phi-node at the loop-header bb
4138 whose arguments are the result of the prolog-load (created by this
4139 function) and the result of a load that takes place in the loop (to be
4140 created by the caller to this function).
4141 The caller to this function uses the phi-result (msq) to create the
4142 realignment code inside the loop, and sets up the missing phi argument, as follows:
4146 msq = phi (msq_init, lsq)
4147 lsq = *(floor(p')); # load in loop
4148 result = realign_load (msq, lsq, realignment_token);
4151 STMT - (scalar) load stmt to be vectorized. This load accesses
4152 a memory location that may be unaligned.
4153 BSI - place where new code is to be inserted.
4156 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4157 target hook, if defined.
4158 Return value - the result of the loop-header phi node. */
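/* A concrete (illustrative) example: with 16-byte vectors of chars and a
   load address p that is 2 bytes past a 16-byte boundary, floor(p) = p - 2.
   The prolog load msq_init then reads bytes p[-2..13], the load inside the
   loop reads bytes p[14..29], and

     result = realign_load (msq, lsq, realignment_token)

   extracts bytes p[0..15] from the concatenation of the two, using the
   shift/permute information encoded in realignment_token (or derived from
   the address itself when the target defines no mask-for-load builtin).  */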
4161 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4162 tree *realignment_token)
4164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4165 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4166 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4167 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4168 edge pe = loop_preheader_edge (loop);
4169 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4182 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4183 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4184 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true, NULL_TREE);
4186 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4187 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4188 new_temp = make_ssa_name (vec_dest, new_stmt);
4189 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4190 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4191 gcc_assert (!new_bb);
4192 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4193 copy_virtual_operands (new_stmt, stmt);
4194 update_vuses_to_preheader (new_stmt, loop);
4196 /* 2. Create permutation mask, if required, in loop preheader. */
4197 if (targetm.vectorize.builtin_mask_for_load)
4201 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4202 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4203 vec_dest = vect_create_destination_var (scalar_dest,
4204 TREE_TYPE (new_stmt));
4205 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4206 new_temp = make_ssa_name (vec_dest, new_stmt);
4207 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4208 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4209 gcc_assert (!new_bb);
4210 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4212 /* The result of the CALL_EXPR to this builtin is determined from
4213 the value of the parameter and no global variables are touched
4214 which makes the builtin a "const" function. Requiring the
4215 builtin to have the "const" attribute makes it unnecessary
4216 to call mark_call_clobbered. */
4217 gcc_assert (TREE_READONLY (builtin_decl));
4220 /* 3. Create msq = phi <msq_init, lsq> in loop */
4221 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4222 msq = make_ssa_name (vec_dest, NULL_TREE);
4223 phi_stmt = create_phi_node (msq, loop->header);
4224 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4225 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4231 /* Function vect_strided_load_supported.
4233 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4234 and FALSE otherwise. */
4237 vect_strided_load_supported (tree vectype)
4239 optab perm_even_optab, perm_odd_optab;
4242 mode = (int) TYPE_MODE (vectype);
4244 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4245 if (!perm_even_optab)
4247 if (vect_print_dump_info (REPORT_DETAILS))
4248 fprintf (vect_dump, "no optab for perm_even.");
4252 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "perm_even op not supported by target.");
4259 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4260 if (!perm_odd_optab)
4262 if (vect_print_dump_info (REPORT_DETAILS))
4263 fprintf (vect_dump, "no optab for perm_odd.");
4267 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4269 if (vect_print_dump_info (REPORT_DETAILS))
4270 fprintf (vect_dump, "perm_odd op not supported by target.");
4277 /* Function vect_permute_load_chain.
4279 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4280 a power of 2, generate extract_even/odd stmts to reorder the input data
4281 correctly. Return the final references for loads in RESULT_CHAIN.
4283 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4284 The input is 4 vectors each containing 8 elements. We assign a number to each
4285 element, the input sequence is:
4287 1st vec: 0 1 2 3 4 5 6 7
4288 2nd vec: 8 9 10 11 12 13 14 15
4289 3rd vec: 16 17 18 19 20 21 22 23
4290 4th vec: 24 25 26 27 28 29 30 31
4292 The output sequence should be:
4294 1st vec: 0 4 8 12 16 20 24 28
4295 2nd vec: 1 5 9 13 17 21 25 29
4296 3rd vec: 2 6 10 14 18 22 26 30
4297 4th vec: 3 7 11 15 19 23 27 31
4299 i.e., the first output vector should contain the first elements of each
4300 interleaving group, etc.
4302 We use extract_even/odd instructions to create such output. The input of each
4303 extract_even/odd operation is two vectors:
4304 1st vec: 0 1 2 3
4305 2nd vec: 4 5 6 7
4307 and the output is the vector of extracted even/odd elements. The output of
4308 extract_even will be: 0 2 4 6
4309 and of extract_odd: 1 3 5 7
4312 The permutation is done in log LENGTH stages. In each stage extract_even and
4313 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4314 order. In our example,
4316 E1: extract_even (1st vec, 2nd vec)
4317 E2: extract_odd (1st vec, 2nd vec)
4318 E3: extract_even (3rd vec, 4th vec)
4319 E4: extract_odd (3rd vec, 4th vec)
4321 The output for the first stage will be:
4323 E1: 0 2 4 6 8 10 12 14
4324 E2: 1 3 5 7 9 11 13 15
4325 E3: 16 18 20 22 24 26 28 30
4326 E4: 17 19 21 23 25 27 29 31
4328 In order to proceed and create the correct sequence for the next stage (or
4329 for the correct output, if the second stage is the last one, as in our
4330 example), we first put the output of extract_even operation and then the
4331 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4332 The input for the second stage is:
4334 1st vec (E1): 0 2 4 6 8 10 12 14
4335 2nd vec (E3): 16 18 20 22 24 26 28 30
4336 3rd vec (E2): 1 3 5 7 9 11 13 15
4337 4th vec (E4): 17 19 21 23 25 27 29 31
4339 The output of the second stage:
4341 E1: 0 4 8 12 16 20 24 28
4342 E2: 2 6 10 14 18 22 26 30
4343 E3: 1 5 9 13 17 21 25 29
4344 E4: 3 7 11 15 19 23 27 31
4346 And RESULT_CHAIN after reordering:
4348 1st vec (E1): 0 4 8 12 16 20 24 28
4349 2nd vec (E3): 1 5 9 13 17 21 25 29
4350 3rd vec (E2): 2 6 10 14 18 22 26 30
4351 4th vec (E4): 3 7 11 15 19 23 27 31. */
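/* Illustration only (IN, OUT and STAGE are hypothetical names): a scalar
   sketch of the loop below, where IN and OUT are arrays of LENGTH vectors
   and extract_even/odd are the per-pair operations described above:

     for (stage = 0; stage < log2 (length); stage++)
       {
         for (j = 0; j < length; j += 2)
           {
             out[j / 2]              = extract_even (in[j], in[j + 1]);
             out[j / 2 + length / 2] = extract_odd  (in[j], in[j + 1]);
           }
         // the output of this stage becomes the input of the next stage
       }
*/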
4354 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4355 unsigned int length,
4356 tree stmt,
4357 block_stmt_iterator *bsi,
4358 VEC(tree,heap) **result_chain)
4360 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4361 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4366 /* Check that the operation is supported. */
4367 if (!vect_strided_load_supported (vectype))
4370 *result_chain = VEC_copy (tree, heap, dr_chain);
4371 for (i = 0; i < exact_log2 (length); i++)
4373 for (j = 0; j < length; j += 2)
4375 first_vect = VEC_index (tree, dr_chain, j);
4376 second_vect = VEC_index (tree, dr_chain, j+1);
4378 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4379 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4380 DECL_GIMPLE_REG_P (perm_dest) = 1;
4381 add_referenced_var (perm_dest);
4383 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4384 first_vect, second_vect);
4385 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4387 data_ref = make_ssa_name (perm_dest, perm_stmt);
4388 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4389 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4390 mark_symbols_for_renaming (perm_stmt);
4392 VEC_replace (tree, *result_chain, j/2, data_ref);
4394 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4395 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4396 DECL_GIMPLE_REG_P (perm_dest) = 1;
4397 add_referenced_var (perm_dest);
4399 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4400 first_vect, second_vect);
4401 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4402 data_ref = make_ssa_name (perm_dest, perm_stmt);
4403 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4404 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4405 mark_symbols_for_renaming (perm_stmt);
4407 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4409 dr_chain = VEC_copy (tree, heap, *result_chain);
4415 /* Function vect_transform_strided_load.
4417 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4418 to perform their permutation and ascribe the result vectorized statements to
4419 the scalar statements.
4423 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4424 block_stmt_iterator *bsi)
4426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4427 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4428 tree next_stmt, new_stmt;
4429 VEC(tree,heap) *result_chain = NULL;
4430 unsigned int i, gap_count;
4433 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4434 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4435 vectors, that are ready for vector computation. */
4436 result_chain = VEC_alloc (tree, heap, size);
4438 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4441 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4442 Since we scan the chain starting from its first node, their order
4443 corresponds to the order of data-refs in RESULT_CHAIN.
4444 next_stmt = first_stmt;
4446 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
4451 /* Skip the gaps. Loads created for the gaps will be removed later by the
4452 dead code elimination pass.
4453 DR_GROUP_GAP is the number of steps in elements from the previous
4454 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4455 correspond to the gaps.
4457 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4465 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4466 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4467 copies, and we put the new vector statement in the first available RELATED_STMT field. */
4469 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4470 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4473 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4474 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4475 vinfo_for_stmt (prev_stmt));
4478 prev_stmt = rel_stmt;
4479 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4481 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4483 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4485 /* If NEXT_STMT accesses the same DR as the previous statement,
4486 put the same TMP_DATA_REF as its vectorized statement; otherwise
4487 get the next data-ref from RESULT_CHAIN. */
4488 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4496 /* vectorizable_load.
4498 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
4500 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4501 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4502 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
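/* For example (illustration only; names are hypothetical), a scalar loop
   such as

     for (i = 0; i < n; i++)
       {
         x = in[2 * i];       // S1
         y = in[2 * i + 1];   // S2
         ...
       }

   contains an interleaved load group of size 2.  The wide vector loads are
   generated once for the whole group, and the per-statement results are
   recovered by the extract_even/odd permutation performed in
   vect_transform_strided_load.  */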
4505 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4508 tree vec_dest = NULL;
4509 tree data_ref = NULL;
4511 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4512 stmt_vec_info prev_stmt_info;
4513 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4515 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4516 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4519 tree new_stmt = NULL_TREE;
4521 enum dr_alignment_support alignment_support_cheme;
4522 tree dataref_ptr = NULL_TREE;
4524 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4525 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4526 int i, j, group_size;
4527 tree msq = NULL_TREE, lsq;
4528 tree offset = NULL_TREE;
4529 tree realignment_token = NULL_TREE;
4530 tree phi_stmt = NULL_TREE;
4531 VEC(tree,heap) *dr_chain = NULL;
4532 bool strided_load = false;
4535 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4538 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4541 /* FORNOW: not yet supported. */
4542 if (STMT_VINFO_LIVE_P (stmt_info))
4544 if (vect_print_dump_info (REPORT_DETAILS))
4545 fprintf (vect_dump, "value used after loop.");
4549 /* Is vectorizable load? */
4550 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4553 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4554 if (TREE_CODE (scalar_dest) != SSA_NAME)
4557 op = GIMPLE_STMT_OPERAND (stmt, 1);
4558 if (TREE_CODE (op) != ARRAY_REF
4559 && TREE_CODE (op) != INDIRECT_REF
4560 && !DR_GROUP_FIRST_DR (stmt_info))
4563 if (!STMT_VINFO_DATA_REF (stmt_info))
4566 mode = (int) TYPE_MODE (vectype);
4568 /* FORNOW. In some cases we can vectorize even if the data-type is not
4569 supported (e.g. data copies). */
4570 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4572 if (vect_print_dump_info (REPORT_DETAILS))
4573 fprintf (vect_dump, "Aligned load, but unsupported type.");
4577 /* Check if the load is a part of an interleaving chain. */
4578 if (DR_GROUP_FIRST_DR (stmt_info))
4580 strided_load = true;
4582 /* Check if interleaving is supported. */
4583 if (!vect_strided_load_supported (vectype))
4587 if (!vec_stmt) /* transformation not required. */
4589 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4590 vect_model_load_cost (stmt_info, ncopies);
4594 if (vect_print_dump_info (REPORT_DETAILS))
4595 fprintf (vect_dump, "transform load.");
4601 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4602 /* Check if the chain of loads is already vectorized. */
4603 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4605 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4608 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4609 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4610 dr_chain = VEC_alloc (tree, heap, group_size);
4619 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
4620 gcc_assert (alignment_support_cheme);
4623 /* In case the vectorization factor (VF) is bigger than the number
4624 of elements that we can fit in a vectype (nunits), we have to generate
4625 more than one vector stmt - i.e - we need to "unroll" the
4626 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4627 from one copy of the vector stmt to the next, in the field
4628 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4629 stages to find the correct vector defs to be used when vectorizing
4630 stmts that use the defs of the current stmt. The example below illustrates
4631 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4632 4 vectorized stmts):
4634 before vectorization:
4635 RELATED_STMT VEC_STMT
4639 step 1: vectorize stmt S1:
4640 We first create the vector stmt VS1_0, and, as usual, record a
4641 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4642 Next, we create the vector stmt VS1_1, and record a pointer to
4643 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4644 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4646 RELATED_STMT VEC_STMT
4647 VS1_0: vx0 = memref0 VS1_1 -
4648 VS1_1: vx1 = memref1 VS1_2 -
4649 VS1_2: vx2 = memref2 VS1_3 -
4650 VS1_3: vx3 = memref3 - -
4651 S1: x = load - VS1_0
4654 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4655 information we recorded in RELATED_STMT field is used to vectorize
4658 /* In case of interleaving (non-unit strided access):
4660 S1: x2 = &base + 2
4661 S2: x0 = &base
4662 S3: x1 = &base + 1
4663 S4: x3 = &base + 3
4665 Vectorized loads are created in the order of memory accesses
4666 starting from the access of the first stmt of the chain:
4668 VS1: vx0 = &base
4669 VS2: vx1 = &base + vec_size*1
4670 VS3: vx3 = &base + vec_size*2
4671 VS4: vx4 = &base + vec_size*3
4673 Then permutation statements are generated:
4675 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4676 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4679 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4680 (the order of the data-refs in the output of vect_permute_load_chain
4681 corresponds to the order of scalar stmts in the interleaving chain - see
4682 the documentation of vect_permute_load_chain()).
4683 The generation of permutation stmts and recording them in
4684 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4686 In case of both multiple types and interleaving, the vector loads and
4687 permutation stmts above are created for every copy. The result vector stmts
4688 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4689 STMT_VINFO_RELATED_STMT for the next copies. */
4691 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4692 on a target that supports unaligned accesses (dr_unaligned_supported)
4693 we generate the following code:
4694 p = initial_addr;
4695 indx = 0;
4696 loop {
4697 p = p + indx * vectype_size;
4698 vec_dest = *(p);
4699 indx = indx + 1;
4700 }
4702 Otherwise, the data reference is potentially unaligned on a target that
4703 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4704 then generate the following code, in which the data in each iteration is
4705 obtained by two vector loads, one from the previous iteration, and one
4706 from the current iteration:
4708 msq_init = *(floor(p1))
4709 p2 = initial_addr + VS - 1;
4710 realignment_token = call target_builtin;
4711 indx = 0;
4712 loop {
4713 p2 = p2 + indx * vectype_size
4714 lsq = *(floor(p2))
4715 vec_dest = realign_load (msq, lsq, realignment_token)
4716 indx = indx + 1;
4717 msq = lsq;
4718 }
4719 */
4720 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4722 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4723 phi_stmt = SSA_NAME_DEF_STMT (msq);
4724 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4727 prev_stmt_info = NULL;
4728 for (j = 0; j < ncopies; j++)
4730 /* 1. Create the vector pointer update chain. */
4732 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4733 &ptr_incr, false, NULL_TREE);
4735 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4737 for (i = 0; i < group_size; i++)
4739 /* 2. Create the vector-load in the loop. */
4740 switch (alignment_support_cheme)
4743 gcc_assert (aligned_access_p (first_dr));
4744 data_ref = build_fold_indirect_ref (dataref_ptr);
4746 case dr_unaligned_supported:
4748 int mis = DR_MISALIGNMENT (first_dr);
4749 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4751 gcc_assert (!aligned_access_p (first_dr));
4752 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
4754 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4757 case dr_unaligned_software_pipeline:
4758 gcc_assert (!aligned_access_p (first_dr));
4759 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4764 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4765 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4768 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4769 copy_virtual_operands (new_stmt, stmt);
4770 mark_symbols_for_renaming (new_stmt);
4772 /* 3. Handle explicit realignment if necessary/supported. */
4773 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4776 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4777 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4778 if (!realignment_token)
4779 realignment_token = dataref_ptr;
4780 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4782 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4783 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4784 new_temp = make_ssa_name (vec_dest, new_stmt);
4785 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4786 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4787 if (i == group_size - 1 && j == ncopies - 1)
4788 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4792 VEC_quick_push (tree, dr_chain, new_temp);
4793 if (i < group_size - 1)
4794 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4799 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4801 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4802 dr_chain = VEC_alloc (tree, heap, group_size);
4807 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4809 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4810 prev_stmt_info = vinfo_for_stmt (new_stmt);
4818 /* Function vectorizable_live_operation.
4820 STMT computes a value that is used outside the loop. Check if
4821 it can be supported. */
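/* A (hypothetical) example of such a live computation:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];
         t = x + y;            // x and y are loop-invariant; t is used below
       }
     ... = t;

   Since all operands of 't = x + y' are loop-invariant, the scalar statement
   can simply remain in place, unvectorized, and its final scalar value is
   used after the loop.  */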
4824 vectorizable_live_operation (tree stmt,
4825 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4826 tree *vec_stmt ATTRIBUTE_UNUSED)
4829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4830 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4835 enum vect_def_type dt;
4837 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4839 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4842 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4845 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4848 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4849 op_type = TREE_OPERAND_LENGTH (operation);
4851 /* FORNOW: support only if all uses are invariant. This means
4852 that the scalar operations can remain in place, unvectorized.
4853 The original last scalar value that they compute will be used. */
4855 for (i = 0; i < op_type; i++)
4857 op = TREE_OPERAND (operation, i);
4858 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4860 if (vect_print_dump_info (REPORT_DETAILS))
4861 fprintf (vect_dump, "use not simple.");
4865 if (dt != vect_invariant_def && dt != vect_constant_def)
4869 /* No transformation is required for the cases we currently support. */
4874 /* Function vect_is_simple_cond.
4877 LOOP - the loop that is being vectorized.
4878 COND - Condition that is checked for simple use.
4880 Returns whether a COND can be vectorized. Checks whether
4881 condition operands are supportable using vect_is_simple_use. */
4884 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4888 enum vect_def_type dt;
4890 if (!COMPARISON_CLASS_P (cond))
4893 lhs = TREE_OPERAND (cond, 0);
4894 rhs = TREE_OPERAND (cond, 1);
4896 if (TREE_CODE (lhs) == SSA_NAME)
4898 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4899 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4902 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4905 if (TREE_CODE (rhs) == SSA_NAME)
4907 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4908 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4911 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4917 /* vectorizable_condition.
4919 Check if STMT is a conditional modify expression that can be vectorized.
4920 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4921 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4924 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
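/* For example (illustration only), a scalar statement such as

     x = (a < b) ? c : d;

   is replaced (for ncopies == 1) by a single vector statement of the form

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs obtained for the scalar
   operands a, b, c and d.  */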
4927 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4929 tree scalar_dest = NULL_TREE;
4930 tree vec_dest = NULL_TREE;
4931 tree op = NULL_TREE;
4932 tree cond_expr, then_clause, else_clause;
4933 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4934 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4935 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4936 tree vec_compare, vec_cond_expr;
4938 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4939 enum machine_mode vec_mode;
4941 enum vect_def_type dt;
4942 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4943 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4945 gcc_assert (ncopies >= 1);
4946 if (ncopies > 1)
4947 return false; /* FORNOW */
4949 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4952 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4955 /* FORNOW: not yet supported. */
4956 if (STMT_VINFO_LIVE_P (stmt_info))
4958 if (vect_print_dump_info (REPORT_DETAILS))
4959 fprintf (vect_dump, "value used after loop.");
4963 /* Is vectorizable conditional operation? */
4964 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4967 op = GIMPLE_STMT_OPERAND (stmt, 1);
4969 if (TREE_CODE (op) != COND_EXPR)
4972 cond_expr = TREE_OPERAND (op, 0);
4973 then_clause = TREE_OPERAND (op, 1);
4974 else_clause = TREE_OPERAND (op, 2);
4976 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4979 /* We do not handle two different vector types for the condition and the values. */
4981 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4984 if (TREE_CODE (then_clause) == SSA_NAME)
4986 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4987 if (!vect_is_simple_use (then_clause, loop_vinfo,
4988 &then_def_stmt, &def, &dt))
4991 else if (TREE_CODE (then_clause) != INTEGER_CST
4992 && TREE_CODE (then_clause) != REAL_CST)
4995 if (TREE_CODE (else_clause) == SSA_NAME)
4997 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4998 if (!vect_is_simple_use (else_clause, loop_vinfo,
4999 &else_def_stmt, &def, &dt))
5002 else if (TREE_CODE (else_clause) != INTEGER_CST
5003 && TREE_CODE (else_clause) != REAL_CST)
5007 vec_mode = TYPE_MODE (vectype);
5011 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5012 return expand_vec_cond_expr_p (op, vec_mode);
5018 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5021 /* Handle cond expr. */
5023 vec_cond_lhs = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
5025 vec_cond_rhs = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
5026 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
5027 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
5029 /* Arguments are ready. Create the new vector stmt. */
5030 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5031 vec_cond_lhs, vec_cond_rhs);
5032 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5033 vec_compare, vec_then_clause, vec_else_clause);
5035 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
5036 new_temp = make_ssa_name (vec_dest, *vec_stmt);
5037 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
5038 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
5043 /* Function vect_transform_stmt.
5045 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5048 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
5050 bool is_store = false;
5051 tree vec_stmt = NULL_TREE;
5052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5053 tree orig_stmt_in_pattern;
5056 switch (STMT_VINFO_TYPE (stmt_info))
5058 case type_demotion_vec_info_type:
5059 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
5063 case type_promotion_vec_info_type:
5064 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
5068 case type_conversion_vec_info_type:
5069 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
5073 case induc_vec_info_type:
5074 done = vectorizable_induction (stmt, bsi, &vec_stmt);
5078 case op_vec_info_type:
5079 done = vectorizable_operation (stmt, bsi, &vec_stmt);
5083 case assignment_vec_info_type:
5084 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
5088 case load_vec_info_type:
5089 done = vectorizable_load (stmt, bsi, &vec_stmt);
5093 case store_vec_info_type:
5094 done = vectorizable_store (stmt, bsi, &vec_stmt);
5096 if (DR_GROUP_FIRST_DR (stmt_info))
5098 /* In case of interleaving, the whole chain is vectorized when the
5099 last store in the chain is reached. Store stmts before the last
5100 one are skipped, and their vec_stmt_info shouldn't be freed meanwhile. */
5102 *strided_store = true;
5103 if (STMT_VINFO_VEC_STMT (stmt_info))
5110 case condition_vec_info_type:
5111 done = vectorizable_condition (stmt, bsi, &vec_stmt);
5115 case call_vec_info_type:
5116 done = vectorizable_call (stmt, bsi, &vec_stmt);
5119 case reduc_vec_info_type:
5120 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
5125 if (!STMT_VINFO_LIVE_P (stmt_info))
5127 if (vect_print_dump_info (REPORT_DETAILS))
5128 fprintf (vect_dump, "stmt not supported.");
5133 if (STMT_VINFO_LIVE_P (stmt_info)
5134 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5136 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
5142 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5143 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5144 if (orig_stmt_in_pattern)
5146 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5147 /* STMT was inserted by the vectorizer to replace a computation idiom.
5148 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5149 computed this idiom. We need to record a pointer to VEC_STMT in
5150 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5151 documentation of vect_pattern_recog. */
5152 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5154 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
5155 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5164 /* This function builds ni_name = number of iterations the loop executes,
5165 and inserts the computation on the loop preheader edge. */
5168 vect_build_loop_niters (loop_vec_info loop_vinfo)
5170 tree ni_name, stmt, var;
5172 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5173 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5175 var = create_tmp_var (TREE_TYPE (ni), "niters");
5176 add_referenced_var (var);
5177 ni_name = force_gimple_operand (ni, &stmt, false, var);
5179 pe = loop_preheader_edge (loop);
5182 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5183 gcc_assert (!new_bb);
5190 /* This function generates the following statements:
5192 ni_name = number of iterations loop executes
5193 ratio = ni_name / vf
5194 ratio_mult_vf_name = ratio * vf
5196 and places them at the loop preheader edge. */
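/* For example (illustration only), with vf == 4 and a loop that executes
   1003 iterations, the generated preheader code computes

     ni_name            = 1003
     ratio              = 1003 >> 2 = 250
     ratio_mult_vf_name = 250 << 2  = 1000

   so the vectorized loop will run 250 times and the remaining 3 iterations
   are left for the epilog loop.  */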
5199 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5201 tree *ratio_mult_vf_name_ptr,
5202 tree *ratio_name_ptr)
5210 tree ratio_mult_vf_name;
5211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5212 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5213 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5216 pe = loop_preheader_edge (loop);
5218 /* Generate temporary variable that contains
5219 number of iterations loop executes. */
5221 ni_name = vect_build_loop_niters (loop_vinfo);
5222 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5224 /* Create: ratio = ni >> log2(vf) */
5226 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5227 if (!is_gimple_val (ratio_name))
5229 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5230 add_referenced_var (var);
5232 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5233 pe = loop_preheader_edge (loop);
5234 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5235 gcc_assert (!new_bb);
5238 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5240 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5241 ratio_name, log_vf);
5242 if (!is_gimple_val (ratio_mult_vf_name))
5244 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5245 add_referenced_var (var);
5247 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt, true, var);
5249 pe = loop_preheader_edge (loop);
5250 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5251 gcc_assert (!new_bb);
5254 *ni_name_ptr = ni_name;
5255 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5256 *ratio_name_ptr = ratio_name;
5262 /* Function update_vuses_to_preheader.
5265 STMT - a statement with potential VUSEs.
5266 LOOP - the loop whose preheader will contain STMT.
5268 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5269 appears to be defined in a VDEF in another statement in a loop.
5270 One such case is when the VUSE is at the dereference of a __restricted__
5271 pointer in a load and the VDEF is at the dereference of a different
5272 __restricted__ pointer in a store. Vectorization may result in
5273 copy_virtual_uses being called to copy the problematic VUSE to a new
5274 statement that is being inserted in the loop preheader. This procedure
5275 is called to change the SSA_NAME in the new statement's VUSE from the
5276 SSA_NAME updated in the loop to the related SSA_NAME available on the
5277 path entering the loop.
5279 When this function is called, we have the following situation:
5284 # name1 = phi < name0 , name2>
5289 # name2 = vdef <name1>
5294 Stmt S1 was created in the loop preheader block as part of misaligned-load
5295 handling. This function fixes the name of the vuse of S1 from 'name1' to 'name0'. */
5299 update_vuses_to_preheader (tree stmt, struct loop *loop)
5301 basic_block header_bb = loop->header;
5302 edge preheader_e = loop_preheader_edge (loop);
5304 use_operand_p use_p;
5306 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5308 tree ssa_name = USE_FROM_PTR (use_p);
5309 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5310 tree name_var = SSA_NAME_VAR (ssa_name);
5311 basic_block bb = bb_for_stmt (def_stmt);
5313 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5314 if (!IS_EMPTY_STMT (def_stmt)
5315 && flow_bb_inside_loop_p (loop, bb))
5317 /* If the block containing the statement defining the SSA_NAME
5318 is in the loop then it's necessary to find the definition
5319 outside the loop using the PHI nodes of the header. */
5321 bool updated = false;
5323 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5325 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5327 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
5332 gcc_assert (updated);
5338 /* Function vect_update_ivs_after_vectorizer.
5340 "Advance" the induction variables of LOOP to the value they should take
5341 after the execution of LOOP. This is currently necessary because the
5342 vectorizer does not handle induction variables that are used after the
5343 loop. Such a situation occurs when the last iterations of LOOP are peeled, because:
5345 1. We introduced new uses after LOOP for IVs that were not originally used
5346 after LOOP: the IVs of LOOP are now used by an epilog loop.
5347 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5348 times, whereas the loop IVs should be bumped N times.
5351 - LOOP - a loop that is going to be vectorized. The last few iterations
5352 of LOOP were peeled.
5353 - NITERS - the number of iterations that LOOP executes (before it is
5354 vectorized). i.e, the number of times the ivs should be bumped.
5355 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5356 coming out from LOOP on which there are uses of the LOOP ivs
5357 (this is the path from LOOP->exit to epilog_loop->preheader).
5359 The new definitions of the ivs are placed in LOOP->exit.
5360 The phi args associated with the edge UPDATE_E in the bb
5361 UPDATE_E->dest are updated accordingly.
5363 Assumption 1: Like the rest of the vectorizer, this function assumes
5364 a single loop exit that has a single predecessor.
5366 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5367 organized in the same order.
5369 Assumption 3: The access function of the ivs is simple enough (see
5370 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5372 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5373 coming out of LOOP on which the ivs of LOOP are used (this is the path
5374 that leads to the epilog loop; other paths skip the epilog loop). This
5375 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5376 needs to have its phis updated.
5380 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5383 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5384 basic_block exit_bb = single_exit (loop)->dest;
5386 basic_block update_bb = update_e->dest;
5388 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5390 /* Make sure there exists a single-predecessor exit bb: */
5391 gcc_assert (single_pred_p (exit_bb));
5393 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5395 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5397 tree access_fn = NULL;
5398 tree evolution_part;
5401 tree var, ni, ni_name;
5402 block_stmt_iterator last_bsi;
5404 if (vect_print_dump_info (REPORT_DETAILS))
5406 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5407 print_generic_expr (vect_dump, phi, TDF_SLIM);
5410 /* Skip virtual phi's. */
5411 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5413 if (vect_print_dump_info (REPORT_DETAILS))
5414 fprintf (vect_dump, "virtual phi. skip.");
5418 /* Skip reduction phis. */
5419 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5421 if (vect_print_dump_info (REPORT_DETAILS))
5422 fprintf (vect_dump, "reduc phi. skip.");
5426 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5427 gcc_assert (access_fn);
5429 evolution_part = unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5430 gcc_assert (evolution_part != NULL_TREE);
5432 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5433 of degree >= 2 or exponential. */
5434 gcc_assert (!tree_is_chrec (evolution_part));
5436 step_expr = evolution_part;
5437 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5440 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5441 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5442 init_expr,
5443 fold_convert (sizetype,
5444 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5445 niters, step_expr)));
5446 else
5447 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5448 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5449 fold_convert (TREE_TYPE (init_expr),
5450 niters),
5451 step_expr),
5452 init_expr);
5456 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5457 add_referenced_var (var);
5459 last_bsi = bsi_last (exit_bb);
5460 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
5461 true, BSI_SAME_STMT);
5463 /* Fix phi expressions in the successor bb. */
5464 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
5469 /* Function vect_do_peeling_for_loop_bound
5471 Peel the last iterations of the loop represented by LOOP_VINFO.
5472 The peeled iterations form a new epilog loop. Given that the loop now
5473 iterates NITERS times, the new epilog loop iterates
5474 NITERS % VECTORIZATION_FACTOR times.
5476 The original loop will later be made to iterate
5477 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
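/* Schematically (an illustration; the exact guards are generated by
   slpeel_tree_peel_loop_to_edge):

     if (enough iterations, roughly ni >= vf and ni > th)
       vectorized loop        # iterates ratio = ni / vf times
     epilog scalar loop       # executes the remaining ni % vf iterations

   where th is the cost-model threshold computed below.  */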
5480 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5482 tree ni_name, ratio_mult_vf_name;
5483 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5484 struct loop *new_loop;
5486 basic_block preheader;
5489 int min_scalar_loop_bound;
5490 int min_profitable_iters;
5492 if (vect_print_dump_info (REPORT_DETAILS))
5493 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5495 initialize_original_copy_tables ();
5497 /* Generate the following variables on the preheader of original loop:
5499 ni_name = number of iteration the original loop executes
5500 ratio = ni_name / vf
5501 ratio_mult_vf_name = ratio * vf */
5502 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5503 &ratio_mult_vf_name, ratio);
5505 loop_num = loop->num;
5507 /* Analyze cost to set threshold for vectorized loop. */
5508 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
5509 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5510 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5512 /* Use the cost model only if it is more conservative than the user-specified threshold. */
5515 th = (unsigned) min_scalar_loop_bound;
5516 if (min_profitable_iters
5517 && (!min_scalar_loop_bound
5518 || min_profitable_iters > min_scalar_loop_bound))
5519 th = (unsigned) min_profitable_iters;
5521 if (min_profitable_iters
5522 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5523 && vect_print_dump_info (REPORT_DETAILS))
5524 fprintf (vect_dump, "vectorization may not be profitable.");
5526 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5527 ratio_mult_vf_name, ni_name, false,
5529 gcc_assert (new_loop);
5530 gcc_assert (loop_num == loop->num);
5531 #ifdef ENABLE_CHECKING
5532 slpeel_verify_cfg_after_peeling (loop, new_loop);
5535 /* A guard that controls whether the new_loop is to be executed or skipped
5536 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5537 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5538 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5539 is on the path where the LOOP IVs are used and need to be updated. */
5541 preheader = loop_preheader_edge (new_loop)->src;
5542 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5543 update_e = EDGE_PRED (preheader, 0);
5545 update_e = EDGE_PRED (preheader, 1);
5547 /* Update IVs of original loop as if they were advanced
5548 by ratio_mult_vf_name steps. */
5549 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5551 /* After peeling we have to reset scalar evolution analyzer. */
5554 free_original_copy_tables ();
5558 /* Function vect_gen_niters_for_prolog_loop
5560 Set the number of iterations for the loop represented by LOOP_VINFO
5561 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5562 and the misalignment of DR - the data reference recorded in
5563 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5564 this loop, the data reference DR will refer to an aligned location.
5566 The following computation is generated:
5568 If the misalignment of DR is known at compile time:
5569 addr_mis = int mis = DR_MISALIGNMENT (dr);
5570 Else, compute address misalignment in bytes:
5571 addr_mis = addr & (vectype_size - 1)
5573 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5575 (elem_size = element type size; an element is the scalar element
5576 whose type is the inner type of the vectype)
5578 For interleaving,
5580 prolog_niters = min ( LOOP_NITERS ,
5581 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5582 where group_size is the size of the interleaved group.
5584 The above formulas assume that VF == number of elements in the vector. This
5585 may not hold when there are multiple-types in the loop.
5586 In this case, for some data-references in the loop the VF does not represent
5587 the number of elements that fit in the vector. Therefore, instead of VF we
5588 use TYPE_VECTOR_SUBPARTS. */
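/* A worked (illustrative) example for a non-interleaved access: with V4SI
   (nelements == 4, elem_size == 4, vectype_align == 16) and an address that
   is 8 bytes past a 16-byte boundary,

     addr_mis      = addr & 15   = 8
     elem_misalign = 8 / 4       = 2
     prolog_niters = (4 - 2) & 3 = 2

   i.e. two scalar iterations are peeled so that the access becomes
   16-byte aligned.  */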
5591 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5593 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5594 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5596 tree iters, iters_name;
5599 tree dr_stmt = DR_STMT (dr);
5600 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5601 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5602 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5603 tree niters_type = TREE_TYPE (loop_niters);
5605 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5606 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5608 if (DR_GROUP_FIRST_DR (stmt_info))
5610 /* For interleaved access element size must be multiplied by the size of
5611 the interleaved group. */
5612 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5613 DR_GROUP_FIRST_DR (stmt_info)));
5614 element_size *= group_size;
5617 pe = loop_preheader_edge (loop);
5619 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5621 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5622 int elem_misalign = byte_misalign / element_size;
5624 if (vect_print_dump_info (REPORT_DETAILS))
5625 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
5626 iters = build_int_cst (niters_type,
5627 (nelements - elem_misalign)&(nelements/group_size-1));
5631 tree new_stmts = NULL_TREE;
5633 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5634 tree ptr_type = TREE_TYPE (start_addr);
5635 tree size = TYPE_SIZE (ptr_type);
5636 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5637 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5638 tree elem_size_log =
5639 build_int_cst (type, exact_log2 (vectype_align/nelements));
5640 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5641 tree nelements_tree = build_int_cst (type, nelements);
5645 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5646 gcc_assert (!new_bb);
5648 /* Create: byte_misalign = addr & (vectype_size - 1) */
5650 byte_misalign = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5652 /* Create: elem_misalign = byte_misalign / element_size */
5654 elem_misalign = fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5656 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5657 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5658 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5659 iters = fold_convert (niters_type, iters);
5662 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5663 /* If the loop bound is known at compile time we already verified that it is
5664 greater than vf; since the misalignment ('iters') is at most vf, there's
5665 no need to generate the MIN_EXPR in this case. */
5666 if (TREE_CODE (loop_niters) != INTEGER_CST)
5667 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5669 if (vect_print_dump_info (REPORT_DETAILS))
5671 fprintf (vect_dump, "niters for prolog loop: ");
5672 print_generic_expr (vect_dump, iters, TDF_SLIM);
5675 var = create_tmp_var (niters_type, "prolog_loop_niters");
5676 add_referenced_var (var);
5677 iters_name = force_gimple_operand (iters, &stmt, false, var);
5679 /* Insert stmt on loop preheader edge. */
5682 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5683 gcc_assert (!new_bb);
5690 /* Function vect_update_init_of_dr
5692 NITERS iterations were peeled from LOOP. DR represents a data reference
5693 in LOOP. This function updates the information recorded in DR to
5694 account for the fact that the first NITERS iterations had already been
5695 executed. Specifically, it updates the OFFSET field of DR. */
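/* For example (illustration only), if DR describes an access with
   DR_STEP == 4 (bytes) and NITERS == 3 iterations were peeled, the offset
   recorded for DR is increased by 3 * 4 == 12 bytes.  */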
5698 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5700 tree offset = DR_OFFSET (dr);
5702 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5703 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5704 DR_OFFSET (dr) = offset;
5708 /* Function vect_update_inits_of_drs
5710 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5711 This function updates the information recorded for the data references in
5712 the loop to account for the fact that the first NITERS iterations had
5713 already been executed. Specifically, it updates the initial_condition of
5714 the access_function of all the data_references in the loop. */
5716 static void
5717 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5718 {
5719 unsigned int i;
5720 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5721 struct data_reference *dr;
5723 if (vect_print_dump_info (REPORT_DETAILS))
5724 fprintf (vect_dump, "=== vect_update_inits_of_drs ===");
5726 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5727 vect_update_init_of_dr (dr, niters);
5728 }
5731 /* Function vect_do_peeling_for_alignment
5733 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5734 'niters' is set to the misalignment of one of the data references in the
5735 loop, thereby forcing it to refer to an aligned location at the beginning
5736 of the execution of this loop. The data reference for which we are
5737 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5739 static void
5740 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5741 {
5742 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5743 tree niters_of_prolog_loop, ni_name;
5744 tree n_iters;
5745 struct loop *new_loop;
5747 if (vect_print_dump_info (REPORT_DETAILS))
5748 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5750 initialize_original_copy_tables ();
5752 ni_name = vect_build_loop_niters (loop_vinfo);
5753 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5755 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
5756 new_loop =
5757 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5758 niters_of_prolog_loop, ni_name, true, 0);
5759 gcc_assert (new_loop);
5760 #ifdef ENABLE_CHECKING
5761 slpeel_verify_cfg_after_peeling (new_loop, loop);
5762 #endif
5764 /* Update number of times loop executes. */
5765 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5766 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5767 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5769 /* Update the init conditions of the access functions of all data refs. */
5770 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5772 /* After peeling we have to reset scalar evolution analyzer. */
5773 scev_reset ();
5775 free_original_copy_tables ();
5776 }
5779 /* Function vect_create_cond_for_align_checks.
5781 Create a conditional expression that represents the alignment checks for
5782 all of the data references (array element references) whose alignment must be
5783 checked at runtime.
5785 Input:
5786 LOOP_VINFO - two fields of the loop information are used.
5787 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5788 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5790 Output:
5791 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5792 expression.
5793 The returned value is the conditional expression to be used in the if
5794 statement that controls which version of the loop gets executed at runtime.
5796 The algorithm makes two assumptions:
5797 1) The number of bytes "n" in a vector is a power of 2.
5798 2) An address "a" is aligned if a%n is zero, and this
5799 test can be done as a&(n-1) == 0. For example, for 16
5800 byte vectors the test is a&0xf == 0. */
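/* Stand-alone illustration of assumption 2) above (not part of the
   vectorizer; assumes <stdint.h>): for a power-of-two n, a % n == 0 is
   equivalent to (a & (n - 1)) == 0, which is the form of the runtime check
   built below.  The same fact underlies the mask sanity check: a valid mask
   is n - 1, so (mask & (mask + 1)) == 0.

     int is_aligned_16 (const void *p)
     {
       return ((uintptr_t) p & (uintptr_t) 15) == 0;   // p % 16 == 0
     }
 */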
5802 static tree
5803 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5804 tree *cond_expr_stmt_list)
5805 {
5806 VEC(tree,heap) *may_misalign_stmts
5807 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5808 tree ref_stmt, tmp;
5809 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5810 tree mask_cst;
5811 unsigned int i;
5812 tree psize;
5813 tree int_ptrsize_type;
5814 char tmp_name[20];
5815 tree or_tmp_name = NULL_TREE;
5816 tree and_tmp, and_tmp_name, and_stmt;
5817 tree ptrsize_zero;
5819 /* Check that mask is one less than a power of 2, i.e., mask is
5820 all zeros followed by all ones. */
5821 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5823 /* CHECKME: what is the best integer or unsigned type to use to hold a
5824 cast from a pointer value? */
5825 psize = TYPE_SIZE (ptr_type_node);
5826 int_ptrsize_type
5827 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5829 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
5830 of the first vector of the i'th data reference. */
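/* Why OR-ing the addresses works (illustration, not original text): if any
   dr_i has one of the low bits selected by the mask set, that bit is also
   set in (dr_1 | ... | dr_n), so a single AND with the mask tests all the
   references at once.  E.g. for 16-byte alignment (mask = 0xf), dr_1 =
   0x1000 and dr_2 = 0x1008 give ((dr_1 | dr_2) & 0xf) = 0x8 != 0, correctly
   flagging the misaligned dr_2.  */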
5832 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5833 {
5834 tree new_stmt_list = NULL_TREE;
5835 tree addr_base;
5836 tree addr_tmp, addr_tmp_name, addr_stmt;
5837 tree or_tmp, new_or_tmp_name, or_stmt;
5839 /* create: addr_tmp = (int)(address_of_first_vector) */
5840 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5841 &new_stmt_list, NULL_TREE);
5844 if (new_stmt_list != NULL_TREE)
5845 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5847 sprintf (tmp_name, "%s%d", "addr2int", i);
5848 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5849 add_referenced_var (addr_tmp);
5850 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5851 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5852 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5853 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5854 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5856 /* The addresses are ORed together. */
5858 if (or_tmp_name != NULL_TREE)
5859 {
5860 /* create: or_tmp = or_tmp | addr_tmp */
5861 sprintf (tmp_name, "%s%d", "orptrs", i);
5862 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5863 add_referenced_var (or_tmp);
5864 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5865 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5866 or_tmp_name, addr_tmp_name);
5867 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5868 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5869 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5870 or_tmp_name = new_or_tmp_name;
5871 }
5872 else
5873 or_tmp_name = addr_tmp_name;
5874 }
5877 mask_cst = build_int_cst (int_ptrsize_type, mask);
5879 /* create: and_tmp = or_tmp & mask */
5880 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5881 add_referenced_var (and_tmp);
5882 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5884 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5885 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5886 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5887 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5889 /* Make and_tmp the left operand of the conditional test against zero.
5890 If and_tmp has a nonzero bit then some address is unaligned. */
5891 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5892 return build2 (EQ_EXPR, boolean_type_node,
5893 and_tmp_name, ptrsize_zero);
5894 }
5897 /* Function vect_transform_loop.
5899 The analysis phase has determined that the loop is vectorizable.
5900 Vectorize the loop - create vectorized stmts to replace the scalar
5901 stmts in the loop, and update the loop exit condition. */
5903 void
5904 vect_transform_loop (loop_vec_info loop_vinfo)
5905 {
5906 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5907 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5908 int nbbs = loop->num_nodes;
5909 block_stmt_iterator si, next_si;
5910 int i;
5911 tree ratio = NULL;
5912 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5913 bool strided_store;
5915 if (vect_print_dump_info (REPORT_DETAILS))
5916 fprintf (vect_dump, "=== vect_transform_loop ===");
5918 /* If the loop has data references that may or may not be aligned then
5919 two versions of the loop need to be generated, one which is vectorized
5920 and one which isn't. A test is then generated to control which of the
5921 loops is executed. The test checks for the alignment of all of the
5922 data references that may or may not be aligned. */
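/* Sketch of the control flow produced by the versioning below (illustrative
   pseudo-source, not generated verbatim):

     if (((addr_1 | ... | addr_n) & mask) == 0)
       vectorized_loop;     // all checked refs proved aligned at runtime
     else
       scalar_loop;         // unmodified scalar copy

   The aligned (vectorized) path is given the static branch probability
   'prob' set up below.  */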
5924 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5925 {
5926 struct loop *nloop;
5927 tree cond_expr;
5928 tree cond_expr_stmt_list = NULL_TREE;
5929 basic_block condition_bb;
5930 block_stmt_iterator cond_exp_bsi;
5931 basic_block merge_bb;
5932 basic_block new_exit_bb;
5933 edge new_exit_e, e;
5934 tree orig_phi, new_phi, arg;
5935 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5937 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5938 &cond_expr_stmt_list);
5939 initialize_original_copy_tables ();
5940 nloop = loop_version (loop, cond_expr, &condition_bb,
5941 prob, prob, REG_BR_PROB_BASE - prob, true);
5942 free_original_copy_tables();
5944 /** Loop versioning violates an assumption we try to maintain during
5945 vectorization - that the loop exit block has a single predecessor.
5946 After versioning, the exit block of both loop versions is the same
5947 basic block (i.e. it has two predecessors). To simplify the
5948 following transformations in the vectorizer, we fix this situation
5949 here by adding a new (empty) block on the exit-edge of the loop,
5950 with the proper loop-exit phis to maintain loop-closed-form. **/
5952 merge_bb = single_exit (loop)->dest;
5953 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5954 new_exit_bb = split_edge (single_exit (loop));
5955 new_exit_e = single_exit (loop);
5956 e = EDGE_SUCC (new_exit_bb, 0);
5958 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5959 orig_phi = PHI_CHAIN (orig_phi))
5960 {
5961 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5962 new_exit_bb);
5963 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5964 add_phi_arg (new_phi, arg, new_exit_e);
5965 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5966 }
5968 /** end loop-exit-fixes after versioning **/
5970 update_ssa (TODO_update_ssa);
5971 cond_exp_bsi = bsi_last (condition_bb);
5972 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5973 }
5975 /* CHECKME: we wouldn't need this if we called update_ssa once
5976 for all loops. */
5977 bitmap_zero (vect_memsyms_to_rename);
5979 /* Peel the loop if there are data refs with unknown alignment.
5980 Peeling is driven by a single such data ref (LOOP_VINFO_UNALIGNED_DR). */
5982 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5983 vect_do_peeling_for_alignment (loop_vinfo);
5985 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5986 compile time constant), or it is a constant that is not a multiple of the
5987 vectorization factor, then an epilog loop needs to be created.
5988 We therefore duplicate the loop: the original loop will be vectorized,
5989 and will compute the first (n/VF) iterations. The second copy of the loop
5990 will remain scalar and will compute the remaining (n%VF) iterations.
5991 (VF is the vectorization factor). */
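/* Worked example (hypothetical numbers): with n = 103 iterations and VF = 4,
   the vectorized loop executes n/VF = 25 vector iterations (covering 100
   scalar iterations) and the scalar epilog loop created here executes the
   remaining n%VF = 3 iterations.  */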
5993 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5994 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5995 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
5996 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
5997 else
5998 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5999 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6001 /* 1) Make sure the loop header has exactly two entries
6002 2) Make sure we have a preheader basic block. */
6004 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6006 split_edge (loop_preheader_edge (loop));
6008 /* FORNOW: the vectorizer supports only loops whose body consists
6009 of one basic block (header + empty latch). When the vectorizer
6010 supports more involved loop forms, the order in which the BBs are
6011 traversed needs to be reconsidered. */
6013 for (i = 0; i < nbbs; i++)
6014 {
6015 basic_block bb = bbs[i];
6016 stmt_vec_info stmt_info;
6017 tree phi;
6019 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
6020 {
6021 if (vect_print_dump_info (REPORT_DETAILS))
6022 {
6023 fprintf (vect_dump, "------>vectorizing phi: ");
6024 print_generic_expr (vect_dump, phi, TDF_SLIM);
6025 }
6026 stmt_info = vinfo_for_stmt (phi);
6029 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6030 && !STMT_VINFO_LIVE_P (stmt_info))
6031 continue;
6033 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6034 != (unsigned HOST_WIDE_INT) vectorization_factor)
6035 && vect_print_dump_info (REPORT_DETAILS))
6036 fprintf (vect_dump, "multiple-types.");
6038 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6039 {
6040 if (vect_print_dump_info (REPORT_DETAILS))
6041 fprintf (vect_dump, "transform phi.");
6042 vect_transform_stmt (phi, NULL, NULL);
6043 }
6044 }
6046 for (si = bsi_start (bb); !bsi_end_p (si);)
6047 {
6048 tree stmt = bsi_stmt (si);
6049 bool is_store;
6051 if (vect_print_dump_info (REPORT_DETAILS))
6052 {
6053 fprintf (vect_dump, "------>vectorizing statement: ");
6054 print_generic_expr (vect_dump, stmt, TDF_SLIM);
6055 }
6056 stmt_info = vinfo_for_stmt (stmt);
6057 gcc_assert (stmt_info);
6058 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6059 && !STMT_VINFO_LIVE_P (stmt_info))
6060 {
6061 bsi_next (&si);
6062 continue;
6063 }
6065 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
6066 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6067 != (unsigned HOST_WIDE_INT) vectorization_factor)
6068 && vect_print_dump_info (REPORT_DETAILS))
6069 fprintf (vect_dump, "multiple-types.");
6071 /* -------- vectorize statement ------------ */
6072 if (vect_print_dump_info (REPORT_DETAILS))
6073 fprintf (vect_dump, "transform statement.");
6075 strided_store = false;
6076 is_store = vect_transform_stmt (stmt, &si, &strided_store);
6078 if (is_store)
6079 {
6080 if (DR_GROUP_FIRST_DR (stmt_info))
6081 {
6082 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6083 interleaving chain was completed - free all the stores in
6084 the chain. */
6085 tree next = DR_GROUP_FIRST_DR (stmt_info);
6086 tree tmp;
6087 stmt_vec_info next_stmt_info;
6089 while (next)
6090 {
6091 next_si = bsi_for_stmt (next);
6092 next_stmt_info = vinfo_for_stmt (next);
6093 /* Free the attached stmt_vec_info and remove the stmt. */
6094 ann = stmt_ann (next);
6095 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6096 free (next_stmt_info);
6097 set_stmt_info (ann, NULL);
6098 bsi_remove (&next_si, true);
6099 next = tmp;
6100 }
6101 bsi_remove (&si, true);
6102 continue;
6103 }
6104 else
6105 {
6106 /* Free the attached stmt_vec_info and remove the stmt. */
6107 ann = stmt_ann (stmt);
6108 free (stmt_info);
6109 set_stmt_info (ann, NULL);
6110 bsi_remove (&si, true);
6111 continue;
6112 }
6113 }
6114 bsi_next (&si);
6115 } /* stmts in BB */
6116 } /* BBs in loop */
6118 slpeel_make_loop_iterate_ntimes (loop, ratio);
6120 mark_set_for_renaming (vect_memsyms_to_rename);
6122 /* The memory tags and pointers in vectorized statements need to
6123 have their SSA forms updated. FIXME, why can't this be delayed
6124 until all the loops have been transformed? */
6125 update_ssa (TODO_update_ssa);
6127 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6128 fprintf (vect_dump, "LOOP VECTORIZED.");