1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
/* Statement-level drivers and helpers: creating vectorized defs,
   data-ref pointers, and finishing generated statements.  */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
/* Reduction and epilogue helpers.  */
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
65 /* Utility function dealing with loop peeling (not peeling itself). */
/* These compute and propagate iteration counts for the prologue
   (alignment) and epilogue (remainder) loops.  */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static int vect_min_worthwhile_factor (enum tree_code);
/* Return the target's scalar cost estimate for STMT, keyed on the
   vectorization classification recorded in its stmt_vec_info.
   NOTE(review): this excerpt is elided — the return-type line, braces
   and the handling after undef_vec_info_type are not visible here.  */
77 cost_for_stmt (tree stmt)
79 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the kind of operation the vectorizer classified
   this statement as.  */
81 switch (STMT_VINFO_TYPE (stmt_info))
83 case load_vec_info_type:
84 return TARG_SCALAR_LOAD_COST;
85 case store_vec_info_type:
86 return TARG_SCALAR_STORE_COST;
/* All remaining classified operations share the generic scalar
   statement cost.  */
87 case op_vec_info_type:
88 case condition_vec_info_type:
89 case assignment_vec_info_type:
90 case reduc_vec_info_type:
91 case induc_vec_info_type:
92 case type_promotion_vec_info_type:
93 case type_demotion_vec_info_type:
94 case type_conversion_vec_info_type:
95 case call_vec_info_type:
96 return TARG_SCALAR_STMT_COST;
97 case undef_vec_info_type:
104 /* Function vect_estimate_min_profitable_iters
106 Return the number of iterations required for the vector version of the
107 loop to be profitable relative to the cost of the scalar version of the
110 TODO: Take profile info into account before making vectorization
111 decisions, if available. */
/* NOTE(review): excerpt is elided; braces, early returns and some
   declarations (e.g. 'i', 'byte_misalign') are missing from this view.
   Comments below describe only the visible logic.  */
114 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
117 int min_profitable_iters;
118 int peel_iters_prologue;
119 int peel_iters_epilogue;
120 int vec_inside_cost = 0;
121 int vec_outside_cost = 0;
122 int scalar_single_iter_cost = 0;
123 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
124 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
125 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
126 int nbbs = loop->num_nodes;
129 /* Cost model disabled. */
/* With -fno-vect-cost-model we skip all the accounting below;
   presumably a trivial threshold is returned (elided here).  */
130 if (!flag_vect_cost_model)
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "cost model disabled.");
137 /* Requires loop versioning tests to handle misalignment.
138 FIXME: Make cost depend on number of stmts in may_misalign list. */
/* One extra conditional branch charged for the runtime alignment
   versioning check.  */
140 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
142 vec_outside_cost += TARG_COND_BRANCH_COST;
143 if (vect_print_dump_info (REPORT_DETAILS))
144 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
148 /* Count statements in scalar loop. Using this as scalar cost for a single
151 TODO: Add outer loop support.
153 TODO: Consider assigning different costs to different scalar
/* Walk every statement of every loop BB, accumulating three totals:
   the scalar per-iteration cost and the per-stmt inside/outside
   vector costs computed earlier by the vect_model_*_cost routines.  */
156 for (i = 0; i < nbbs; i++)
158 block_stmt_iterator si;
159 basic_block bb = bbs[i];
161 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
163 tree stmt = bsi_stmt (si);
164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Irrelevant, dead-for-vectorization statements are skipped
   (the 'continue' is elided in this excerpt).  */
165 if (!STMT_VINFO_RELEVANT_P (stmt_info)
166 && !STMT_VINFO_LIVE_P (stmt_info))
168 scalar_single_iter_cost += cost_for_stmt (stmt);
169 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
170 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
174 /* Add additional cost for the peeled instructions in prologue and epilogue
177 FORNOW: If we dont know the value of peel_iters for prologue or epilogue
178 at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
180 TODO: Build an expression that represents peel_iters for prologue and
181 epilogue to be used in a run-time test. */
183 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
/* Negative misalignment means the peel amount is unknown at compile
   time: assume the average (vf-1)/2 for both prologue and epilogue.  */
185 if (byte_misalign < 0)
187 peel_iters_prologue = (vf - 1)/2;
188 if (vect_print_dump_info (REPORT_DETAILS))
189 fprintf (vect_dump, "cost model: "
190 "prologue peel iters set to (vf-1)/2.");
192 /* If peeling for alignment is unknown, loop bound of main loop becomes
194 peel_iters_epilogue = (vf - 1)/2;
195 if (vect_print_dump_info (REPORT_DETAILS))
196 fprintf (vect_dump, "cost model: "
197 "epilogue peel iters set to (vf-1)/2 because "
198 "peeling for alignment is unknown .");
/* Known misalignment: derive the exact prologue peel count from the
   unaligned data-ref's element size (else-branch; braces elided).  */
204 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
205 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
206 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
207 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
209 peel_iters_prologue = nelements - (byte_misalign / element_size);
/* Already aligned: no prologue peeling needed.  */
212 peel_iters_prologue = 0;
214 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
216 peel_iters_epilogue = (vf - 1)/2;
217 if (vect_print_dump_info (REPORT_DETAILS))
218 fprintf (vect_dump, "cost model: "
219 "epilogue peel iters set to (vf-1)/2 because "
220 "loop iterations are unknown .");
/* Iteration count known: clamp the prologue to NITERS and the
   epilogue is the remainder after the vector loop.  */
224 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
225 peel_iters_prologue = niters < peel_iters_prologue ?
226 niters : peel_iters_prologue;
227 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
231 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
232 two guards, one for the peeled loop and one for the vector loop. */
234 if (peel_iters_prologue)
236 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
237 if (vect_print_dump_info (REPORT_DETAILS))
238 fprintf (vect_dump, "cost model: Adding cost of checks for "
242 /* Requires an epilogue loop to finish up remaining iterations after vector
243 loop. Add cost of two guards, one for the peeled loop and one for the
246 if (peel_iters_epilogue
247 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
248 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
250 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
251 if (vect_print_dump_info (REPORT_DETAILS))
252 fprintf (vect_dump, "cost model : Adding cost of checks for "
/* Peeled iterations execute scalar code; charge them to the
   outside-of-loop cost.  */
256 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
257 + (peel_iters_epilogue * scalar_single_iter_cost);
259 /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only
260 information we provide for the target is whether testing against the
261 threshold involves a runtime test. */
262 if (targetm.vectorize.builtin_vectorization_cost)
264 bool runtime_test = false;
266 /* If the number of iterations is unknown, or the
267 peeling-for-misalignment amount is unknown, we will have to generate
268 a runtime test to test the loop count against the threshold. */
269 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
270 || (byte_misalign < 0))
/* NOTE(review): the 'runtime_test = true' and the addition into
   vec_outside_cost appear elided around here — confirm upstream.  */
273 targetm.vectorize.builtin_vectorization_cost (runtime_test);
274 if (vect_print_dump_info (REPORT_DETAILS))
275 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
276 targetm.vectorize.builtin_vectorization_cost (runtime_test));
279 /* Calculate number of iterations required to make the vector version
280 profitable, relative to the loop bodies only. The following condition
281 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
282 SIC = scalar iteration cost, VIC = vector iteration cost,
283 VOC = vector outside cost and VF = vectorization factor. */
285 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
287 if (vec_outside_cost == 0)
288 min_profitable_iters = 1;
/* Integer division truncates, so bump the result if the
   profitability inequality is not yet strictly satisfied.  */
291 min_profitable_iters = (vec_outside_cost * vf)
292 / ((scalar_single_iter_cost * vf)
295 if ((scalar_single_iter_cost * vf * min_profitable_iters)
296 <= ((vec_inside_cost * min_profitable_iters)
297 + (vec_outside_cost * vf)))
298 min_profitable_iters++;
301 /* vector version will never be profitable. */
304 if (vect_print_dump_info (REPORT_DETAILS))
305 fprintf (vect_dump, "cost model: vector iteration cost = %d "
306 "is divisible by scalar iteration cost = %d by a factor "
307 "greater than or equal to the vectorization factor = %d .",
308 vec_inside_cost, scalar_single_iter_cost, vf);
312 if (vect_print_dump_info (REPORT_DETAILS))
314 fprintf (vect_dump, "Cost model analysis: \n");
315 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
317 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
319 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
320 fprintf (vect_dump, " prologue iterations: %d\n",
321 peel_iters_prologue);
322 fprintf (vect_dump, " epilogue iterations: %d\n",
323 peel_iters_epilogue);
324 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
325 min_profitable_iters);
326 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
327 min_profitable_iters < vf ? vf : min_profitable_iters);
/* The vector loop runs at least one full VF-wide iteration, so the
   threshold is never below VF.  */
330 min_profitable_iters =
331 min_profitable_iters < vf ? vf : min_profitable_iters;
333 /* Because the condition we create is:
334 if (niters <= min_profitable_iters)
335 then skip the vectorized loop. */
336 min_profitable_iters--;
337 return min_profitable_iters;
341 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
342 functions. Design better to avoid maintenance issues. */
344 /* Function vect_model_reduction_cost.
346 Models cost for a reduction operation, including the vector ops
347 generated within the strip-mine loop, the initial definition before
348 the loop, and the epilogue code that must be generated. */
/* NOTE(review): excerpt is elided — return type, the NCOPIES parameter
   line, several declarations (vectype, reduction_op, orig_stmt, code,
   outer_cost, optab, bitsize) and braces are missing from this view.  */
351 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
360 enum machine_mode mode;
361 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
362 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
364 /* Cost of reduction op inside loop. */
365 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
/* The reduction operand is the last operand of the operation.  */
367 reduction_op = TREE_OPERAND (operation, op_type-1);
368 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
369 mode = TYPE_MODE (vectype);
/* Use the original (pattern) stmt when this stmt was created by
   pattern recognition; otherwise the stmt itself.  */
370 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
373 orig_stmt = STMT_VINFO_STMT (stmt_info);
375 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
377 /* Add in cost for initial definition. */
378 outer_cost += TARG_SCALAR_TO_VEC_COST;
380 /* Determine cost of epilogue code.
382 We have a reduction operator that will reduce the vector in one statement.
383 Also requires scalar extract. */
/* A valid reduc_code means the target has a single-insn whole-vector
   reduction; cheapest epilogue.  */
385 if (reduc_code < NUM_TREE_CODES)
386 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
389 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
391 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
392 int element_bitsize = tree_low_cst (bitsize, 1);
393 int nelements = vec_size_in_bits / element_bitsize;
395 optab = optab_for_tree_code (code, vectype);
397 /* We have a whole vector shift available. */
398 if (VECTOR_MODE_P (mode)
399 && optab->handlers[mode].insn_code != CODE_FOR_nothing
400 && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
401 /* Final reduction via vector shifts and the reduction operator. Also
402 requires scalar extract. */
403 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
404 + TARG_VEC_TO_SCALAR_COST);
406 /* Use extracts and reduction op for final reduction. For N elements,
407 we have N extracts and N-1 reduction ops. */
408 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
411 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
413 if (vect_print_dump_info (REPORT_DETAILS))
414 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
415 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
416 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
420 /* Function vect_model_induction_cost.
422 Models cost for induction operations. */
/* Records the inside- and outside-of-loop vector costs for an
   induction: NCOPIES vector stmts per iteration, plus two
   scalar-to-vector broadcasts (vec_init, vec_step) in the prologue.  */
425 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
427 /* loop cost for vec_loop. */
428 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
429 /* prologue cost for vec_init and vec_step. */
430 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
432 if (vect_print_dump_info (REPORT_DETAILS))
433 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
434 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
435 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
439 /* Function vect_model_simple_cost.
441 Models cost for simple operations, i.e. those that only emit ncopies of a
442 single op. Right now, this does not account for multiple insns that could
443 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): the loop header over the (max 2) operands and the
   loop variable 'i' are elided in this excerpt.  */
446 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt)
450 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
452 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constant/invariant operands need a one-time scalar-to-vector
   broadcast outside the loop.  */
455 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
456 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST;
459 if (vect_print_dump_info (REPORT_DETAILS))
460 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
461 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
462 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
466 /* Function vect_cost_strided_group_size
468 For strided load or store, return the group_size only if it is the first
469 load or store of a group, else return 1. This ensures that group size is
470 only returned once per group. */
/* NOTE(review): the 'return 1' fallthrough for non-first group members
   is elided in this excerpt.  */
473 vect_cost_strided_group_size (stmt_vec_info stmt_info)
475 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
/* Only the first statement of an interleaving group reports the
   full group size.  */
477 if (first_stmt == STMT_VINFO_STMT (stmt_info))
478 return DR_GROUP_SIZE (stmt_info);
484 /* Function vect_model_store_cost
486 Models cost for stores. In the case of strided accesses, one access
487 has the overhead of the strided access attributed to it. */
/* NOTE(review): declarations of 'group_size' and 'cost', braces, and
   the non-strided else-branch body are elided in this excerpt.  */
490 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt)
/* A constant/invariant stored value needs one scalar-to-vector
   broadcast outside the loop.  */
495 if (dt == vect_constant_def || dt == vect_invariant_def)
496 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST;
498 /* Strided access? */
499 if (DR_GROUP_FIRST_DR (stmt_info))
500 group_size = vect_cost_strided_group_size (stmt_info);
501 /* Not a strided access. */
505 /* Is this an access in a group of stores, which provide strided access?
506 If so, add in the cost of the permutes. */
509 /* Uses a high and low interleave operation for each needed permute. */
510 cost = ncopies * exact_log2(group_size) * group_size
511 * TARG_VEC_STMT_COST;
513 if (vect_print_dump_info (REPORT_DETAILS))
514 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
519 /* Costs of the stores. */
520 cost += ncopies * TARG_VEC_STORE_COST;
522 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
524 if (vect_print_dump_info (REPORT_DETAILS))
525 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
526 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
527 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
531 /* Function vect_model_load_cost
533 Models cost for loads. In the case of strided accesses, the last access
534 has the overhead of the strided access attributed to it. Since unaligned
535 accesses are supported for loads, we also account for the costs of the
536 access scheme chosen. */
/* NOTE(review): declarations of group_size/first_stmt/inner_cost/
   outer_cost, braces, some case labels and 'break's are elided here.
   "cheme" in 'alignment_support_cheme' is a pre-existing upstream
   identifier misspelling — left untouched.  */
539 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
544 int alignment_support_cheme;
546 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
548 /* Strided accesses? */
549 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
/* Alignment is decided by the first data-ref of the group.  */
552 group_size = vect_cost_strided_group_size (stmt_info);
553 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
555 /* Not a strided access. */
562 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
564 /* Is this an access in a group of loads providing strided access?
565 If so, add in the cost of the permutes. */
568 /* Uses an even and odd extract operations for each needed permute. */
569 inner_cost = ncopies * exact_log2(group_size) * group_size
570 * TARG_VEC_STMT_COST;
572 if (vect_print_dump_info (REPORT_DETAILS))
573 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
578 /* The loads themselves. */
/* Per-scheme cost: aligned < unaligned-supported < software
   pipelined realignment.  */
579 switch (alignment_support_cheme)
583 inner_cost += ncopies * TARG_VEC_LOAD_COST;
585 if (vect_print_dump_info (REPORT_DETAILS))
586 fprintf (vect_dump, "vect_model_load_cost: aligned.");
590 case dr_unaligned_supported:
592 /* Here, we assign an additional cost for the unaligned load. */
593 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
595 if (vect_print_dump_info (REPORT_DETAILS))
596 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
601 case dr_unaligned_software_pipeline:
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
609 /* Unaligned software pipeline has a load of an address, an initial
610 load, and possibly a mask operation to "prime" the loop. However,
611 if this is an access in a group of loads, which provide strided
612 access, then the above cost should only be considered for one
613 access in the group. Inside the loop, there is a load op
614 and a realignment op. */
616 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
618 outer_cost = 2*TARG_VEC_STMT_COST;
619 if (targetm.vectorize.builtin_mask_for_load)
620 outer_cost += TARG_VEC_STMT_COST;
623 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
/* Each copy needs the load plus the realignment op.  */
625 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
634 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
636 if (vect_print_dump_info (REPORT_DETAILS))
637 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
638 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
639 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
644 /* Function vect_get_new_vect_var.
646 Returns a name for a new variable. The current naming scheme appends the
647 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
648 the name of vectorizer generated variables, and appends that to NAME if
/* NOTE(review): the prefix-string assignments per case, the 'prefix'
   and 'new_vect_var' declarations, and the final return are elided.  */
652 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
659 case vect_simple_var:
662 case vect_scalar_var:
665 case vect_pointer_var:
/* If a base NAME was supplied, build "<prefix><name>"; otherwise use
   the bare prefix as the temp-var name.  */
674 char* tmp = concat (prefix, name, NULL);
675 new_vect_var = create_tmp_var (type, tmp);
679 new_vect_var = create_tmp_var (type, prefix);
681 /* Mark vector typed variable as a gimple register variable. */
682 if (TREE_CODE (type) == VECTOR_TYPE)
683 DECL_GIMPLE_REG_P (new_vect_var) = true;
689 /* Function vect_create_addr_base_for_vector_ref.
691 Create an expression that computes the address of the first memory location
692 that will be accessed for a data reference.
695 STMT: The statement containing the data reference.
696 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
697 OFFSET: Optional. If supplied, it is be added to the initial address.
700 1. Return an SSA_NAME whose value is the address of the memory location of
701 the first vector of the data reference.
702 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
703 these statement(s) which define the returned SSA_NAME.
705 FORNOW: We are only handling array accesses with step 1. */
/* NOTE(review): parameter lines for NEW_STMT_LIST/OFFSET, several
   declarations (data_ref_base, new_base_stmt, new_stmt, dest, step,
   vec_stmt) and the final return are elided from this excerpt.  */
708 vect_create_addr_base_for_vector_ref (tree stmt,
712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
713 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
714 tree data_ref_base_expr = unshare_expr (DR_BASE_ADDRESS (dr));
715 tree base_name = build_fold_indirect_ref (data_ref_base_expr);
716 tree data_ref_base_var;
720 tree addr_base, addr_expr;
722 tree base_offset = unshare_expr (DR_OFFSET (dr));
723 tree init = unshare_expr (DR_INIT (dr));
724 tree vect_ptr_type, addr_expr2;
727 /* Create data_ref_base */
/* Gimplify the base address into a fresh temporary; any statements
   produced are appended for the caller to insert.  */
728 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base_expr), "batmp");
729 add_referenced_var (data_ref_base_var);
730 data_ref_base = force_gimple_operand (data_ref_base_expr, &new_base_stmt,
731 true, data_ref_base_var);
732 append_to_statement_list_force(new_base_stmt, new_stmt_list);
734 /* Create base_offset */
/* base_offset = DR_OFFSET + DR_INIT, converted to sizetype and
   gimplified.  */
735 base_offset = size_binop (PLUS_EXPR, base_offset, init);
736 base_offset = fold_convert (sizetype, base_offset);
737 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
738 add_referenced_var (dest);
739 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
740 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Optional extra OFFSET, scaled by the data-ref step (guard 'if
   (offset)' elided in this excerpt).  */
744 tree tmp = create_tmp_var (sizetype, "offset");
747 /* For interleaved access step we divide STEP by the size of the
748 interleaving group. */
749 if (DR_GROUP_SIZE (stmt_info))
750 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
751 build_int_cst (TREE_TYPE (offset),
752 DR_GROUP_SIZE (stmt_info)));
756 add_referenced_var (tmp);
757 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
758 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
759 base_offset, offset);
760 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
761 append_to_statement_list_force (new_stmt, new_stmt_list);
764 /* base + base_offset */
765 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
768 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
770 /* addr_expr = addr_base */
/* Materialize the final address as an SSA-named pointer of the
   vector-pointer type.  */
771 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
772 get_name (base_name));
773 add_referenced_var (addr_expr);
774 vec_stmt = fold_convert (vect_ptr_type, addr_base);
775 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
776 get_name (base_name));
777 add_referenced_var (addr_expr2);
778 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
779 append_to_statement_list_force (new_stmt, new_stmt_list);
781 if (vect_print_dump_info (REPORT_DETAILS))
783 fprintf (vect_dump, "created ");
784 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
790 /* Function vect_create_data_ref_ptr.
792 Create a new pointer to vector type (vp), that points to the first location
793 accessed in the loop by STMT, along with the def-use update chain to
794 appropriately advance the pointer through the loop iterations. Also set
795 aliasing information for the pointer. This vector pointer is used by the
796 callers to this function to create a memory reference expression for vector
800 1. STMT: a stmt that references memory. Expected to be of the form
801 GIMPLE_MODIFY_STMT <name, data-ref> or
802 GIMPLE_MODIFY_STMT <data-ref, name>.
803 2. BSI: block_stmt_iterator where new stmts can be added.
804 3. OFFSET (optional): an offset to be added to the initial address accessed
805 by the data-ref in STMT.
806 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
807 pointing to the initial address.
808 5. TYPE: if not NULL indicates the required type of the data-ref
811 1. Declare a new ptr to vector_type, and have it point to the base of the
812 data reference (initial addressed accessed by the data reference).
813 For example, for vector of type V8HI, the following code is generated:
816 vp = (v8hi *)initial_address;
818 if OFFSET is not supplied:
819 initial_address = &a[init];
820 if OFFSET is supplied:
821 initial_address = &a[init + OFFSET];
823 Return the initial_address in INITIAL_ADDRESS.
825 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
826 update the pointer in each iteration of the loop.
828 Return the increment stmt that updates the pointer in PTR_INCR.
830 3. Return the pointer. */
/* NOTE(review): several declarations (base_name, vect_ptr_type,
   vect_ptr, tag, new_temp, new_bb, vec_stmt, vect_ptr_init, incr,
   insert_after) and braces are elided from this excerpt.  */
833 vect_create_data_ref_ptr (tree stmt,
834 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
835 tree offset, tree *initial_address, tree *ptr_incr,
836 bool only_init, tree type)
839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
840 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
842 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
848 tree new_stmt_list = NULL_TREE;
849 edge pe = loop_preheader_edge (loop);
852 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
854 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
/* Diagnostic dump describing what kind of base is being vectorized.  */
856 if (vect_print_dump_info (REPORT_DETAILS))
858 tree data_ref_base = base_name;
859 fprintf (vect_dump, "create vector-pointer variable to type: ");
860 print_generic_expr (vect_dump, vectype, TDF_SLIM);
861 if (TREE_CODE (data_ref_base) == VAR_DECL)
862 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
863 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
864 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
865 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
866 fprintf (vect_dump, " vectorizing a record based array ref: ");
867 else if (TREE_CODE (data_ref_base) == SSA_NAME)
868 fprintf (vect_dump, " vectorizing a pointer ref: ");
869 print_generic_expr (vect_dump, base_name, TDF_SLIM);
872 /** (1) Create the new vector-pointer variable: **/
/* Caller-supplied TYPE overrides the stmt's vectype.  */
874 vect_ptr_type = build_pointer_type (type);
876 vect_ptr_type = build_pointer_type (vectype);
877 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
878 get_name (base_name));
879 add_referenced_var (vect_ptr);
881 /** (2) Add aliasing information to the new vector-pointer:
882 (The points-to info (DR_PTR_INFO) may be defined later.) **/
884 tag = DR_SYMBOL_TAG (dr);
887 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
888 tag must be created with tag added to its may alias list. */
890 new_type_alias (vect_ptr, tag, DR_REF (dr));
892 set_symbol_mem_tag (vect_ptr, tag);
894 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
896 /** (3) Calculate the initial address the vector-pointer, and set
897 the vector-pointer to point to it before the loop: **/
899 /* Create: (&(base[init_val+offset]) in the loop preheader. */
900 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
902 pe = loop_preheader_edge (loop);
903 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
/* Insertion on the preheader edge must not have split a new block.  */
904 gcc_assert (!new_bb);
905 *initial_address = new_temp;
907 /* Create: p = (vectype *) initial_base */
908 vec_stmt = fold_convert (vect_ptr_type, new_temp);
909 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
910 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
911 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
912 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
913 gcc_assert (!new_bb);
916 /** (4) Handle the updating of the vector-pointer inside the loop: **/
918 if (only_init) /* No update in loop is required. */
920 /* Copy the points-to information if it exists. */
921 if (DR_PTR_INFO (dr))
922 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
923 return vect_ptr_init;
/* Otherwise build an IV that advances the pointer by one vector
   width (TYPE_SIZE_UNIT) per iteration.  */
927 block_stmt_iterator incr_bsi;
929 tree indx_before_incr, indx_after_incr;
932 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
933 create_iv (vect_ptr_init,
934 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
935 NULL_TREE, loop, &incr_bsi, insert_after,
936 &indx_before_incr, &indx_after_incr);
937 incr = bsi_stmt (incr_bsi);
938 set_stmt_info (stmt_ann (incr),
939 new_stmt_vec_info (incr, loop_vinfo));
941 /* Copy the points-to information if it exists. */
942 if (DR_PTR_INFO (dr))
944 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
945 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
947 merge_alias_info (vect_ptr_init, indx_before_incr);
948 merge_alias_info (vect_ptr_init, indx_after_incr);
/* NOTE(review): the '*ptr_incr = incr' store appears elided here —
   confirm against the upstream file.  */
952 return indx_before_incr;
957 /* Function bump_vector_ptr
959 Increment a pointer (to a vector type) by vector-size. Connect the new
960 increment stmt to the existing def-use update-chain of the pointer.
962 The pointer def-use update-chain before this function:
963 DATAREF_PTR = phi (p_0, p_2)
965 PTR_INCR: p_2 = DATAREF_PTR + step
967 The pointer def-use update-chain after this function:
968 DATAREF_PTR = phi (p_0, p_2)
970 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
972 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
975 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
977 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
978 The increment amount across iterations is also expected to be
980 BSI - location where the new update stmt is to be placed.
981 STMT - the original scalar memory-access stmt that is being vectorized.
983 Output: Return NEW_DATAREF_PTR as illustrated above.
/* NOTE(review): the STMT parameter line, 'incr_stmt' declaration,
   the use_operand_p/ssa_op_iter declarations and braces are elided.  */
988 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
992 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
993 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
994 tree vptr_type = TREE_TYPE (dataref_ptr);
995 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
/* The bump amount is exactly one vector's worth of bytes.  */
996 tree update = TYPE_SIZE_UNIT (vectype);
1000 tree new_dataref_ptr;
/* NEW_DATAREF_PTR = DATAREF_PTR + vector_size, inserted at BSI.  */
1002 incr_stmt = build_gimple_modify_stmt (ptr_var,
1003 build2 (POINTER_PLUS_EXPR, vptr_type,
1004 dataref_ptr, update));
1005 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1006 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1007 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1009 /* Update the vector-pointer's cross-iteration increment. */
/* Rewire PTR_INCR so it now increments NEW_DATAREF_PTR; any other
   use must be the step, which is asserted unchanged.  */
1010 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1012 tree use = USE_FROM_PTR (use_p);
1014 if (use == dataref_ptr)
1015 SET_USE (use_p, new_dataref_ptr);
1017 gcc_assert (tree_int_cst_compare (use, update) == 0);
1020 /* Copy the points-to information if it exists. */
1021 if (DR_PTR_INFO (dr))
1022 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1023 merge_alias_info (new_dataref_ptr, dataref_ptr);
1025 return new_dataref_ptr;
1029 /* Function vect_create_destination_var.
1031 Create a new temporary of type VECTYPE. */
/* If VECTYPE is NULL, a scalar destination of SCALAR_DEST's type is
   created instead.  SCALAR_DEST must be an SSA_NAME; its name seeds
   the new variable's name.  NOTE(review): the 'vec_dest'/'type'
   declarations and the final return are elided in this excerpt.  */
1034 vect_create_destination_var (tree scalar_dest, tree vectype)
1037 const char *new_name;
1039 enum vect_var_kind kind;
1041 kind = vectype ? vect_simple_var : vect_scalar_var;
1042 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1044 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1046 new_name = get_name (scalar_dest);
1049 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1050 add_referenced_var (vec_dest);
1056 /* Function vect_init_vector.
1058 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1059 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1060 used in the vectorization of STMT. */
/* The init stmt is placed on the loop-preheader edge so the vector
   value is defined once before the loop.  NOTE(review): declarations
   of new_var/init_stmt/new_temp/pe/new_bb/vec_oprnd and the final
   return are elided in this excerpt.  */
1063 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
1065 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1066 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1067 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1075 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1076 add_referenced_var (new_var);
/* new_temp = VECTOR_VAR, as an SSA definition.  */
1078 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1079 new_temp = make_ssa_name (new_var, init_stmt);
1080 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1082 pe = loop_preheader_edge (loop);
1083 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
/* Inserting on the preheader edge must not create a new block.  */
1084 gcc_assert (!new_bb);
1086 if (vect_print_dump_info (REPORT_DETAILS))
1088 fprintf (vect_dump, "created new init_stmt: ");
1089 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1092 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1097 /* Function get_initial_def_for_induction
1100 IV_PHI - the initial value of the induction variable
1103 Return a vector variable, initialized with the first VF values of
1104 the induction variable. E.g., for an iv with IV_PHI='X' and
1105 evolution S, for a vector of 4 units, we want to return:
1106 [X, X + S, X + 2*S, X + 3*S]. */
/* IV_PHI is the loop-header PHI node of the scalar induction variable.
   This builds the whole vectorized induction: the initial vector, the
   per-iteration vector step, the vector PHI cycle, and (when VF > nunits)
   the extra unrolled copies.  */
1109 get_initial_def_for_induction (tree iv_phi)
1111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1114 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1115 tree vectype = get_vectype_for_scalar_type (scalar_type);
1116 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1117 edge pe = loop_preheader_edge (loop);
1119 block_stmt_iterator bsi;
1120 tree vec, vec_init, vec_step, t;
1125 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1126 tree init_expr, step_expr;
1127 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Number of vector stmts needed to cover VF scalar iterations.  */
1130 int ncopies = vf / nunits;
1132 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1134 tree stmt = NULL_TREE;
1135 block_stmt_iterator si;
1136 basic_block bb = bb_for_stmt (iv_phi);
1138 gcc_assert (phi_info);
1139 gcc_assert (ncopies >= 1);
1141 /* Find the first insertion point in the BB. */
1142 si = bsi_after_labels (bb);
1143 stmt = bsi_stmt (si);
/* Decompose the iv's scalar evolution into INIT_EXPR and STEP_EXPR.  */
1145 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1146 gcc_assert (access_fn);
1147 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1148 &init_expr, &step_expr);
1151 /* Create the vector that holds the initial_value of the induction. */
1152 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1153 add_referenced_var (new_var);
/* Gimplify INIT_EXPR; any stmts it needs go on the preheader edge.  */
1155 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1158 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1159 gcc_assert (!new_bb);
/* Build the element list [X, X+S, X+2*S, ...] one element at a time,
   each new element inserted as a stmt on the preheader edge.  */
1163 t = tree_cons (NULL_TREE, new_name, t);
1164 for (i = 1; i < nunits; i++)
1168 /* Create: new_name = new_name + step_expr */
1169 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1170 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1171 new_name = make_ssa_name (new_var, init_stmt);
1172 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1174 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1175 gcc_assert (!new_bb);
1177 if (vect_print_dump_info (REPORT_DETAILS))
1179 fprintf (vect_dump, "created new init_stmt: ");
1180 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1182 t = tree_cons (NULL_TREE, new_name, t);
/* The list was built back-to-front; reverse before constructing.  */
1184 vec = build_constructor_from_list (vectype, nreverse (t));
1185 vec_init = vect_init_vector (stmt, vec, vectype);
1188 /* Create the vector that holds the step of the induction. */
/* All elements of the step vector are VF*S (splat).  */
1189 expr = build_int_cst (scalar_type, vf);
1190 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1192 for (i = 0; i < nunits; i++)
1193 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1194 vec = build_constructor_from_list (vectype, t);
1195 vec_step = vect_init_vector (stmt, vec, vectype);
1198 /* Create the following def-use cycle:
1200 vec_init = [X, X+S, X+2*S, X+3*S]
1201 vec_step = [VF*S, VF*S, VF*S, VF*S]
1203 vec_iv = PHI <vec_init, vec_loop>
1207 vec_loop = vec_iv + vec_step; */
1209 /* Create the induction-phi that defines the induction-operand. */
1210 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1211 add_referenced_var (vec_dest);
1212 induction_phi = create_phi_node (vec_dest, loop->header);
1213 set_stmt_info (get_stmt_ann (induction_phi),
1214 new_stmt_vec_info (induction_phi, loop_vinfo));
1215 induc_def = PHI_RESULT (induction_phi);
1217 /* Create the iv update inside the loop */
1218 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1219 build2 (PLUS_EXPR, vectype,
1220 induc_def, vec_step));
1221 vec_def = make_ssa_name (vec_dest, new_stmt);
1222 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1223 bsi = bsi_for_stmt (stmt);
1224 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1226 /* Set the arguments of the phi node: */
1227 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1228 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1231 /* In case the vectorization factor (VF) is bigger than the number
1232 of elements that we can fit in a vectype (nunits), we have to generate
1233 more than one vector stmt - i.e - we need to "unroll" the
1234 vector stmt by a factor VF/nunits. For more details see documentation
1235 in vectorizable_operation. */
1239 stmt_vec_info prev_stmt_vinfo;
1241 /* Create the vector that holds the step of the induction. */
/* For the unrolled copies the step is nunits*S (not VF*S), since each
   copy advances by one vector's worth of elements.  */
1242 expr = build_int_cst (scalar_type, nunits);
1243 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1245 for (i = 0; i < nunits; i++)
1246 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1247 vec = build_constructor_from_list (vectype, t);
1248 vec_step = vect_init_vector (stmt, vec, vectype);
/* Chain the copies via STMT_VINFO_RELATED_STMT so later consumers can
   walk from one copy to the next.  */
1250 vec_def = induc_def;
1251 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1252 for (i = 1; i < ncopies; i++)
1256 /* vec_i = vec_prev + vec_{step*nunits} */
1257 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1258 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1259 vec_def = make_ssa_name (vec_dest, new_stmt);
1260 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1261 bsi = bsi_for_stmt (stmt);
1262 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1264 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1265 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1269 if (vect_print_dump_info (REPORT_DETAILS))
1271 fprintf (vect_dump, "transform induction: created def-use cycle:");
1272 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1273 fprintf (vect_dump, "\n");
1274 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
/* Record the vectorized form of the scalar iv phi.  */
1277 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1282 /* Function vect_get_vec_def_for_operand.
1284 OP is an operand in STMT. This function returns a (vector) def that will be
1285 used in the vectorized stmt for STMT.
1287 In the case that OP is an SSA_NAME which is defined in the loop, then
1288 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1290 In case OP is an invariant or constant, a new stmt that creates a vector def
1291 needs to be introduced. */
/* SCALAR_DEF, when non-NULL, receives the scalar def of OP (used by the
   reduction epilog code).  Dispatches on the vect_def_type classification
   returned by vect_is_simple_use.  */
1294 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1299 stmt_vec_info def_stmt_info = NULL;
1300 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1301 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1302 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1304 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1310 enum vect_def_type dt;
1314 if (vect_print_dump_info (REPORT_DETAILS))
1316 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1317 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP; analysis already guaranteed this succeeds.  */
1320 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1321 gcc_assert (is_simple_use);
1322 if (vect_print_dump_info (REPORT_DETAILS))
1326 fprintf (vect_dump, "def = ");
1327 print_generic_expr (vect_dump, def, TDF_SLIM);
1331 fprintf (vect_dump, " def_stmt = ");
1332 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1338 /* Case 1: operand is a constant. */
1339 case vect_constant_def:
1344 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1345 if (vect_print_dump_info (REPORT_DETAILS))
1346 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Splat the constant into an NUNITS-element VECTOR_CST.  */
1348 for (i = nunits - 1; i >= 0; --i)
1350 t = tree_cons (NULL_TREE, op, t);
1352 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1353 vec_cst = build_vector (vector_type, t);
1355 return vect_init_vector (stmt, vec_cst, vector_type);
1358 /* Case 2: operand is defined outside the loop - loop invariant. */
1359 case vect_invariant_def:
1364 /* Create 'vec_inv = {inv,inv,..,inv}' */
1365 if (vect_print_dump_info (REPORT_DETAILS))
1366 fprintf (vect_dump, "Create vector_inv.");
/* Splat via a CONSTRUCTOR (not a VECTOR_CST) since DEF is not constant.  */
1368 for (i = nunits - 1; i >= 0; --i)
1370 t = tree_cons (NULL_TREE, def, t);
1373 /* FIXME: use build_constructor directly. */
1374 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1375 vec_inv = build_constructor_from_list (vector_type, t);
1377 return vect_init_vector (stmt, vec_inv, vector_type);
1380 /* Case 3: operand is defined inside the loop. */
1384 *scalar_def = def_stmt;
1386 /* Get the def from the vectorized stmt. */
1387 def_stmt_info = vinfo_for_stmt (def_stmt);
1388 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1389 gcc_assert (vec_stmt);
1390 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1394 /* Case 4: operand is defined by a loop header phi - reduction */
1395 case vect_reduction_def:
1397 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1399 /* Get the def before the loop */
/* Use the phi argument coming from the preheader: the initial value.  */
1400 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1401 return get_initial_def_for_reduction (stmt, op, scalar_def);
1404 /* Case 5: operand is defined by loop-header phi - induction. */
1405 case vect_induction_def:
1407 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1409 /* Get the def before the loop */
1410 return get_initial_def_for_induction (def_stmt);
1419 /* Function vect_get_vec_def_for_stmt_copy
1421 Return a vector-def for an operand. This function is used when the
1422 vectorized stmt to be created (by the caller to this function) is a "copy"
1423 created in case the vectorized result cannot fit in one vector, and several
1424 copies of the vector-stmt are required. In this case the vector-def is
1425 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1426 of the stmt that defines VEC_OPRND.
1427 DT is the type of the vector def VEC_OPRND.
1430 In case the vectorization factor (VF) is bigger than the number
1431 of elements that can fit in a vectype (nunits), we have to generate
1432 more than one vector stmt to vectorize the scalar stmt. This situation
1433 arises when there are multiple data-types operated upon in the loop; the
1434 smallest data-type determines the VF, and as a result, when vectorizing
1435 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1436 vector stmt (each computing a vector of 'nunits' results, and together
1437 computing 'VF' results in each iteration). This function is called when
1438 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1439 which VF=16 and nunits=4, so the number of copies required is 4):
1441 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1443 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1444 VS1.1: vx.1 = memref1 VS1.2
1445 VS1.2: vx.2 = memref2 VS1.3
1446 VS1.3: vx.3 = memref3
1448 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1449 VSnew.1: vz1 = vx.1 + ... VSnew.2
1450 VSnew.2: vz2 = vx.2 + ... VSnew.3
1451 VSnew.3: vz3 = vx.3 + ...
1453 The vectorization of S1 is explained in vectorizable_load.
1454 The vectorization of S2:
1455 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1456 the function 'vect_get_vec_def_for_operand' is called to
1457 get the relevant vector-def for each operand of S2. For operand x it
1458 returns the vector-def 'vx.0'.
1460 To create the remaining copies of the vector-stmt (VSnew.j), this
1461 function is called to get the relevant vector-def for each operand. It is
1462 obtained from the respective VS1.j stmt, which is recorded in the
1463 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1465 For example, to obtain the vector-def 'vx.1' in order to create the
1466 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1467 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1468 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1469 and return its def ('vx.1').
1470 Overall, to create the above sequence this function will be called 3 times:
1471 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1472 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1473 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1476 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1478 tree vec_stmt_for_operand;
1479 stmt_vec_info def_stmt_info;
1481 /* Do nothing; can reuse same def. */
/* Invariants and constants were splatted once before the loop; every
   copy can use the same vector def.  */
1482 if (dt == vect_invariant_def || dt == vect_constant_def )
/* Otherwise hop from the stmt defining VEC_OPRND to its next copy via
   STMT_VINFO_RELATED_STMT and return that copy's def.  */
1485 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1486 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1487 gcc_assert (def_stmt_info);
1488 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1489 gcc_assert (vec_stmt_for_operand);
1490 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1496 /* Function vect_finish_stmt_generation.
1498 Insert a new stmt. */
/* Inserts VEC_STMT before BSI (i.e. before the scalar STMT being
   vectorized), attaches fresh stmt_vec_info to it, and copies STMT's
   source location onto it.  */
1501 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1502 block_stmt_iterator *bsi)
1504 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1505 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1507 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1508 set_stmt_info (get_stmt_ann (vec_stmt),
1509 new_stmt_vec_info (vec_stmt, loop_vinfo));
1511 if (vect_print_dump_info (REPORT_DETAILS))
1513 fprintf (vect_dump, "add new stmt: ");
1514 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1517 /* Make sure bsi points to the stmt that is being vectorized. */
1518 gcc_assert (stmt == bsi_stmt (*bsi));
/* Propagate debug location; the representation differs depending on
   whether mapped locations are in use.  */
1520 #ifdef USE_MAPPED_LOCATION
1521 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1523 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1528 /* Function get_initial_def_for_reduction
1531 STMT - a stmt that performs a reduction operation in the loop.
1532 INIT_VAL - the initial value of the reduction variable
1535 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1536 of the reduction (used for adjusting the epilog - see below).
1537 Return a vector variable, initialized according to the operation that STMT
1538 performs. This vector will be used as the initial value of the
1539 vector of partial results.
1541 Option1 (adjust in epilog): Initialize the vector as follows:
1544 min/max: [init_val,init_val,..,init_val,init_val]
1545 bit and/or: [init_val,init_val,..,init_val,init_val]
1546 and when necessary (e.g. add/mult case) let the caller know
1547 that it needs to adjust the result by init_val.
1549 Option2: Initialize the vector as follows:
1550 add: [0,0,...,0,init_val]
1551 mult: [1,1,...,1,init_val]
1552 min/max: [init_val,init_val,...,init_val]
1553 bit and/or: [init_val,init_val,...,init_val]
1554 and no adjustments are needed.
1556 For example, for the following code:
1562 STMT is 's = s + a[i]', and the reduction variable is 's'.
1563 For a vector of 4 units, we want to return either [0,0,0,init_val],
1564 or [0,0,0,0] and let the caller know that it needs to adjust
1565 the result at the end by 'init_val'.
1567 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1568 initialization vector is simpler (same element in all entries).
1569 A cost model should help decide between these two schemes. */
1572 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1574 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1575 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1576 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1577 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1578 tree type = TREE_TYPE (init_val);
/* Only integral and scalar-float reductions are handled here.  */
1586 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1587 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
/* For plus-like reductions (e.g. WIDEN_SUM_EXPR): start the vector of
   partial sums at all-zeros and record INIT_VAL in *ADJUSTMENT_DEF so
   the epilog adds it back ("adjust in epilog" scheme above).  */
1591 case WIDEN_SUM_EXPR:
1594 *adjustment_def = init_val;
1595 /* Create a vector of zeros for init_def. */
1596 if (INTEGRAL_TYPE_P (type))
1597 def_for_init = build_int_cst (type, 0);
1599 def_for_init = build_real (type, dconst0);
1600 for (i = nunits - 1; i >= 0; --i)
1601 t = tree_cons (NULL_TREE, def_for_init, t);
1602 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1603 init_def = build_vector (vector_type, t);
/* Other codes (elided here) need no epilog adjustment.  */
1608 *adjustment_def = NULL_TREE;
1620 /* Function vect_create_epilog_for_reduction
1622 Create code at the loop-epilog to finalize the result of a reduction
1625 VECT_DEF is a vector of partial results.
1626 REDUC_CODE is the tree-code for the epilog reduction.
1627 STMT is the scalar reduction stmt that is being vectorized.
1628 REDUCTION_PHI is the phi-node that carries the reduction computation.
1631 1. Creates the reduction def-use cycle: sets the arguments for
1633 The loop-entry argument is the vectorized initial-value of the reduction.
1634 The loop-latch argument is VECT_DEF - the vector of partial sums.
1635 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1636 by applying the operation specified by REDUC_CODE if available, or by
1637 other means (whole-vector shifts or a scalar loop).
1638 The function also creates a new phi node at the loop exit to preserve
1639 loop-closed form, as illustrated below.
1641 The flow at the entry to this function:
1644 vec_def = phi <null, null> # REDUCTION_PHI
1645 VECT_DEF = vector_stmt # vectorized form of STMT
1646 s_loop = scalar_stmt # (scalar) STMT
1648 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1652 The above is transformed by this function into:
1655 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1656 VECT_DEF = vector_stmt # vectorized form of STMT
1657 s_loop = scalar_stmt # (scalar) STMT
1659 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1660 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1661 v_out2 = reduce <v_out1>
1662 s_out3 = extract_field <v_out2, 0>
1663 s_out4 = adjust_result <s_out3>
1669 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1670 enum tree_code reduc_code, tree reduction_phi)
1672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1674 enum machine_mode mode;
1675 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1676 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1677 basic_block exit_bb;
1681 block_stmt_iterator exit_bsi;
1686 tree new_scalar_dest, exit_phi;
1687 tree bitsize, bitpos, bytesize;
1688 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1689 tree scalar_initial_def;
1690 tree vec_initial_def;
1692 imm_use_iterator imm_iter;
1693 use_operand_p use_p;
1694 bool extract_scalar_result;
1698 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
/* The reduction variable is the last operand of the reduction stmt.  */
1701 op_type = TREE_OPERAND_LENGTH (operation);
1702 reduction_op = TREE_OPERAND (operation, op_type-1);
1703 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1704 mode = TYPE_MODE (vectype);
1706 /*** 1. Create the reduction def-use cycle ***/
1708 /* 1.1 set the loop-entry arg of the reduction-phi: */
1709 /* For the case of reduction, vect_get_vec_def_for_operand returns
1710 the scalar def before the loop, that defines the initial value
1711 of the reduction variable. */
1712 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1713 &scalar_initial_def);
1714 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1716 /* 1.2 set the loop-latch arg for the reduction-phi: */
1717 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1719 if (vect_print_dump_info (REPORT_DETAILS))
1721 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1722 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1723 fprintf (vect_dump, "\n");
1724 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1728 /*** 2. Create epilog code
1729 The reduction epilog code operates across the elements of the vector
1730 of partial results computed by the vectorized loop.
1731 The reduction epilog code consists of:
1732 step 1: compute the scalar result in a vector (v_out2)
1733 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1734 step 3: adjust the scalar result (s_out3) if needed.
1736 Step 1 can be accomplished using one the following three schemes:
1737 (scheme 1) using reduc_code, if available.
1738 (scheme 2) using whole-vector shifts, if available.
1739 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1742 The overall epilog code looks like this:
1744 s_out0 = phi <s_loop> # original EXIT_PHI
1745 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1746 v_out2 = reduce <v_out1> # step 1
1747 s_out3 = extract_field <v_out2, 0> # step 2
1748 s_out4 = adjust_result <s_out3> # step 3
1750 (step 3 is optional, and step2 1 and 2 may be combined).
1751 Lastly, the uses of s_out0 are replaced by s_out4.
1755 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1756 v_out1 = phi <v_loop> */
/* This relies on the loop having a single exit.  */
1758 exit_bb = single_exit (loop)->dest;
1759 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1760 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1761 exit_bsi = bsi_after_labels (exit_bb);
1763 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1764 (i.e. when reduc_code is not available) and in the final adjustment
1765 code (if needed). Also get the original scalar reduction variable as
1766 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1767 represents a reduction pattern), the tree-code and scalar-def are
1768 taken from the original stmt that the pattern-stmt (STMT) replaces.
1769 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1770 are taken from STMT. */
1772 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1775 /* Regular reduction */
1780 /* Reduction pattern */
1781 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1782 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1783 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1785 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1786 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1787 scalar_type = TREE_TYPE (scalar_dest);
1788 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1789 bitsize = TYPE_SIZE (scalar_type);
1790 bytesize = TYPE_SIZE_UNIT (scalar_type);
1792 /* 2.3 Create the reduction code, using one of the three schemes described
/* REDUC_CODE < NUM_TREE_CODES means a valid reduction tree-code was
   supplied; otherwise fall back to schemes 2/3 below.  */
1795 if (reduc_code < NUM_TREE_CODES)
1799 /*** Case 1: Create:
1800 v_out2 = reduc_expr <v_out1> */
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "Reduce using direct vector reduction.");
1805 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1806 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1807 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1808 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1809 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1810 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1812 extract_scalar_result = true;
1816 enum tree_code shift_code = 0;
1817 bool have_whole_vector_shift = true;
1819 int element_bitsize = tree_low_cst (bitsize, 1);
1820 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* Check whether the target has a whole-vector right shift.  */
1823 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1824 shift_code = VEC_RSHIFT_EXPR;
1826 have_whole_vector_shift = false;
1828 /* Regardless of whether we have a whole vector shift, if we're
1829 emulating the operation via tree-vect-generic, we don't want
1830 to use it. Only the first round of the reduction is likely
1831 to still be profitable via emulation. */
1832 /* ??? It might be better to emit a reduction tree code here, so that
1833 tree-vect-generic can expand the first round via bit tricks. */
1834 if (!VECTOR_MODE_P (mode))
1835 have_whole_vector_shift = false;
1838 optab optab = optab_for_tree_code (code, vectype);
1839 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1840 have_whole_vector_shift = false;
1843 if (have_whole_vector_shift)
1845 /*** Case 2: Create:
1846 for (offset = VS/2; offset >= element_size; offset/=2)
1848 Create: va' = vec_shift <va, offset>
1849 Create: va = vop <va, va'>
1852 if (vect_print_dump_info (REPORT_DETAILS))
1853 fprintf (vect_dump, "Reduce using vector shifts");
1855 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1856 new_temp = PHI_RESULT (new_phi);
/* Log-time tree reduction: halve the shift each round.  */
1858 for (bit_offset = vec_size_in_bits/2;
1859 bit_offset >= element_bitsize;
1862 tree bitpos = size_int (bit_offset);
1863 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1864 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1865 new_name = make_ssa_name (vec_dest, epilog_stmt);
1866 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1867 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1869 tmp = build2 (code, vectype, new_name, new_temp);
1870 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1871 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1872 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1873 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1876 extract_scalar_result = true;
1882 /*** Case 3: Create:
1883 s = extract_field <v_out2, 0>
1884 for (offset = element_size;
1885 offset < vector_size;
1886 offset += element_size;)
1888 Create: s' = extract_field <v_out2, offset>
1889 Create: s = op <s, s'>
1892 if (vect_print_dump_info (REPORT_DETAILS))
1893 fprintf (vect_dump, "Reduce using scalar code. ");
1895 vec_temp = PHI_RESULT (new_phi);
1896 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* Extract element 0 first, then fold in each remaining element with
   the scalar reduction operation.  */
1897 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1899 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1900 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1901 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1902 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1903 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1905 for (bit_offset = element_bitsize;
1906 bit_offset < vec_size_in_bits;
1907 bit_offset += element_bitsize)
1910 tree bitpos = bitsize_int (bit_offset);
1911 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1914 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1915 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1916 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1917 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1918 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1920 tmp = build2 (code, scalar_type, new_name, new_temp);
1921 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1922 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1923 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1924 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
/* Scheme 3 already produced a scalar; no extraction step needed.  */
1927 extract_scalar_result = false;
1931 /* 2.4 Extract the final scalar result. Create:
1932 s_out3 = extract_field <v_out2, bitpos> */
1934 if (extract_scalar_result)
1938 if (vect_print_dump_info (REPORT_DETAILS))
1939 fprintf (vect_dump, "extract scalar result");
/* On big-endian targets the scalar result lives in the last element.  */
1941 if (BYTES_BIG_ENDIAN)
1942 bitpos = size_binop (MULT_EXPR,
1943 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1944 TYPE_SIZE (scalar_type));
1946 bitpos = bitsize_zero_node;
1948 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1949 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1950 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1951 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1952 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1953 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1956 /* 2.4 Adjust the final result by the initial value of the reduction
1957 variable. (When such adjustment is not needed, then
1958 'scalar_initial_def' is zero).
1961 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1963 if (scalar_initial_def)
1965 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1966 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1967 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1968 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1969 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1972 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1974 /* Find the loop-closed-use at the loop exit of the original scalar result.
1975 (The reduction result is expected to have two immediate uses - one at the
1976 latch block, and one at the loop exit). */
1978 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1980 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1982 exit_phi = USE_STMT (use_p);
1986 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1987 gcc_assert (exit_phi);
1988 /* Replace the uses: */
/* Redirect every use of the old exit-phi result to the new scalar.  */
1989 orig_name = PHI_RESULT (exit_phi);
1990 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1991 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1992 SET_USE (use_p, new_temp);
1996 /* Function vectorizable_reduction.
1998 Check if STMT performs a reduction operation that can be vectorized.
1999 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2000 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2001 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2003 This function also handles reduction idioms (patterns) that have been
2004 recognized in advance during vect_pattern_recog. In this case, STMT may be
2006 X = pattern_expr (arg0, arg1, ..., X)
2007 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
2008 sequence that had been detected and replaced by the pattern-stmt (STMT).
2010 In some cases of reduction patterns, the type of the reduction variable X is
2011 different than the type of the other arguments of STMT.
2012 In such cases, the vectype that is used when transforming STMT into a vector
2013 stmt is different than the vectype that is used to determine the
2014 vectorization factor, because it consists of a different number of elements
2015 than the actual number of elements that are being operated upon in parallel.
2017 For example, consider an accumulation of shorts into an int accumulator.
2018 On some targets it's possible to vectorize this pattern operating on 8
2019 shorts at a time (hence, the vectype for purposes of determining the
2020 vectorization factor should be V8HI); on the other hand, the vectype that
2021 is used to create the vector form is actually V4SI (the type of the result).
2023 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2024 indicates what is the actual level of parallelism (V8HI in the example), so
2025 that the right vectorization factor would be derived. This vectype
2026 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2027 be used to create the vectorized stmt. The right vectype for the vectorized
2028 stmt is obtained from the type of the result X:
2029 get_vectype_for_scalar_type (TREE_TYPE (X))
2031 This means that, contrary to "regular" reductions (or "regular" stmts in
2032 general), the following equation:
2033 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2034 does *NOT* necessarily hold for reduction patterns. */
/* Vectorize a reduction computation (see the large comment above for the
   reduction-pattern subtleties).  Checks vectorizability, and -- when
   VEC_STMT is non-NULL -- emits the vector stmts and the epilog reduction.
   NOTE(review): this chunk is a lossy extract; interior lines (braces,
   early returns, else-arms) are missing.  Code left byte-identical,
   comments only.  */
2037 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2042 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2044 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2046 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2048 enum tree_code code, orig_code, epilog_reduc_code = 0;
2049 enum machine_mode vec_mode;
2051 optab optab, reduc_optab;
2052 tree new_temp = NULL_TREE;
2054 enum vect_def_type dt;
2059 stmt_vec_info orig_stmt_info;
2060 tree expr = NULL_TREE;
2062 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* ncopies = how many vector stmts are needed to cover the vectorization
   factor (the "unrolling" factor VF/nunits).  */
2063 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2064 stmt_vec_info prev_stmt_info;
2066 tree new_stmt = NULL_TREE;
2069 gcc_assert (ncopies >= 1);
2071 /* 1. Is vectorizable reduction? */
2073 /* Not supportable if the reduction variable is used in the loop. */
2074 if (STMT_VINFO_RELEVANT_P (stmt_info))
2077 if (!STMT_VINFO_LIVE_P (stmt_info))
2080 /* Make sure it was already recognized as a reduction computation. */
2081 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2084 /* 2. Has this been recognized as a reduction pattern?
2086 Check if STMT represents a pattern that has been recognized
2087 in earlier analysis stages. For stmts that represent a pattern,
2088 the STMT_VINFO_RELATED_STMT field records the last stmt in
2089 the original sequence that constitutes the pattern. */
2091 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2094 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2095 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2096 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2097 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2100 /* 3. Check the operands of the operation. The first operands are defined
2101 inside the loop body. The last operand is the reduction variable,
2102 which is defined by the loop-header-phi. */
2104 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2106 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2107 code = TREE_CODE (operation);
2108 op_type = TREE_OPERAND_LENGTH (operation);
2109 if (op_type != binary_op && op_type != ternary_op)
2111 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2112 scalar_type = TREE_TYPE (scalar_dest);
2114 /* All uses but the last are expected to be defined in the loop.
2115 The last use is the reduction variable. */
2116 for (i = 0; i < op_type-1; i++)
2118 op = TREE_OPERAND (operation, i);
2119 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2120 gcc_assert (is_simple_use);
2121 if (dt != vect_loop_def
2122 && dt != vect_invariant_def
2123 && dt != vect_constant_def
2124 && dt != vect_induction_def)
/* The last operand: the reduction variable itself, defined by a
   loop-header PHI node.  */
2128 op = TREE_OPERAND (operation, i);
2129 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2130 gcc_assert (is_simple_use);
2131 gcc_assert (dt == vect_reduction_def);
2132 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
/* NOTE(review): the two asserts below look like the pattern/non-pattern
   arms of an elided if/else -- confirm against the full source.  */
2134 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2136 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2138 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2141 /* 4. Supportable by target? */
2143 /* 4.1. check support for the operation in the loop */
2144 optab = optab_for_tree_code (code, vectype);
2147 if (vect_print_dump_info (REPORT_DETAILS))
2148 fprintf (vect_dump, "no optab.");
2151 vec_mode = TYPE_MODE (vectype);
2152 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2154 if (vect_print_dump_info (REPORT_DETAILS))
2155 fprintf (vect_dump, "op not supported by target.");
2156 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2157 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2158 < vect_min_worthwhile_factor (code))
2160 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "proceeding using word mode.");
2164 /* Worthwhile without SIMD support? */
2165 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2166 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2167 < vect_min_worthwhile_factor (code))
2169 if (vect_print_dump_info (REPORT_DETAILS))
2170 fprintf (vect_dump, "not worthwhile without SIMD support.");
2174 /* 4.2. Check support for the epilog operation.
2176 If STMT represents a reduction pattern, then the type of the
2177 reduction variable may be different than the type of the rest
2178 of the arguments. For example, consider the case of accumulation
2179 of shorts into an int accumulator; The original code:
2180 S1: int_a = (int) short_a;
2181 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2184 STMT: int_acc = widen_sum <short_a, int_acc>
2187 1. The tree-code that is used to create the vector operation in the
2188 epilog code (that reduces the partial results) is not the
2189 tree-code of STMT, but is rather the tree-code of the original
2190 stmt from the pattern that STMT is replacing. I.e, in the example
2191 above we want to use 'widen_sum' in the loop, but 'plus' in the
2193 2. The type (mode) we use to check available target support
2194 for the vector operation to be created in the *epilog*, is
2195 determined by the type of the reduction variable (in the example
2196 above we'd check this: plus_optab[vect_int_mode]).
2197 However the type (mode) we use to check available target support
2198 for the vector operation to be created *inside the loop*, is
2199 determined by the type of the other arguments to STMT (in the
2200 example we'd check this: widen_sum_optab[vect_short_mode]).
2202 This is contrary to "regular" reductions, in which the types of all
2203 the arguments are the same as the type of the reduction variable.
2204 For "regular" reductions we can therefore use the same vector type
2205 (and also the same tree-code) when generating the epilog code and
2206 when generating the code inside the loop. */
2210 /* This is a reduction pattern: get the vectype from the type of the
2211 reduction variable, and get the tree-code from orig_stmt. */
2212 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2213 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2214 vec_mode = TYPE_MODE (vectype);
2218 /* Regular reduction: use the same vectype and tree-code as used for
2219 the vector code inside the loop can be used for the epilog code. */
/* Fall back to NUM_TREE_CODES (= "no epilog reduc op") when the target
   has no direct reduction support; the epilog is then open-coded.  */
2223 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2225 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2228 if (vect_print_dump_info (REPORT_DETAILS))
2229 fprintf (vect_dump, "no optab for reduction.");
2230 epilog_reduc_code = NUM_TREE_CODES;
2232 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "reduc op not supported by target.");
2236 epilog_reduc_code = NUM_TREE_CODES;
2239 if (!vec_stmt) /* transformation not required. */
2241 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2242 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
/* ---- Transform phase starts here. ---- */
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "transform reduction.");
2251 /* Create the destination vector */
2252 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2254 /* Create the reduction-phi that defines the reduction-operand. */
2255 new_phi = create_phi_node (vec_dest, loop->header);
2257 /* In case the vectorization factor (VF) is bigger than the number
2258 of elements that we can fit in a vectype (nunits), we have to generate
2259 more than one vector stmt - i.e - we need to "unroll" the
2260 vector stmt by a factor VF/nunits. For more details see documentation
2261 in vectorizable_operation. */
2263 prev_stmt_info = NULL;
2264 for (j = 0; j < ncopies; j++)
/* First copy (j == 0): vector defs come from the scalar operands.  */
2269 op = TREE_OPERAND (operation, 0);
2270 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2271 if (op_type == ternary_op)
2273 op = TREE_OPERAND (operation, 1);
2274 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2277 /* Get the vector def for the reduction variable from the phi node */
2278 reduc_def = PHI_RESULT (new_phi);
/* Subsequent copies (j > 0): chain from the previous copy's defs.  */
2282 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2283 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2284 if (op_type == ternary_op)
2285 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2287 /* Get the vector def for the reduction variable from the vectorized
2288 reduction operation generated in the previous iteration (j-1) */
2289 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2292 /* Arguments are ready. create the new vector stmt. */
2293 if (op_type == binary_op)
2294 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2296 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2298 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2299 new_temp = make_ssa_name (vec_dest, new_stmt);
2300 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2301 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2304 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2306 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2307 prev_stmt_info = vinfo_for_stmt (new_stmt);
2310 /* Finalize the reduction-phi (set its arguments) and create the
2311 epilog reduction code. */
2312 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2316 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2317 a function declaration if the target has a vectorized version
2318 of the function, or NULL_TREE if the function cannot be vectorized. */
/* Return the target-specific builtin decl implementing a vectorized
   version of the function called by CALL (input vectype VECTYPE_IN,
   output vectype VECTYPE_OUT), or NULL_TREE if none.  Only const/novops
   built-in functions are considered.  NOTE(review): lossy extract --
   some condition lines are missing; code left byte-identical.  */
2321 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2323 tree fndecl = get_callee_fndecl (call);
2324 enum built_in_function code;
2326 /* We only handle functions that do not read or clobber memory -- i.e.
2327 const or novops ones. */
2328 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2332 || TREE_CODE (fndecl) != FUNCTION_DECL
2333 || !DECL_BUILT_IN (fndecl))
/* Ask the target hook whether it provides a vectorized form.  */
2336 code = DECL_FUNCTION_CODE (fndecl);
2337 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2341 /* Function vectorizable_call.
2343 Check if STMT performs a function call that can be vectorized.
2344 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2345 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2346 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether STMT is a call that can be vectorized via a target
   builtin; when VEC_STMT is non-NULL, emit the vectorized call(s) at BSI
   and store the result in VEC_STMT.  NOTE(review): lossy extract --
   interior lines missing; code left byte-identical, comments only.  */
2349 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2355 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2357 tree vectype_out, vectype_in;
2360 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2361 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2362 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2364 int ncopies, j, nargs;
2365 call_expr_arg_iterator iter;
/* Relation between input and output vector widths: widening, none,
   or narrowing.  */
2367 enum { NARROW, NONE, WIDEN } modifier;
2369 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2372 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2375 /* FORNOW: not yet supported. */
2376 if (STMT_VINFO_LIVE_P (stmt_info))
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "value used after loop.");
2383 /* Is STMT a vectorizable call? */
2384 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2387 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2390 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2391 if (TREE_CODE (operation) != CALL_EXPR)
2394 /* Process function arguments. */
2395 rhs_type = NULL_TREE;
2397 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2399 /* Bail out if the function has more than two arguments, we
2400 do not have interesting builtin functions to vectorize with
2401 more than two arguments. */
2405 /* We can only handle calls with arguments of the same type. */
2407 && rhs_type != TREE_TYPE (op))
2409 if (vect_print_dump_info (REPORT_DETAILS))
2410 fprintf (vect_dump, "argument types differ.");
2413 rhs_type = TREE_TYPE (op);
2415 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2417 if (vect_print_dump_info (REPORT_DETAILS))
2418 fprintf (vect_dump, "use not simple.");
2425 /* No arguments is also not good. */
/* Derive the input/output vector types from the argument and lhs
   scalar types, and classify the width relation.  */
2429 vectype_in = get_vectype_for_scalar_type (rhs_type);
2430 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2432 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2433 vectype_out = get_vectype_for_scalar_type (lhs_type);
2434 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2437 if (nunits_in == nunits_out / 2)
2439 else if (nunits_out == nunits_in)
2441 else if (nunits_out == nunits_in / 2)
2446 /* For now, we only vectorize functions if a target specific builtin
2447 is available. TODO -- in some cases, it might be profitable to
2448 insert the calls for pieces of the vector, in order to be able
2449 to vectorize other operations in the loop. */
2450 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2451 if (fndecl == NULL_TREE)
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "function is not vectorizable.");
2459 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
/* Number of vector stmts needed to cover the vectorization factor.  */
2461 if (modifier == NARROW)
2462 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2464 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2466 /* Sanity check: make sure that at least one copy of the vectorized stmt
2467 needs to be generated. */
2468 gcc_assert (ncopies >= 1);
2470 if (!vec_stmt) /* transformation not required. */
2472 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2473 if (vect_print_dump_info (REPORT_DETAILS))
2474 fprintf (vect_dump, "=== vectorizable_call ===");
2475 vect_model_simple_cost (stmt_info, ncopies, dt);
/* ---- Transform phase. ---- */
2481 if (vect_print_dump_info (REPORT_DETAILS))
2482 fprintf (vect_dump, "transform operation.");
2485 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2486 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2488 prev_stmt_info = NULL;
/* NOTE(review): the two loops below appear to be the NONE and NARROW
   arms of an elided switch on `modifier' -- confirm against the full
   source.  First loop: one vector call per copy.  */
2492 for (j = 0; j < ncopies; ++j)
2494 /* Build argument list for the vectorized call. */
2495 /* FIXME: Rewrite this so that it doesn't
2496 construct a temporary list. */
2499 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2503 = vect_get_vec_def_for_operand (op, stmt, NULL);
2506 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2508 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2512 vargs = nreverse (vargs);
2514 rhs = build_function_call_expr (fndecl, vargs);
2515 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2516 new_temp = make_ssa_name (vec_dest, new_stmt);
2517 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2519 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2522 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2526 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Second loop (narrowing): two input vector defs feed each call.  */
2532 for (j = 0; j < ncopies; ++j)
2534 /* Build argument list for the vectorized call. */
2535 /* FIXME: Rewrite this so that it doesn't
2536 construct a temporary list. */
2539 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2544 = vect_get_vec_def_for_operand (op, stmt, NULL);
2546 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2551 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
2553 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2556 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2557 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
2561 vargs = nreverse (vargs);
2563 rhs = build_function_call_expr (fndecl, vargs);
2564 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2568 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2571 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2575 prev_stmt_info = vinfo_for_stmt (new_stmt);
2578 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2583 /* No current target implements this case. */
2587 /* The call in STMT might prevent it from being removed in dce.
2588 We however cannot remove it here, due to the way the ssa name
2589 it defines is mapped to the new definition. So just replace
2590 rhs of the statement with something harmless. */
2591 type = TREE_TYPE (scalar_dest);
2592 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2599 /* Function vect_gen_widened_results_half
2601 Create a vector stmt whose code, type, number of arguments, and result
2602 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2603 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2604 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2605 needs to be created (DECL is a function-decl of a target-builtin).
2606 STMT is the original scalar stmt that we are vectorizing. */
/* Emit one half of a widened result: build either a target builtin call
   (CODE == CALL_EXPR, via DECL) or a generic unary/binary expression,
   assign it to a fresh SSA name based on VEC_DEST, insert at BSI, and
   return the new stmt (return stmt elided in this extract).
   NOTE(review): lossy extract; code left byte-identical.  */
2609 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2610 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2611 tree vec_dest, block_stmt_iterator *bsi,
2620 /* Generate half of the widened result: */
2621 if (code == CALL_EXPR)
2623 /* Target specific support */
2624 if (op_type == binary_op)
2625 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2627 expr = build_call_expr (decl, 1, vec_oprnd0);
2631 /* Generic support */
2632 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2633 if (op_type == binary_op)
2634 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2636 expr = build1 (code, vectype, vec_oprnd0);
2638 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2639 new_temp = make_ssa_name (vec_dest, new_stmt);
2640 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2641 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* A builtin call may add virtual operands; queue their symbols for
   SSA renaming.  */
2643 if (code == CALL_EXPR)
2645 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2647 if (TREE_CODE (sym) == SSA_NAME)
2648 sym = SSA_NAME_VAR (sym);
2649 mark_sym_for_renaming (sym);
2657 /* Function vectorizable_conversion.
2659 Check if STMT performs a conversion operation, that can be vectorized.
2660 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2661 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2662 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether STMT is a FIX_TRUNC_EXPR/FLOAT_EXPR conversion that can
   be vectorized; when VEC_STMT is non-NULL, emit the vectorized
   conversion(s) at BSI.  Handles same-width (NONE), widening (WIDEN) and
   narrowing (NARROW) conversions.  NOTE(review): lossy extract --
   interior lines missing; code left byte-identical, comments only.  */
2665 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2672 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2675 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2676 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2679 enum vect_def_type dt0;
2681 stmt_vec_info prev_stmt_info;
2684 tree vectype_out, vectype_in;
2687 tree rhs_type, lhs_type;
/* Relation between input and output vector widths.  */
2689 enum { NARROW, NONE, WIDEN } modifier;
2691 /* Is STMT a vectorizable conversion? */
2693 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2696 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2699 if (STMT_VINFO_LIVE_P (stmt_info))
2701 /* FORNOW: not yet supported. */
2702 if (vect_print_dump_info (REPORT_DETAILS))
2703 fprintf (vect_dump, "value used after loop.");
2707 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2710 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2713 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2714 code = TREE_CODE (operation);
2715 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2718 /* Check types of lhs and rhs */
2719 op0 = TREE_OPERAND (operation, 0);
2720 rhs_type = TREE_TYPE (op0);
2721 vectype_in = get_vectype_for_scalar_type (rhs_type);
2722 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2724 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2725 lhs_type = TREE_TYPE (scalar_dest);
2726 vectype_out = get_vectype_for_scalar_type (lhs_type);
2727 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Classify the conversion by relative vector widths.  */
2730 if (nunits_in == nunits_out / 2)
2732 else if (nunits_out == nunits_in)
2734 else if (nunits_out == nunits_in / 2)
2739 if (modifier == NONE)
2740 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2742 /* Bail out if the types are both integral or non-integral */
2743 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2744 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2747 if (modifier == NARROW)
2748 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2750 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2752 /* Sanity check: make sure that at least one copy of the vectorized stmt
2753 needs to be generated. */
2754 gcc_assert (ncopies >= 1);
2756 /* Check the operands of the operation. */
2757 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2759 if (vect_print_dump_info (REPORT_DETAILS))
2760 fprintf (vect_dump, "use not simple.");
2764 /* Supportable by target? */
2765 if ((modifier == NONE
2766 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2767 || (modifier == WIDEN
2768 && !supportable_widening_operation (code, stmt, vectype_in,
2771 || (modifier == NARROW
2772 && !supportable_narrowing_operation (code, stmt, vectype_in,
2775 if (vect_print_dump_info (REPORT_DETAILS))
2776 fprintf (vect_dump, "op not supported by target.");
2780 if (modifier != NONE)
2781 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2783 if (!vec_stmt) /* transformation not required. */
2785 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
/* ---- Transform phase. ---- */
2790 if (vect_print_dump_info (REPORT_DETAILS))
2791 fprintf (vect_dump, "transform conversion.");
2794 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2796 prev_stmt_info = NULL;
/* NOTE(review): the three loops below look like the NONE, WIDEN and
   NARROW arms of an elided switch on `modifier' -- confirm against the
   full source.  NONE: one builtin-conversion call per copy.  */
2800 for (j = 0; j < ncopies; j++)
2806 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2808 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2811 targetm.vectorize.builtin_conversion (code, vectype_in);
2812 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2814 /* Arguments are ready. create the new vector stmt. */
2815 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2816 new_temp = make_ssa_name (vec_dest, new_stmt);
2817 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2818 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2819 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2821 if (TREE_CODE (sym) == SSA_NAME)
2822 sym = SSA_NAME_VAR (sym);
2823 mark_sym_for_renaming (sym);
2827 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2829 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2830 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* WIDEN: each input vector yields two half-width result stmts.  */
2835 /* In case the vectorization factor (VF) is bigger than the number
2836 of elements that we can fit in a vectype (nunits), we have to
2837 generate more than one vector stmt - i.e - we need to "unroll"
2838 the vector stmt by a factor VF/nunits. */
2839 for (j = 0; j < ncopies; j++)
2842 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2844 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2846 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2848 /* Generate first half of the widened result: */
2850 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2851 vec_oprnd0, vec_oprnd1,
2852 unary_op, vec_dest, bsi, stmt);
2854 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2856 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2857 prev_stmt_info = vinfo_for_stmt (new_stmt);
2859 /* Generate second half of the widened result: */
2861 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2862 vec_oprnd0, vec_oprnd1,
2863 unary_op, vec_dest, bsi, stmt);
2864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2865 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* NARROW: two input vectors combine into one result per copy.  */
2870 /* In case the vectorization factor (VF) is bigger than the number
2871 of elements that we can fit in a vectype (nunits), we have to
2872 generate more than one vector stmt - i.e - we need to "unroll"
2873 the vector stmt by a factor VF/nunits. */
2874 for (j = 0; j < ncopies; j++)
2879 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2880 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2884 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2885 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2888 /* Arguments are ready. Create the new vector stmt. */
2889 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2890 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2891 new_temp = make_ssa_name (vec_dest, new_stmt);
2892 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2893 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2896 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2898 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2900 prev_stmt_info = vinfo_for_stmt (new_stmt);
2903 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2909 /* Function vectorizable_assignment.
2911 Check if STMT performs an assignment (copy) that can be vectorized.
2912 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2913 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2914 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether STMT is a plain SSA copy that can be vectorized; when
   VEC_STMT is non-NULL, emit the single vectorized copy at BSI.
   NOTE(review): lossy extract -- interior lines missing; code left
   byte-identical, comments only.  */
2917 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2925 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2928 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2929 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2930 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2932 gcc_assert (ncopies >= 1);
/* NOTE(review): the guarding condition for this bail-out (presumably
   `if (ncopies > 1)') is elided in this extract -- confirm.  */
2934 return false; /* FORNOW */
2936 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2939 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2942 /* FORNOW: not yet supported. */
2943 if (STMT_VINFO_LIVE_P (stmt_info))
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "value used after loop.");
2950 /* Is vectorizable assignment? */
2951 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2954 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2955 if (TREE_CODE (scalar_dest) != SSA_NAME)
2958 op = GIMPLE_STMT_OPERAND (stmt, 1);
2959 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
2961 if (vect_print_dump_info (REPORT_DETAILS))
2962 fprintf (vect_dump, "use not simple.");
2966 if (!vec_stmt) /* transformation not required. */
2968 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2969 if (vect_print_dump_info (REPORT_DETAILS))
2970 fprintf (vect_dump, "=== vectorizable_assignment ===");
2971 vect_model_simple_cost (stmt_info, ncopies, dt);
/* ---- Transform phase. ---- */
2976 if (vect_print_dump_info (REPORT_DETAILS))
2977 fprintf (vect_dump, "transform assignment.");
2980 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2983 op = GIMPLE_STMT_OPERAND (stmt, 1);
2984 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2986 /* Arguments are ready. create the new vector stmt. */
2987 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2988 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2989 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2990 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2996 /* Function vect_min_worthwhile_factor.
2998 For a loop where we could vectorize the operation indicated by CODE,
2999 return the minimum vectorization factor that makes it worthwhile
3000 to use generic vectors. */
/* Return the minimum vectorization factor for which vectorizing the
   operation CODE with generic (non-SIMD) vectors is still worthwhile.
   NOTE(review): the function body is entirely elided in this extract.  */
3002 vect_min_worthwhile_factor (enum tree_code code)
3023 /* Function vectorizable_induction
3025 Check if PHI performs an induction computation that can be vectorized.
3026 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3027 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether PHI is an induction that can be vectorized; when
   VEC_STMT is non-NULL, build the initial vector induction def and
   return its defining stmt through VEC_STMT.  NOTE(review): lossy
   extract -- interior lines missing; code left byte-identical.  */
3031 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3034 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3036 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3037 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3038 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3041 gcc_assert (ncopies >= 1);
3043 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3046 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3048 if (STMT_VINFO_LIVE_P (stmt_info))
3050 /* FORNOW: not yet supported. */
3051 if (vect_print_dump_info (REPORT_DETAILS))
3052 fprintf (vect_dump, "value used after loop.");
3056 if (TREE_CODE (phi) != PHI_NODE)
3059 if (!vec_stmt) /* transformation not required. */
3061 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3062 if (vect_print_dump_info (REPORT_DETAILS))
3063 fprintf (vect_dump, "=== vectorizable_induction ===");
3064 vect_model_induction_cost (stmt_info, ncopies);
/* ---- Transform phase. ---- */
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "transform induction phi.");
3073 vec_def = get_initial_def_for_induction (phi);
3074 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3079 /* Function vectorizable_operation.
3081 Check if STMT performs a binary or unary operation that can be vectorized.
3082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3083 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3084 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3087 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3092 tree op0, op1 = NULL;
3093 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3096 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3097 enum tree_code code;
3098 enum machine_mode vec_mode;
3103 enum machine_mode optab_op2_mode;
3105 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3107 stmt_vec_info prev_stmt_info;
3108 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3111 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3114 gcc_assert (ncopies >= 1);
3116 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3119 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3122 /* FORNOW: not yet supported. */
3123 if (STMT_VINFO_LIVE_P (stmt_info))
3125 if (vect_print_dump_info (REPORT_DETAILS))
3126 fprintf (vect_dump, "value used after loop.");
3130 /* Is STMT a vectorizable binary/unary operation? */
3131 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3134 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3137 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3138 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3139 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3140 if (nunits_out != nunits_in)
3143 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3144 code = TREE_CODE (operation);
3146 /* For pointer addition, we should use the normal plus for
3147 the vector addition. */
3148 if (code == POINTER_PLUS_EXPR)
3151 optab = optab_for_tree_code (code, vectype);
3153 /* Support only unary or binary operations. */
3154 op_type = TREE_OPERAND_LENGTH (operation);
3155 if (op_type != unary_op && op_type != binary_op)
3157 if (vect_print_dump_info (REPORT_DETAILS))
3158 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3162 op0 = TREE_OPERAND (operation, 0);
3163 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3165 if (vect_print_dump_info (REPORT_DETAILS))
3166 fprintf (vect_dump, "use not simple.");
3170 if (op_type == binary_op)
3172 op1 = TREE_OPERAND (operation, 1);
3173 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3175 if (vect_print_dump_info (REPORT_DETAILS))
3176 fprintf (vect_dump, "use not simple.");
3181 /* Supportable by target? */
3184 if (vect_print_dump_info (REPORT_DETAILS))
3185 fprintf (vect_dump, "no optab.");
3188 vec_mode = TYPE_MODE (vectype);
3189 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3190 if (icode == CODE_FOR_nothing)
3192 if (vect_print_dump_info (REPORT_DETAILS))
3193 fprintf (vect_dump, "op not supported by target.");
3194 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3195 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3196 < vect_min_worthwhile_factor (code))
3198 if (vect_print_dump_info (REPORT_DETAILS))
3199 fprintf (vect_dump, "proceeding using word mode.");
3202 /* Worthwhile without SIMD support? */
3203 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3204 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3205 < vect_min_worthwhile_factor (code))
3207 if (vect_print_dump_info (REPORT_DETAILS))
3208 fprintf (vect_dump, "not worthwhile without SIMD support.");
3212 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3214 /* FORNOW: not yet supported. */
3215 if (!VECTOR_MODE_P (vec_mode))
3218 /* Invariant argument is needed for a vector shift
3219 by a scalar shift operand. */
3220 optab_op2_mode = insn_data[icode].operand[2].mode;
3221 if (! (VECTOR_MODE_P (optab_op2_mode)
3222 || dt[1] == vect_constant_def
3223 || dt[1] == vect_invariant_def))
3225 if (vect_print_dump_info (REPORT_DETAILS))
3226 fprintf (vect_dump, "operand mode requires invariant argument.");
3231 if (!vec_stmt) /* transformation not required. */
3233 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3234 if (vect_print_dump_info (REPORT_DETAILS))
3235 fprintf (vect_dump, "=== vectorizable_operation ===");
3236 vect_model_simple_cost (stmt_info, ncopies, dt);
3242 if (vect_print_dump_info (REPORT_DETAILS))
3243 fprintf (vect_dump, "transform binary/unary operation.");
3246 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3248 /* In case the vectorization factor (VF) is bigger than the number
3249 of elements that we can fit in a vectype (nunits), we have to generate
3250 more than one vector stmt - i.e - we need to "unroll" the
3251 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3252 from one copy of the vector stmt to the next, in the field
3253 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3254 stages to find the correct vector defs to be used when vectorizing
3255 stmts that use the defs of the current stmt. The example below illustrates
3256 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3257 4 vectorized stmts):
3259 before vectorization:
3260 RELATED_STMT VEC_STMT
3264 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3266 RELATED_STMT VEC_STMT
3267 VS1_0: vx0 = memref0 VS1_1 -
3268 VS1_1: vx1 = memref1 VS1_2 -
3269 VS1_2: vx2 = memref2 VS1_3 -
3270 VS1_3: vx3 = memref3 - -
3271 S1: x = load - VS1_0
3274 step2: vectorize stmt S2 (done here):
3275 To vectorize stmt S2 we first need to find the relevant vector
3276 def for the first operand 'x'. This is, as usual, obtained from
3277 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3278 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3279 relevant vector def 'vx0'. Having found 'vx0' we can generate
3280 the vector stmt VS2_0, and as usual, record it in the
3281 STMT_VINFO_VEC_STMT of stmt S2.
3282 When creating the second copy (VS2_1), we obtain the relevant vector
3283 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3284 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3285 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3286 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3287 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3288 chain of stmts and pointers:
3289 RELATED_STMT VEC_STMT
3290 VS1_0: vx0 = memref0 VS1_1 -
3291 VS1_1: vx1 = memref1 VS1_2 -
3292 VS1_2: vx2 = memref2 VS1_3 -
3293 VS1_3: vx3 = memref3 - -
3294 S1: x = load - VS1_0
3295 VS2_0: vz0 = vx0 + v1 VS2_1 -
3296 VS2_1: vz1 = vx1 + v1 VS2_2 -
3297 VS2_2: vz2 = vx2 + v1 VS2_3 -
3298 VS2_3: vz3 = vx3 + v1 - -
3299 S2: z = x + 1 - VS2_0 */
3301 prev_stmt_info = NULL;
3302 for (j = 0; j < ncopies; j++)
3307 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3308 if (op_type == binary_op)
3310 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3312 /* Vector shl and shr insn patterns can be defined with
3313 scalar operand 2 (shift operand). In this case, use
3314 constant or loop invariant op1 directly, without
3315 extending it to vector mode first. */
3316 optab_op2_mode = insn_data[icode].operand[2].mode;
3317 if (!VECTOR_MODE_P (optab_op2_mode))
3319 if (vect_print_dump_info (REPORT_DETAILS))
3320 fprintf (vect_dump, "operand 1 using scalar mode.");
3325 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3330 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3331 if (op_type == binary_op)
3332 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3335 /* Arguments are ready. create the new vector stmt. */
3337 if (op_type == binary_op)
3338 new_stmt = build_gimple_modify_stmt (vec_dest,
3339 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3341 new_stmt = build_gimple_modify_stmt (vec_dest,
3342 build1 (code, vectype, vec_oprnd0));
3343 new_temp = make_ssa_name (vec_dest, new_stmt);
3344 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3345 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3348 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3351 prev_stmt_info = vinfo_for_stmt (new_stmt);
3358 /* Function vectorizable_type_demotion
3360 Check if STMT performs a binary or unary operation that involves
3361 type demotion, and if it can be vectorized.
3362 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3363 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3364 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3367 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
/* vec_oprnd0/vec_oprnd1 hold the pair of input vector defs consumed by each
   narrowing stmt; dt[] records how each scalar operand is defined.  */
3374 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3376 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3377 enum tree_code code, code1 = ERROR_MARK;
3380 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3382 stmt_vec_info prev_stmt_info;
/* Only statements marked as relevant loop defs are handled here.  */
3391 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3394 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3397 /* FORNOW: not yet supported. */
3398 if (STMT_VINFO_LIVE_P (stmt_info))
3400 if (vect_print_dump_info (REPORT_DETAILS))
3401 fprintf (vect_dump, "value used after loop.");
3405 /* Is STMT a vectorizable type-demotion operation? */
3406 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3409 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3412 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3413 code = TREE_CODE (operation);
/* Demotion is expressed as a NOP/CONVERT conversion to a narrower type.  */
3414 if (code != NOP_EXPR && code != CONVERT_EXPR)
3417 op0 = TREE_OPERAND (operation, 0);
3418 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3419 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3421 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3422 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3423 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW: handle only the case where the output vector holds exactly twice
   as many elements as the input vector (two input vectors are narrowed
   into one output vector per copy).  */
3424 if (nunits_in != nunits_out / 2) /* FORNOW */
3427 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3428 gcc_assert (ncopies >= 1);
/* Source and destination must both be integral, or both floating point.  */
3430 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3431 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3432 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3433 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3434 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3437 /* Check the operands of the operation. */
3438 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3440 if (vect_print_dump_info (REPORT_DETAILS))
3441 fprintf (vect_dump, "use not simple.");
3445 /* Supportable by target? */
3446 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
/* Record the *input* vectype: the operand-copy lookup machinery uses it.  */
3449 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3451 if (!vec_stmt) /* transformation not required. */
3453 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3454 if (vect_print_dump_info (REPORT_DETAILS))
3455 fprintf (vect_dump, "=== vectorizable_demotion ===");
3456 vect_model_simple_cost (stmt_info, ncopies, dt);
/* Transformation phase.  */
3461 if (vect_print_dump_info (REPORT_DETAILS))
3462 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3466 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3468 /* In case the vectorization factor (VF) is bigger than the number
3469 of elements that we can fit in a vectype (nunits), we have to generate
3470 more than one vector stmt - i.e - we need to "unroll" the
3471 vector stmt by a factor VF/nunits. */
3472 prev_stmt_info = NULL;
3473 for (j = 0; j < ncopies; j++)
/* First copy: each narrowing stmt consumes TWO consecutive input vector
   defs, obtained via the stmt-copy chain of the operand's def stmt.  */
3478 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3479 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
/* Subsequent copies: deliberately continue the chain from the PREVIOUS
   copy's second operand (vec_oprnd1), so input vectors are consumed
   in order two-at-a-time.  */
3483 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3484 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3487 /* Arguments are ready. Create the new vector stmt. */
3488 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3489 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3490 new_temp = make_ssa_name (vec_dest, new_stmt);
3491 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3492 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Record the first copy as the stmt's VEC_STMT, and chain later copies
   through STMT_VINFO_RELATED_STMT.  */
3495 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3497 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3499 prev_stmt_info = vinfo_for_stmt (new_stmt);
3502 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3507 /* Function vectorizable_type_promotion
3509 Check if STMT performs a binary or unary operation that involves
3510 type promotion, and if it can be vectorized.
3511 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3512 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3513 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3516 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3522 tree op0, op1 = NULL;
3523 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3525 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* code1/code2 (or decl1/decl2 for builtin-based targets) describe how the
   low and high halves of the widened result are produced.  */
3526 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3527 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3530 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3532 stmt_vec_info prev_stmt_info;
/* Only statements marked as relevant loop defs are handled here.  */
3540 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3543 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3546 /* FORNOW: not yet supported. */
3547 if (STMT_VINFO_LIVE_P (stmt_info))
3549 if (vect_print_dump_info (REPORT_DETAILS))
3550 fprintf (vect_dump, "value used after loop.");
3554 /* Is STMT a vectorizable type-promotion operation? */
3555 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3558 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3561 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3562 code = TREE_CODE (operation);
/* Promotion is a widening conversion, or a widening multiply.  */
3563 if (code != NOP_EXPR && code != CONVERT_EXPR
3564 && code != WIDEN_MULT_EXPR)
3567 op0 = TREE_OPERAND (operation, 0);
3568 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3569 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3571 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3572 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3573 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW: handle only the case where one input vector widens into exactly
   two output vectors.  */
3574 if (nunits_out != nunits_in / 2) /* FORNOW */
3577 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3578 gcc_assert (ncopies >= 1);
/* Source and destination must both be integral, or both floating point.  */
3580 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3581 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3582 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3583 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3584 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3587 /* Check the operands of the operation. */
3588 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3590 if (vect_print_dump_info (REPORT_DETAILS))
3591 fprintf (vect_dump, "use not simple.");
/* WIDEN_MULT_EXPR is binary; the conversions are unary.  */
3595 op_type = TREE_CODE_LENGTH (code);
3596 if (op_type == binary_op)
3598 op1 = TREE_OPERAND (operation, 1);
3599 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3601 if (vect_print_dump_info (REPORT_DETAILS))
3602 fprintf (vect_dump, "use not simple.");
3607 /* Supportable by target? */
3608 if (!supportable_widening_operation (code, stmt, vectype_in,
3609 &decl1, &decl2, &code1, &code2))
/* Record the *input* vectype: the operand-copy lookup machinery uses it.  */
3612 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3614 if (!vec_stmt) /* transformation not required. */
3616 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3617 if (vect_print_dump_info (REPORT_DETAILS))
3618 fprintf (vect_dump, "=== vectorizable_promotion ===");
/* Cost 2*ncopies: each copy emits two vector stmts (low and high half).  */
3619 vect_model_simple_cost (stmt_info, 2*ncopies, dt);
/* Transformation phase.  */
3625 if (vect_print_dump_info (REPORT_DETAILS))
3626 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3630 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3632 /* In case the vectorization factor (VF) is bigger than the number
3633 of elements that we can fit in a vectype (nunits), we have to generate
3634 more than one vector stmt - i.e - we need to "unroll" the
3635 vector stmt by a factor VF/nunits. */
3637 prev_stmt_info = NULL;
3638 for (j = 0; j < ncopies; j++)
/* Handle uses: first copy gets the operand defs directly, later copies
   follow the stmt-copy chains.  */
3643 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3644 if (op_type == binary_op)
3645 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3649 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3650 if (op_type == binary_op)
3651 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3654 /* Arguments are ready. Create the new vector stmt. We are creating
3655 two vector defs because the widened result does not fit in one vector.
3656 The vectorized stmt can be expressed as a call to a target builtin,
3657 or using a tree-code. */
3658 /* Generate first half of the widened result: */
3659 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3660 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3662 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3664 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3665 prev_stmt_info = vinfo_for_stmt (new_stmt);
3667 /* Generate second half of the widened result: */
3668 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3669 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3670 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3671 prev_stmt_info = vinfo_for_stmt (new_stmt);
3675 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3680 /* Function vect_strided_store_supported.
3682 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3683 and FALSE otherwise. */
3686 vect_strided_store_supported (tree vectype)
3688 optab interleave_high_optab, interleave_low_optab;
3691 mode = (int) TYPE_MODE (vectype);
3693 /* Check that the operation is supported. */
3694 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3696 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
/* No optab at all for either interleave operation -> not supported.  */
3698 if (!interleave_high_optab || !interleave_low_optab)
3700 if (vect_print_dump_info (REPORT_DETAILS))
3701 fprintf (vect_dump, "no optab for interleave.");
/* The optabs exist, but the target must also provide insn patterns for
   this particular vector mode.  */
3705 if (interleave_high_optab->handlers[(int) mode].insn_code
3707 || interleave_low_optab->handlers[(int) mode].insn_code
3708 == CODE_FOR_nothing)
3710 if (vect_print_dump_info (REPORT_DETAILS))
3711 fprintf (vect_dump, "interleave op not supported by target.");
3718 /* Function vect_permute_store_chain.
3720 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3721 a power of 2, generate interleave_high/low stmts to reorder the data
3722 correctly for the stores. Return the final references for stores in
3725 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3726 The input is 4 vectors each containing 8 elements. We assign a number to each
3727 element, the input sequence is:
3729 1st vec: 0 1 2 3 4 5 6 7
3730 2nd vec: 8 9 10 11 12 13 14 15
3731 3rd vec: 16 17 18 19 20 21 22 23
3732 4th vec: 24 25 26 27 28 29 30 31
3734 The output sequence should be:
3736 1st vec: 0 8 16 24 1 9 17 25
3737 2nd vec: 2 10 18 26 3 11 19 27
3738 3rd vec: 4 12 20 28 5 13 21 29
3739 4th vec: 6 14 22 30 7 15 23 31
3741 i.e., we interleave the contents of the four vectors in their order.
3743 We use interleave_high/low instructions to create such output. The input of
3744 each interleave_high/low operation is two vectors:
3747 the even elements of the result vector are obtained left-to-right from the
3748 high/low elements of the first vector. The odd elements of the result are
3749 obtained left-to-right from the high/low elements of the second vector.
3750 The output of interleave_high will be: 0 4 1 5
3751 and of interleave_low: 2 6 3 7
3754 The permutation is done in log LENGTH stages. In each stage interleave_high
3755 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3756 where the first argument is taken from the first half of DR_CHAIN and the
3757 second argument from its second half.
3760 I1: interleave_high (1st vec, 3rd vec)
3761 I2: interleave_low (1st vec, 3rd vec)
3762 I3: interleave_high (2nd vec, 4th vec)
3763 I4: interleave_low (2nd vec, 4th vec)
3765 The output for the first stage is:
3767 I1: 0 16 1 17 2 18 3 19
3768 I2: 4 20 5 21 6 22 7 23
3769 I3: 8 24 9 25 10 26 11 27
3770 I4: 12 28 13 29 14 30 15 31
3772 The output of the second stage, i.e. the final result is:
3774 I1: 0 8 16 24 1 9 17 25
3775 I2: 2 10 18 26 3 11 19 27
3776 I3: 4 12 20 28 5 13 21 29
3777 I4: 6 14 22 30 7 15 23 31. */
3780 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3781 unsigned int length,
3783 block_stmt_iterator *bsi,
3784 VEC(tree,heap) **result_chain)
3786 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3787 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3788 tree scalar_dest, tmp;
3791 VEC(tree,heap) *first, *second;
3793 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3794 first = VEC_alloc (tree, heap, length/2);
3795 second = VEC_alloc (tree, heap, length/2);
3797 /* Check that the operation is supported. */
3798 if (!vect_strided_store_supported (vectype))
3801 *result_chain = VEC_copy (tree, heap, dr_chain);
/* log2(LENGTH) permutation stages; each stage interleaves the first half
   of DR_CHAIN with the second half, pairwise.  */
3803 for (i = 0; i < exact_log2 (length); i++)
3805 for (j = 0; j < length/2; j++)
3807 vect1 = VEC_index (tree, dr_chain, j);
3808 vect2 = VEC_index (tree, dr_chain, j+length/2);
3810 /* Create interleaving stmt:
3811 in the case of big endian:
3812 high = interleave_high (vect1, vect2)
3813 and in the case of little endian:
3814 high = interleave_low (vect1, vect2). */
3815 perm_dest = create_tmp_var (vectype, "vect_inter_high");
/* Mark the vector temporary as a GIMPLE register so it can get an
   SSA name despite being of aggregate-like vector type.  */
3816 DECL_GIMPLE_REG_P (perm_dest) = 1;
3817 add_referenced_var (perm_dest);
3818 if (BYTES_BIG_ENDIAN)
3819 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3821 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3822 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3823 high = make_ssa_name (perm_dest, perm_stmt);
3824 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3825 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3826 VEC_replace (tree, *result_chain, 2*j, high);
3828 /* Create interleaving stmt:
3829 in the case of big endian:
3830 low = interleave_low (vect1, vect2)
3831 and in the case of little endian:
3832 low = interleave_high (vect1, vect2). */
3833 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3834 DECL_GIMPLE_REG_P (perm_dest) = 1;
3835 add_referenced_var (perm_dest);
3836 if (BYTES_BIG_ENDIAN)
3837 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3839 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3840 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3841 low = make_ssa_name (perm_dest, perm_stmt);
3842 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3843 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3844 VEC_replace (tree, *result_chain, 2*j+1, low);
/* The output of this stage becomes the input of the next one.  */
3846 dr_chain = VEC_copy (tree, heap, *result_chain);
3852 /* Function vectorizable_store.
3854 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3856 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3857 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3858 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3861 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3866 tree vec_oprnd = NULL_TREE;
3867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* first_dr: data-ref of the first stmt in an interleaving group (if any).  */
3868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3869 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3871 enum machine_mode vec_mode;
3873 enum dr_alignment_support alignment_support_cheme;
3875 def_operand_p def_p;
3877 enum vect_def_type dt;
3878 stmt_vec_info prev_stmt_info = NULL;
3879 tree dataref_ptr = NULL_TREE;
3880 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3881 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3883 tree next_stmt, first_stmt;
3884 bool strided_store = false;
3885 unsigned int group_size, i;
3886 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3887 gcc_assert (ncopies >= 1);
/* Only statements marked as relevant loop defs are handled here.  */
3889 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3892 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3895 if (STMT_VINFO_LIVE_P (stmt_info))
3897 if (vect_print_dump_info (REPORT_DETAILS))
3898 fprintf (vect_dump, "value used after loop.");
3902 /* Is vectorizable store? */
3904 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3907 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3908 if (TREE_CODE (scalar_dest) != ARRAY_REF
3909 && TREE_CODE (scalar_dest) != INDIRECT_REF
3910 && !DR_GROUP_FIRST_DR (stmt_info))
3913 op = GIMPLE_STMT_OPERAND (stmt, 1);
3914 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3916 if (vect_print_dump_info (REPORT_DETAILS))
3917 fprintf (vect_dump, "use not simple.");
3921 vec_mode = TYPE_MODE (vectype);
3922 /* FORNOW. In some cases can vectorize even if data-type not supported
3923 (e.g. - array initialization with 0). */
3924 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3927 if (!STMT_VINFO_DATA_REF (stmt_info))
/* A DR_GROUP_FIRST_DR marks this stmt as part of an interleaving group.  */
3930 if (DR_GROUP_FIRST_DR (stmt_info))
3932 strided_store = true;
3933 if (!vect_strided_store_supported (vectype))
3937 if (!vec_stmt) /* transformation not required. */
3939 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3940 vect_model_store_cost (stmt_info, ncopies, dt);
/* Transformation phase.  */
3948 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3949 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3950 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3952 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3954 /* We vectorize all the stmts of the interleaving group when we
3955 reach the last stmt in the group. */
3956 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3957 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3959 *vec_stmt = NULL_TREE;
3970 if (vect_print_dump_info (REPORT_DETAILS))
3971 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3973 dr_chain = VEC_alloc (tree, heap, group_size);
3974 oprnds = VEC_alloc (tree, heap, group_size);
3976 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3977 gcc_assert (alignment_support_cheme);
3978 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
3980 /* In case the vectorization factor (VF) is bigger than the number
3981 of elements that we can fit in a vectype (nunits), we have to generate
3982 more than one vector stmt - i.e - we need to "unroll" the
3983 vector stmt by a factor VF/nunits. For more details see documentation in
3984 vect_get_vec_def_for_copy_stmt. */
3986 /* In case of interleaving (non-unit strided access):
3993 We create vectorized stores starting from base address (the access of the
3994 first stmt in the chain (S2 in the above example), when the last store stmt
3995 of the chain (S4) is reached:
3998 VS2: &base + vec_size*1 = vx0
3999 VS3: &base + vec_size*2 = vx1
4000 VS4: &base + vec_size*3 = vx3
4002 Then permutation statements are generated:
4004 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4005 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4008 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4009 (the order of the data-refs in the output of vect_permute_store_chain
4010 corresponds to the order of scalar stmts in the interleaving chain - see
4011 the documentation of vect_permute_store_chain()).
4013 In case of both multiple types and interleaving, above vector stores and
4014 permutation stmts are created for every copy. The result vector stmts are
4015 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4016 STMT_VINFO_RELATED_STMT for the next copies.
4019 prev_stmt_info = NULL;
4020 for (j = 0; j < ncopies; j++)
/* First copy (j == 0): gather the vectorized defs of every store in the
   group and create the data-ref pointer.  */
4027 /* For interleaved stores we collect vectorized defs for all the
4028 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
4029 as an input to vect_permute_store_chain(), and OPRNDS as an input
4030 to vect_get_vec_def_for_stmt_copy() for the next copy.
4031 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4032 OPRNDS are of size 1. */
4033 next_stmt = first_stmt;
4034 for (i = 0; i < group_size; i++)
4036 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
4037 is the exact number of stmts in the chain. Therefore, NEXT_STMT
4038 can't be NULL_TREE. In case that there is no interleaving,
4039 GROUP_SIZE is 1, and only one iteration of the loop will be
4041 gcc_assert (next_stmt);
4042 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4043 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
4044 VEC_quick_push(tree, dr_chain, vec_oprnd);
4045 VEC_quick_push(tree, oprnds, vec_oprnd);
4046 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4048 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
4049 &dummy, &ptr_incr, false,
4050 TREE_TYPE (vec_oprnd));
/* Subsequent copies (j > 0): advance each def along its stmt-copy chain
   and bump the data-ref pointer by one vector size.  */
4054 /* For interleaved stores we created vectorized defs for all the
4055 defs stored in OPRNDS in the previous iteration (previous copy).
4056 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4057 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4059 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4060 OPRNDS are of size 1. */
4061 for (i = 0; i < group_size; i++)
4063 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4064 VEC_index (tree, oprnds, i));
4065 VEC_replace(tree, dr_chain, i, vec_oprnd);
4066 VEC_replace(tree, oprnds, i, vec_oprnd);
4068 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4073 result_chain = VEC_alloc (tree, heap, group_size);
/* Permute the defs so consecutive memory locations get the interleaved
   element order.  */
4075 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4080 next_stmt = first_stmt;
4081 for (i = 0; i < group_size; i++)
4083 /* For strided stores vectorized defs are interleaved in
4084 vect_permute_store_chain(). */
4086 vec_oprnd = VEC_index(tree, result_chain, i);
4088 data_ref = build_fold_indirect_ref (dataref_ptr);
4089 /* Arguments are ready. Create the new vector stmt. */
4090 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4091 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4093 /* Set the VDEFs for the vector pointer. If this virtual def
4094 has a use outside the loop and a loop peel is performed
4095 then the def may be renamed by the peel. Mark it for
4096 renaming so the later use will also be renamed. */
4097 copy_virtual_operands (new_stmt, next_stmt);
4100 /* The original store is deleted so the same SSA_NAMEs
4102 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
4104 SSA_NAME_DEF_STMT (def) = new_stmt;
4105 mark_sym_for_renaming (SSA_NAME_VAR (def));
4108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4112 /* Create new names for all the definitions created by COPY and
4113 add replacement mappings for each new name. */
4114 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
4116 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
4117 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
4120 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4123 prev_stmt_info = vinfo_for_stmt (new_stmt);
4124 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4127 /* Bump the vector pointer. */
4128 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4136 /* Function vect_setup_realignment
4138 This function is called when vectorizing an unaligned load using
4139 the dr_unaligned_software_pipeline scheme.
4140 This function generates the following code at the loop prolog:
4143 msq_init = *(floor(p)); # prolog load
4144 realignment_token = call target_builtin;
4146 msq = phi (msq_init, ---)
4148 The code above sets up a new (vector) pointer, pointing to the first
4149 location accessed by STMT, and a "floor-aligned" load using that pointer.
4150 It also generates code to compute the "realignment-token" (if the relevant
4151 target hook was defined), and creates a phi-node at the loop-header bb
4152 whose arguments are the result of the prolog-load (created by this
4153 function) and the result of a load that takes place in the loop (to be
4154 created by the caller to this function).
4155 The caller to this function uses the phi-result (msq) to create the
4156 realignment code inside the loop, and sets up the missing phi argument,
4160 msq = phi (msq_init, lsq)
4161 lsq = *(floor(p')); # load in loop
4162 result = realign_load (msq, lsq, realignment_token);
4165 STMT - (scalar) load stmt to be vectorized. This load accesses
4166 a memory location that may be unaligned.
4167 BSI - place where new code is to be inserted.
4170 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4171 target hook, if defined.
4172 Return value - the result of the loop-header phi node. */
4175 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4176 tree *realignment_token)
4178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4179 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4181 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4182 edge pe = loop_preheader_edge (loop);
4183 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4196 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4197 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4198 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
/* ALIGN_INDIRECT_REF rounds the address down to the alignment boundary,
   so this prolog load is guaranteed to be aligned.  */
4200 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4201 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4202 new_temp = make_ssa_name (vec_dest, new_stmt);
4203 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
/* Inserting on the preheader edge must not split it into a new bb.  */
4204 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4205 gcc_assert (!new_bb);
4206 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
/* The prolog load reads the same memory; give it the original stmt's
   virtual uses, rewritten to their preheader versions.  */
4207 copy_virtual_operands (new_stmt, stmt);
4208 update_vuses_to_preheader (new_stmt, loop);
4210 /* 2. Create permutation mask, if required, in loop preheader. */
4211 if (targetm.vectorize.builtin_mask_for_load)
4215 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4216 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4217 vec_dest = vect_create_destination_var (scalar_dest,
4218 TREE_TYPE (new_stmt));
4219 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4220 new_temp = make_ssa_name (vec_dest, new_stmt);
4221 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4222 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4223 gcc_assert (!new_bb);
4224 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4226 /* The result of the CALL_EXPR to this builtin is determined from
4227 the value of the parameter and no global variables are touched
4228 which makes the builtin a "const" function. Requiring the
4229 builtin to have the "const" attribute makes it unnecessary
4230 to call mark_call_clobbered. */
4231 gcc_assert (TREE_READONLY (builtin_decl));
4234 /* 3. Create msq = phi <msq_init, lsq> in loop */
4235 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4236 msq = make_ssa_name (vec_dest, NULL_TREE);
/* Only the preheader argument (msq_init) is added here; the caller adds
   the latch argument (lsq) after it creates the in-loop load.  */
4237 phi_stmt = create_phi_node (msq, loop->header);
4238 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4239 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4245 /* Function vect_strided_load_supported.
4247 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4248 and FALSE otherwise. */
4251 vect_strided_load_supported (tree vectype)
4253 optab perm_even_optab, perm_odd_optab;
4256 mode = (int) TYPE_MODE (vectype);
/* Both even and odd extraction must have an optab AND an insn pattern
   for this vector mode; otherwise strided loads cannot be permuted.  */
4258 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4259 if (!perm_even_optab)
4261 if (vect_print_dump_info (REPORT_DETAILS))
4262 fprintf (vect_dump, "no optab for perm_even.");
4266 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4268 if (vect_print_dump_info (REPORT_DETAILS))
4269 fprintf (vect_dump, "perm_even op not supported by target.");
4273 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4274 if (!perm_odd_optab)
4276 if (vect_print_dump_info (REPORT_DETAILS))
4277 fprintf (vect_dump, "no optab for perm_odd.");
4281 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4283 if (vect_print_dump_info (REPORT_DETAILS))
4284 fprintf (vect_dump, "perm_odd op not supported by target.");
4291 /* Function vect_permute_load_chain.
4293 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4294 a power of 2, generate extract_even/odd stmts to reorder the input data
4295 correctly. Return the final references for loads in RESULT_CHAIN.
4297 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4298 The input is 4 vectors each containing 8 elements. We assign a number to each
4299 element, the input sequence is:
4301 1st vec: 0 1 2 3 4 5 6 7
4302 2nd vec: 8 9 10 11 12 13 14 15
4303 3rd vec: 16 17 18 19 20 21 22 23
4304 4th vec: 24 25 26 27 28 29 30 31
4306 The output sequence should be:
4308 1st vec: 0 4 8 12 16 20 24 28
4309 2nd vec: 1 5 9 13 17 21 25 29
4310 3rd vec: 2 6 10 14 18 22 26 30
4311 4th vec: 3 7 11 15 19 23 27 31
4313 i.e., the first output vector should contain the first elements of each
4314 interleaving group, etc.
4316 We use extract_even/odd instructions to create such output. The input of each
4317 extract_even/odd operation is two vectors
4321 and the output is the vector of extracted even/odd elements. The output of
4322 extract_even will be: 0 2 4 6
4323 and of extract_odd: 1 3 5 7
4326 The permutation is done in log LENGTH stages. In each stage extract_even and
4327 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4328 order. In our example,
4330 E1: extract_even (1st vec, 2nd vec)
4331 E2: extract_odd (1st vec, 2nd vec)
4332 E3: extract_even (3rd vec, 4th vec)
4333 E4: extract_odd (3rd vec, 4th vec)
4335 The output for the first stage will be:
4337 E1: 0 2 4 6 8 10 12 14
4338 E2: 1 3 5 7 9 11 13 15
4339 E3: 16 18 20 22 24 26 28 30
4340 E4: 17 19 21 23 25 27 29 31
4342 In order to proceed and create the correct sequence for the next stage (or
4343 for the correct output, if the second stage is the last one, as in our
4344 example), we first put the output of extract_even operation and then the
4345 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4346 The input for the second stage is:
4348 1st vec (E1): 0 2 4 6 8 10 12 14
4349 2nd vec (E3): 16 18 20 22 24 26 28 30
4350 3rd vec (E2): 1 3 5 7 9 11 13 15
4351 4th vec (E4): 17 19 21 23 25 27 29 31
4353 The output of the second stage:
4355 E1: 0 4 8 12 16 20 24 28
4356 E2: 2 6 10 14 18 22 26 30
4357 E3: 1 5 9 13 17 21 25 29
4358 E4: 3 7 11 15 19 23 27 31
4360 And RESULT_CHAIN after reordering:
4362 1st vec (E1): 0 4 8 12 16 20 24 28
4363 2nd vec (E3): 1 5 9 13 17 21 25 29
4364 3rd vec (E2): 2 6 10 14 18 22 26 30
4365 4th vec (E4): 3 7 11 15 19 23 27 31. */
/* NOTE(review): the jumps in the embedded numbering show that braces, some
   declarations (e.g. of i, j, stmt, tmp) and the return statements were
   elided from this excerpt; verify against the original file.  */
4368 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4369 unsigned int length,
4371 block_stmt_iterator *bsi,
4372 VEC(tree,heap) **result_chain)
4374 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4375 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4380 /* Check that the operation is supported. */
4381 if (!vect_strided_load_supported (vectype))
4384 *result_chain = VEC_copy (tree, heap, dr_chain);
/* log2(LENGTH) permutation stages; each stage processes DR_CHAIN pairwise.  */
4385 for (i = 0; i < exact_log2 (length); i++)
4387 for (j = 0; j < length; j +=2)
4389 first_vect = VEC_index (tree, dr_chain, j);
4390 second_vect = VEC_index (tree, dr_chain, j+1);
4392 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4393 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4394 DECL_GIMPLE_REG_P (perm_dest) = 1;
4395 add_referenced_var (perm_dest);
4397 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4398 first_vect, second_vect);
4399 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4401 data_ref = make_ssa_name (perm_dest, perm_stmt);
4402 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4403 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4404 mark_symbols_for_renaming (perm_stmt);
/* Even-extracted vectors occupy the first half of RESULT_CHAIN...  */
4406 VEC_replace (tree, *result_chain, j/2, data_ref);
4408 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4409 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4410 DECL_GIMPLE_REG_P (perm_dest) = 1;
4411 add_referenced_var (perm_dest);
4413 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4414 first_vect, second_vect);
4415 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4416 data_ref = make_ssa_name (perm_dest, perm_stmt);
4417 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4418 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4419 mark_symbols_for_renaming (perm_stmt);
/* ...and odd-extracted vectors the second half, per the header comment.  */
4421 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
/* Feed this stage's output back in as the next stage's input.  */
4423 dr_chain = VEC_copy (tree, heap, *result_chain);
4429 /* Function vect_transform_strided_load.
4431 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4432 to perform their permutation and ascribe the result vectorized statements to
4433 the scalar statements.
/* NOTE(review): the embedded numbering has gaps here (missing braces,
   return statements, and some loop-body lines); verify the structure
   against the original file before editing.  */
4437 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4438 block_stmt_iterator *bsi)
4440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4441 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4442 tree next_stmt, new_stmt;
4443 VEC(tree,heap) *result_chain = NULL;
4444 unsigned int i, gap_count;
4447 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4448 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4449 vectors, that are ready for vector computation. */
4450 result_chain = VEC_alloc (tree, heap, size);
4452 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4455 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4456 Since we scan the chain starting from its first node, their order
4457 corresponds to the order of data-refs in RESULT_CHAIN. */
4458 next_stmt = first_stmt;
4460 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
4465 /* Skip the gaps. Loads created for the gaps will be removed by dead
4466 code elimination pass later.
4467 DR_GROUP_GAP is the number of steps in elements from the previous
4468 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4469 correspond to the gaps.
4471 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4479 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4480 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4481 copies, and we put the new vector statement in the first available
4483 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4484 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
/* Otherwise walk the RELATED_STMT chain to its end and append there.  */
4487 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4488 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4489 vinfo_for_stmt (prev_stmt));
4492 prev_stmt = rel_stmt;
4493 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4495 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4497 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4499 /* If NEXT_STMT accesses the same DR as the previous statement,
4500 put the same TMP_DATA_REF as its vectorized statement; otherwise
4501 get the next data-ref from RESULT_CHAIN. */
4502 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4510 /* vectorizable_load.
4512 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4514 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4515 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4516 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this excerpt is missing many lines (braces, "return"
   statements, several declarations such as scalar_dest/op/mode/dummy and
   ptr_incr, and some case labels), judging by the jumps in the embedded
   numbering -- confirm every detail against the original file.  */
4519 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4522 tree vec_dest = NULL;
4523 tree data_ref = NULL;
4525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4526 stmt_vec_info prev_stmt_info;
4527 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4528 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4529 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4530 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4533 tree new_stmt = NULL_TREE;
4535 enum dr_alignment_support alignment_support_cheme;
4536 tree dataref_ptr = NULL_TREE;
/* NCOPIES is how many vector stmts are needed per scalar stmt when the
   vectorization factor exceeds the number of elements per vector.  */
4538 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4539 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4540 int i, j, group_size;
4541 tree msq = NULL_TREE, lsq;
4542 tree offset = NULL_TREE;
4543 tree realignment_token = NULL_TREE;
4544 tree phi_stmt = NULL_TREE;
4545 VEC(tree,heap) *dr_chain = NULL;
4546 bool strided_load = false;
/* Early rejection: the stmt must be relevant and a plain loop def.  */
4549 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4552 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4555 /* FORNOW: not yet supported. */
4556 if (STMT_VINFO_LIVE_P (stmt_info))
4558 if (vect_print_dump_info (REPORT_DETAILS))
4559 fprintf (vect_dump, "value used after loop.");
4563 /* Is vectorizable load? */
4564 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4567 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4568 if (TREE_CODE (scalar_dest) != SSA_NAME)
4571 op = GIMPLE_STMT_OPERAND (stmt, 1);
4572 if (TREE_CODE (op) != ARRAY_REF
4573 && TREE_CODE (op) != INDIRECT_REF
4574 && !DR_GROUP_FIRST_DR (stmt_info))
4577 if (!STMT_VINFO_DATA_REF (stmt_info))
4580 mode = (int) TYPE_MODE (vectype);
4582 /* FORNOW. In some cases can vectorize even if data-type not supported
4583 (e.g. - data copies). */
4584 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4586 if (vect_print_dump_info (REPORT_DETAILS))
4587 fprintf (vect_dump, "Aligned load, but unsupported type.");
4591 /* Check if the load is a part of an interleaving chain. */
4592 if (DR_GROUP_FIRST_DR (stmt_info))
4594 strided_load = true;
4596 /* Check if interleaving is supported. */
4597 if (!vect_strided_load_supported (vectype))
/* Analysis phase: record the stmt type and cost, but generate no code.  */
4601 if (!vec_stmt) /* transformation not required. */
4603 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4604 vect_model_load_cost (stmt_info, ncopies);
4608 if (vect_print_dump_info (REPORT_DETAILS))
4609 fprintf (vect_dump, "transform load.");
4615 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4616 /* Check if the chain of loads is already vectorized. */
4617 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4619 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4622 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4623 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4624 dr_chain = VEC_alloc (tree, heap, group_size);
4633 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
4634 gcc_assert (alignment_support_cheme);
4637 /* In case the vectorization factor (VF) is bigger than the number
4638 of elements that we can fit in a vectype (nunits), we have to generate
4639 more than one vector stmt - i.e - we need to "unroll" the
4640 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4641 from one copy of the vector stmt to the next, in the field
4642 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4643 stages to find the correct vector defs to be used when vectorizing
4644 stmts that use the defs of the current stmt. The example below illustrates
4645 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4646 4 vectorized stmts):
4648 before vectorization:
4649 RELATED_STMT VEC_STMT
4653 step 1: vectorize stmt S1:
4654 We first create the vector stmt VS1_0, and, as usual, record a
4655 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4656 Next, we create the vector stmt VS1_1, and record a pointer to
4657 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4658 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4660 RELATED_STMT VEC_STMT
4661 VS1_0: vx0 = memref0 VS1_1 -
4662 VS1_1: vx1 = memref1 VS1_2 -
4663 VS1_2: vx2 = memref2 VS1_3 -
4664 VS1_3: vx3 = memref3 - -
4665 S1: x = load - VS1_0
4668 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4669 information we recorded in RELATED_STMT field is used to vectorize
4672 /* In case of interleaving (non-unit strided access):
4679 Vectorized loads are created in the order of memory accesses
4680 starting from the access of the first stmt of the chain:
4683 VS2: vx1 = &base + vec_size*1
4684 VS3: vx3 = &base + vec_size*2
4685 VS4: vx4 = &base + vec_size*3
4687 Then permutation statements are generated:
4689 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4690 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4693 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4694 (the order of the data-refs in the output of vect_permute_load_chain
4695 corresponds to the order of scalar stmts in the interleaving chain - see
4696 the documentation of vect_permute_load_chain()).
4697 The generation of permutation stmts and recording them in
4698 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4700 In case of both multiple types and interleaving, the vector loads and
4701 permutation stmts above are created for every copy. The result vector stmts
4702 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4703 STMT_VINFO_RELATED_STMT for the next copies. */
4705 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4706 on a target that supports unaligned accesses (dr_unaligned_supported)
4707 we generate the following code:
4711 p = p + indx * vectype_size;
4716 Otherwise, the data reference is potentially unaligned on a target that
4717 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4718 then generate the following code, in which the data in each iteration is
4719 obtained by two vector loads, one from the previous iteration, and one
4720 from the current iteration:
4722 msq_init = *(floor(p1))
4723 p2 = initial_addr + VS - 1;
4724 realignment_token = call target_builtin;
4727 p2 = p2 + indx * vectype_size
4729 vec_dest = realign_load (msq, lsq, realignment_token)
/* Software-pipelined realignment needs the preheader setup (initial msq,
   realignment token, and a phi) before the main generation loop.  */
4734 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4736 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4737 phi_stmt = SSA_NAME_DEF_STMT (msq);
4738 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4741 prev_stmt_info = NULL;
4742 for (j = 0; j < ncopies; j++)
4744 /* 1. Create the vector pointer update chain. */
4746 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4747 &ptr_incr, false, NULL_TREE);
4749 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4751 for (i = 0; i < group_size; i++)
4753 /* 2. Create the vector-load in the loop. */
4754 switch (alignment_support_cheme)
4757 gcc_assert (aligned_access_p (first_dr));
4758 data_ref = build_fold_indirect_ref (dataref_ptr);
4760 case dr_unaligned_supported:
4762 int mis = DR_MISALIGNMENT (first_dr);
4763 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4765 gcc_assert (!aligned_access_p (first_dr));
/* MISALIGNED_INDIRECT_REF carries the known misalignment in bits.  */
4766 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
4768 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4771 case dr_unaligned_software_pipeline:
4772 gcc_assert (!aligned_access_p (first_dr));
4773 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4778 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4779 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4780 new_temp = make_ssa_name (vec_dest, new_stmt);
4781 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4782 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4783 copy_virtual_operands (new_stmt, stmt);
4784 mark_symbols_for_renaming (new_stmt);
4786 /* 3. Handle explicit realignment if necessary/supported. */
4787 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4790 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4791 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4792 if (!realignment_token)
4793 realignment_token = dataref_ptr;
4794 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4796 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4797 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4798 new_temp = make_ssa_name (vec_dest, new_stmt);
4799 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4800 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* On the very last generated load, close the msq phi via the latch edge.  */
4801 if (i == group_size - 1 && j == ncopies - 1)
4802 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4806 VEC_quick_push (tree, dr_chain, new_temp);
4807 if (i < group_size - 1)
4808 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
/* Strided case: permute the loaded chain and record the results.  */
4813 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4815 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4816 dr_chain = VEC_alloc (tree, heap, group_size);
4821 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4823 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4824 prev_stmt_info = vinfo_for_stmt (new_stmt);
4832 /* Function vectorizable_live_operation.
4834 STMT computes a value that is used outside the loop. Check if
4835 it can be supported. */
/* NOTE(review): braces and return statements appear elided from this
   excerpt (gaps in the embedded numbering); verify against the original.  */
4838 vectorizable_live_operation (tree stmt,
4839 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4840 tree *vec_stmt ATTRIBUTE_UNUSED)
4843 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4849 enum vect_def_type dt;
4851 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
/* Reductions are handled elsewhere (vect_create_epilog_for_reduction).  */
4853 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4856 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4859 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4862 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4863 op_type = TREE_OPERAND_LENGTH (operation);
4865 /* FORNOW: support only if all uses are invariant. This means
4866 that the scalar operations can remain in place, unvectorized.
4867 The original last scalar value that they compute will be used. */
4869 for (i = 0; i < op_type; i++)
4871 op = TREE_OPERAND (operation, i);
4872 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4874 if (vect_print_dump_info (REPORT_DETAILS))
4875 fprintf (vect_dump, "use not simple.");
4879 if (dt != vect_invariant_def && dt != vect_constant_def)
4883 /* No transformation is required for the cases we currently support. */
4888 /* Function vect_is_simple_cond.
4891 LOOP - the loop that is being vectorized.
4892 COND - Condition that is checked for simple use.
4894 Returns whether a COND can be vectorized. Checks whether
4895 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): braces/returns elided in this excerpt per the numbering
   gaps; verify against the original file.  */
4898 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4902 enum vect_def_type dt;
4904 if (!COMPARISON_CLASS_P (cond))
4907 lhs = TREE_OPERAND (cond, 0);
4908 rhs = TREE_OPERAND (cond, 1);
/* Each comparison operand must be either a simple-use SSA name or an
   integer/real constant.  */
4910 if (TREE_CODE (lhs) == SSA_NAME)
4912 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4913 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4916 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4919 if (TREE_CODE (rhs) == SSA_NAME)
4921 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4922 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4925 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4931 /* vectorizable_condition.
4933 Check if STMT is conditional modify expression that can be vectorized.
4934 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4935 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4938 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): gaps in the embedded numbering indicate elided lines
   (braces, returns, some declarations); verify against the original.  */
4941 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4943 tree scalar_dest = NULL_TREE;
4944 tree vec_dest = NULL_TREE;
4945 tree op = NULL_TREE;
4946 tree cond_expr, then_clause, else_clause;
4947 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4948 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4949 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4950 tree vec_compare, vec_cond_expr;
4952 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4953 enum machine_mode vec_mode;
4955 enum vect_def_type dt;
4956 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4957 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4959 gcc_assert (ncopies >= 1);
/* NOTE(review): presumably guarded by "if (ncopies > 1)" in the original
   (multiple-copy case not implemented) -- the condition line is elided.  */
4961 return false; /* FORNOW */
4963 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4966 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4969 /* FORNOW: not yet supported. */
4970 if (STMT_VINFO_LIVE_P (stmt_info))
4972 if (vect_print_dump_info (REPORT_DETAILS))
4973 fprintf (vect_dump, "value used after loop.");
4977 /* Is vectorizable conditional operation? */
4978 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4981 op = GIMPLE_STMT_OPERAND (stmt, 1);
4983 if (TREE_CODE (op) != COND_EXPR)
4986 cond_expr = TREE_OPERAND (op, 0);
4987 then_clause = TREE_OPERAND (op, 1);
4988 else_clause = TREE_OPERAND (op, 2);
4990 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4993 /* We do not handle two different vector types for the condition
4995 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
/* THEN/ELSE clauses must be simple-use SSA names or constants, same as
   the comparison operands checked in vect_is_simple_cond.  */
4998 if (TREE_CODE (then_clause) == SSA_NAME)
5000 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5001 if (!vect_is_simple_use (then_clause, loop_vinfo,
5002 &then_def_stmt, &def, &dt))
5005 else if (TREE_CODE (then_clause) != INTEGER_CST
5006 && TREE_CODE (then_clause) != REAL_CST)
5009 if (TREE_CODE (else_clause) == SSA_NAME)
5011 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5012 if (!vect_is_simple_use (else_clause, loop_vinfo,
5013 &else_def_stmt, &def, &dt))
5016 else if (TREE_CODE (else_clause) != INTEGER_CST
5017 && TREE_CODE (else_clause) != REAL_CST)
5021 vec_mode = TYPE_MODE (vectype);
/* Analysis phase ends here: record the stmt type and ask the target
   whether it can expand a VEC_COND_EXPR in this mode.  */
5025 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5026 return expand_vec_cond_expr_p (op, vec_mode);
5032 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5033 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5035 /* Handle cond expr. */
5037 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
5039 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
5040 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
5041 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
5043 /* Arguments are ready. create the new vector stmt. */
5044 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5045 vec_cond_lhs, vec_cond_rhs);
5046 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5047 vec_compare, vec_then_clause, vec_else_clause);
5049 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
5050 new_temp = make_ssa_name (vec_dest, *vec_stmt);
5051 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
5052 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
5057 /* Function vect_transform_stmt.
5059 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* NOTE(review): braces, "gcc_assert (done)" / "break" lines and the final
   return appear elided from this excerpt (numbering gaps); verify against
   the original file.  */
5062 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
5064 bool is_store = false;
5065 tree vec_stmt = NULL_TREE;
5066 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5067 tree orig_stmt_in_pattern;
/* Dispatch on the stmt kind recorded during the analysis phase.  */
5070 switch (STMT_VINFO_TYPE (stmt_info))
5072 case type_demotion_vec_info_type:
5073 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
5077 case type_promotion_vec_info_type:
5078 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
5082 case type_conversion_vec_info_type:
5083 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
5087 case induc_vec_info_type:
5088 done = vectorizable_induction (stmt, bsi, &vec_stmt);
5092 case op_vec_info_type:
5093 done = vectorizable_operation (stmt, bsi, &vec_stmt);
5097 case assignment_vec_info_type:
5098 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
5102 case load_vec_info_type:
5103 done = vectorizable_load (stmt, bsi, &vec_stmt);
5107 case store_vec_info_type:
5108 done = vectorizable_store (stmt, bsi, &vec_stmt);
5110 if (DR_GROUP_FIRST_DR (stmt_info))
5112 /* In case of interleaving, the whole chain is vectorized when the
5113 last store in the chain is reached. Store stmts before the last
5114 one are skipped, and there vec_stmt_info shouldn't be freed
5116 *strided_store = true;
5117 if (STMT_VINFO_VEC_STMT (stmt_info))
5124 case condition_vec_info_type:
5125 done = vectorizable_condition (stmt, bsi, &vec_stmt);
5129 case call_vec_info_type:
5130 done = vectorizable_call (stmt, bsi, &vec_stmt);
5133 case reduc_vec_info_type:
5134 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
/* Default case: the stmt kind was not set by analysis.  */
5139 if (!STMT_VINFO_LIVE_P (stmt_info))
5141 if (vect_print_dump_info (REPORT_DETAILS))
5142 fprintf (vect_dump, "stmt not supported.");
/* Live-but-not-reduction values need the live-operation handling too.  */
5147 if (STMT_VINFO_LIVE_P (stmt_info)
5148 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5150 done = vectorizable_live_operation (stmt, bsi, &vec_stmt)
5156 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5157 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5158 if (orig_stmt_in_pattern)
5160 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5161 /* STMT was inserted by the vectorizer to replace a computation idiom.
5162 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5163 computed this idiom. We need to record a pointer to VEC_STMT in
5164 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5165 documentation of vect_pattern_recog. */
5166 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5168 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
5169 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5178 /* This function builds ni_name = number of iterations loop executes
5179 on the loop preheader. */
/* NOTE(review): braces, the return statement, and the "if (stmt)" guard
   around the edge insertion appear elided from this excerpt.  */
5182 vect_build_loop_niters (loop_vec_info loop_vinfo)
5184 tree ni_name, stmt, var;
5186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Unshare NITERS so gimplification cannot corrupt the original tree.  */
5187 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5189 var = create_tmp_var (TREE_TYPE (ni), "niters");
5190 add_referenced_var (var);
/* Force NI into a gimple value; any computation stmts produced are
   inserted on the preheader edge below.  */
5191 ni_name = force_gimple_operand (ni, &stmt, false, var);
5193 pe = loop_preheader_edge (loop);
5196 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5197 gcc_assert (!new_bb);
5204 /* This function generates the following statements:
5206 ni_name = number of iterations loop executes
5207 ratio = ni_name / vf
5208 ratio_mult_vf_name = ratio * vf
5210 and places them at the loop preheader edge. */
/* NOTE(review): some declarations, braces and the parameter list's first
   entry appear elided from this excerpt (numbering gaps); verify against
   the original file.  */
5213 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5215 tree *ratio_mult_vf_name_ptr,
5216 tree *ratio_name_ptr)
5224 tree ratio_mult_vf_name;
5225 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5226 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5227 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5230 pe = loop_preheader_edge (loop);
5232 /* Generate temporary variable that contains
5233 number of iterations loop executes. */
5235 ni_name = vect_build_loop_niters (loop_vinfo);
/* VF is a power of two, so division/multiplication by VF is done with
   shifts by log2(VF).  */
5236 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5238 /* Create: ratio = ni >> log2(vf) */
5240 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5241 if (!is_gimple_val (ratio_name))
5243 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5244 add_referenced_var (var);
5246 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5247 pe = loop_preheader_edge (loop);
5248 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5249 gcc_assert (!new_bb);
5252 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5254 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5255 ratio_name, log_vf);
5256 if (!is_gimple_val (ratio_mult_vf_name))
5258 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5259 add_referenced_var (var);
5261 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5263 pe = loop_preheader_edge (loop);
5264 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5265 gcc_assert (!new_bb);
/* Return all three names through the output parameters.  */
5268 *ni_name_ptr = ni_name;
5269 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5270 *ratio_name_ptr = ratio_name;
5276 /* Function update_vuses_to_preheader.
5279 STMT - a statement with potential VUSEs.
5280 LOOP - the loop whose preheader will contain STMT.
5282 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5283 appears to be defined in a VDEF in another statement in a loop.
5284 One such case is when the VUSE is at the dereference of a __restricted__
5285 pointer in a load and the VDEF is at the dereference of a different
5286 __restricted__ pointer in a store. Vectorization may result in
5287 copy_virtual_uses being called to copy the problematic VUSE to a new
5288 statement that is being inserted in the loop preheader. This procedure
5289 is called to change the SSA_NAME in the new statement's VUSE from the
5290 SSA_NAME updated in the loop to the related SSA_NAME available on the
5291 path entering the loop.
5293 When this function is called, we have the following situation:
5298 # name1 = phi < name0 , name2>
5303 # name2 = vdef <name1>
5308 Stmt S1 was created in the loop preheader block as part of misaligned-load
5309 handling. This function fixes the name of the vuse of S1 from 'name1' to
/* NOTE(review): the example diagram and some braces were elided from this
   excerpt ('name0' is the fixed-to name, per the phi shown above).  */
5313 update_vuses_to_preheader (tree stmt, struct loop *loop)
5315 basic_block header_bb = loop->header;
5316 edge preheader_e = loop_preheader_edge (loop);
5318 use_operand_p use_p;
/* Rewrite every virtual use operand of STMT whose definition lives
   inside LOOP.  */
5320 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5322 tree ssa_name = USE_FROM_PTR (use_p);
5323 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5324 tree name_var = SSA_NAME_VAR (ssa_name);
5325 basic_block bb = bb_for_stmt (def_stmt);
5327 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5328 if (!IS_EMPTY_STMT (def_stmt)
5329 && flow_bb_inside_loop_p (loop, bb))
5331 /* If the block containing the statement defining the SSA_NAME
5332 is in the loop then it's necessary to find the definition
5333 outside the loop using the PHI nodes of the header. */
5335 bool updated = false;
/* Match by underlying variable: the header phi for NAME_VAR carries the
   name that reaches the loop from the preheader.  */
5337 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5339 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5341 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
/* A matching header phi must always exist for an in-loop virtual def.  */
5346 gcc_assert (updated);
5352 /* Function vect_update_ivs_after_vectorizer.
5354 "Advance" the induction variables of LOOP to the value they should take
5355 after the execution of LOOP. This is currently necessary because the
5356 vectorizer does not handle induction variables that are used after the
5357 loop. Such a situation occurs when the last iterations of LOOP are
5359 1. We introduced new uses after LOOP for IVs that were not originally used
5360 after LOOP: the IVs of LOOP are now used by an epilog loop.
5361 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5362 times, whereas the loop IVs should be bumped N times.
5365 - LOOP - a loop that is going to be vectorized. The last few iterations
5366 of LOOP were peeled.
5367 - NITERS - the number of iterations that LOOP executes (before it is
5368 vectorized). i.e, the number of times the ivs should be bumped.
5369 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5370 coming out from LOOP on which there are uses of the LOOP ivs
5371 (this is the path from LOOP->exit to epilog_loop->preheader).
5373 The new definitions of the ivs are placed in LOOP->exit.
5374 The phi args associated with the edge UPDATE_E in the bb
5375 UPDATE_E->dest are updated accordingly.
5377 Assumption 1: Like the rest of the vectorizer, this function assumes
5378 a single loop exit that has a single predecessor.
5380 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5381 organized in the same order.
5383 Assumption 3: The access function of the ivs is simple enough (see
5384 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5386 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5387 coming out of LOOP on which the ivs of LOOP are used (this is the path
5388 that leads to the epilog loop; other paths skip the epilog loop). This
5389 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5390 needs to have its phis updated.
5394 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5398 basic_block exit_bb = single_exit (loop)->dest;
5400 basic_block update_bb = update_e->dest;
5402 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5404 /* Make sure there exists a single-predecessor exit bb: */
5405 gcc_assert (single_pred_p (exit_bb));
5407 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5409 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5411 tree access_fn = NULL;
5412 tree evolution_part;
5415 tree var, ni, ni_name;
5416 block_stmt_iterator last_bsi;
5418 if (vect_print_dump_info (REPORT_DETAILS))
5420 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5421 print_generic_expr (vect_dump, phi, TDF_SLIM);
5424 /* Skip virtual phi's. */
5425 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5427 if (vect_print_dump_info (REPORT_DETAILS))
5428 fprintf (vect_dump, "virtual phi. skip.");
5432 /* Skip reduction phis. */
5433 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5435 if (vect_print_dump_info (REPORT_DETAILS))
5436 fprintf (vect_dump, "reduc phi. skip.");
5440 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5441 gcc_assert (access_fn);
5443 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5444 gcc_assert (evolution_part != NULL_TREE);
5446 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5447 of degree >= 2 or exponential. */
5448 gcc_assert (!tree_is_chrec (evolution_part));
5450 step_expr = evolution_part;
5451 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5454 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5455 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5457 fold_convert (sizetype,
5458 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5459 niters, step_expr)));
5461 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5462 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5463 fold_convert (TREE_TYPE (init_expr),
5470 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5471 add_referenced_var (var);
5473 last_bsi = bsi_last (exit_bb);
5474 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
5475 true, BSI_SAME_STMT);
5477 /* Fix phi expressions in the successor bb. */
5478 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
5494 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5496 tree ni_name, ratio_mult_vf_name;
5497 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5498 struct loop *new_loop;
5500 basic_block preheader;
5503 int min_scalar_loop_bound;
5504 int min_profitable_iters;
5506 if (vect_print_dump_info (REPORT_DETAILS))
5507 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5509 initialize_original_copy_tables ();
5511 /* Generate the following variables on the preheader of original loop:
5513 ni_name = number of iteration the original loop executes
5514 ratio = ni_name / vf
5515 ratio_mult_vf_name = ratio * vf */
5516 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5517 &ratio_mult_vf_name, ratio);
5519 loop_num = loop->num;
5521 /* Analyze cost to set threshhold for vectorized loop. */
5522 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
5523 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5524 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5526 /* Use the cost model only if it is more conservative than user specified
5529 th = (unsigned) min_scalar_loop_bound;
5530 if (min_profitable_iters
5531 && (!min_scalar_loop_bound
5532 || min_profitable_iters > min_scalar_loop_bound))
5533 th = (unsigned) min_profitable_iters;
5535 if (min_profitable_iters
5536 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5537 && vect_print_dump_info (REPORT_DETAILS))
5538 fprintf (vect_dump, "vectorization may not be profitable.");
5540 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5541 ratio_mult_vf_name, ni_name, false,
5543 gcc_assert (new_loop);
5544 gcc_assert (loop_num == loop->num);
5545 #ifdef ENABLE_CHECKING
5546 slpeel_verify_cfg_after_peeling (loop, new_loop);
5549 /* A guard that controls whether the new_loop is to be executed or skipped
5550 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5551 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5552 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5553 is on the path where the LOOP IVs are used and need to be updated. */
5555 preheader = loop_preheader_edge (new_loop)->src;
5556 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5557 update_e = EDGE_PRED (preheader, 0);
5559 update_e = EDGE_PRED (preheader, 1);
5561 /* Update IVs of original loop as if they were advanced
5562 by ratio_mult_vf_name steps. */
5563 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5565 /* After peeling we have to reset scalar evolution analyzer. */
5568 free_original_copy_tables ();
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
	whose type is the inner type of the vectype)

   For interleaving:

   prolog_niters = min ( LOOP_NITERS ,
			(VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
	 where group_size is the size of the interleaved group.

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
5605 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5607 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5608 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5610 tree iters, iters_name;
5613 tree dr_stmt = DR_STMT (dr);
5614 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5616 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5617 tree niters_type = TREE_TYPE (loop_niters);
5619 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5620 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5622 if (DR_GROUP_FIRST_DR (stmt_info))
5624 /* For interleaved access element size must be multiplied by the size of
5625 the interleaved group. */
5626 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5627 DR_GROUP_FIRST_DR (stmt_info)));
5628 element_size *= group_size;
5631 pe = loop_preheader_edge (loop);
5633 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5635 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5636 int elem_misalign = byte_misalign / element_size;
5638 if (vect_print_dump_info (REPORT_DETAILS))
5639 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
5640 iters = build_int_cst (niters_type,
5641 (nelements - elem_misalign)&(nelements/group_size-1));
5645 tree new_stmts = NULL_TREE;
5647 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5648 tree ptr_type = TREE_TYPE (start_addr);
5649 tree size = TYPE_SIZE (ptr_type);
5650 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5651 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5652 tree elem_size_log =
5653 build_int_cst (type, exact_log2 (vectype_align/nelements));
5654 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5655 tree nelements_tree = build_int_cst (type, nelements);
5659 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5660 gcc_assert (!new_bb);
5662 /* Create: byte_misalign = addr & (vectype_size - 1) */
5664 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5666 /* Create: elem_misalign = byte_misalign / element_size */
5668 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5670 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5671 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5672 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5673 iters = fold_convert (niters_type, iters);
5676 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5677 /* If the loop bound is known at compile time we already verified that it is
5678 greater than vf; since the misalignment ('iters') is at most vf, there's
5679 no need to generate the MIN_EXPR in this case. */
5680 if (TREE_CODE (loop_niters) != INTEGER_CST)
5681 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5683 if (vect_print_dump_info (REPORT_DETAILS))
5685 fprintf (vect_dump, "niters for prolog loop: ");
5686 print_generic_expr (vect_dump, iters, TDF_SLIM);
5689 var = create_tmp_var (niters_type, "prolog_loop_niters");
5690 add_referenced_var (var);
5691 iters_name = force_gimple_operand (iters, &stmt, false, var);
5693 /* Insert stmt on loop preheader edge. */
5696 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5697 gcc_assert (!new_bb);
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
5712 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5714 tree offset = DR_OFFSET (dr);
5716 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5717 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5718 DR_OFFSET (dr) = offset;
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */
5731 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5734 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5735 struct data_reference *dr;
5737 if (vect_print_dump_info (REPORT_DETAILS))
5738 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
5740 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5741 vect_update_init_of_dr (dr, niters);
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
5754 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5756 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5757 tree niters_of_prolog_loop, ni_name;
5759 struct loop *new_loop;
5761 if (vect_print_dump_info (REPORT_DETAILS))
5762 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5764 initialize_original_copy_tables ();
5766 ni_name = vect_build_loop_niters (loop_vinfo);
5767 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5769 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
5771 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5772 niters_of_prolog_loop, ni_name, true, 0);
5773 gcc_assert (new_loop);
5774 #ifdef ENABLE_CHECKING
5775 slpeel_verify_cfg_after_peeling (new_loop, loop);
5778 /* Update number of times loop executes. */
5779 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5780 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5781 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5783 /* Update the init conditions of the access functions of all data refs. */
5784 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5786 /* After peeling we have to reset scalar evolution analyzer. */
5789 free_original_copy_tables ();
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of data references (array element references) whose alignment must be
   checked at runtime.

   Input:
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */
5817 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5818 tree *cond_expr_stmt_list)
5820 VEC(tree,heap) *may_misalign_stmts
5821 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5823 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5827 tree int_ptrsize_type;
5829 tree or_tmp_name = NULL_TREE;
5830 tree and_tmp, and_tmp_name, and_stmt;
5833 /* Check that mask is one less than a power of 2, i.e., mask is
5834 all zeros followed by all ones. */
5835 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5837 /* CHECKME: what is the best integer or unsigned type to use to hold a
5838 cast from a pointer value? */
5839 psize = TYPE_SIZE (ptr_type_node);
5841 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5843 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
5844 of the first vector of the i'th data reference. */
5846 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5848 tree new_stmt_list = NULL_TREE;
5850 tree addr_tmp, addr_tmp_name, addr_stmt;
5851 tree or_tmp, new_or_tmp_name, or_stmt;
5853 /* create: addr_tmp = (int)(address_of_first_vector) */
5854 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5858 if (new_stmt_list != NULL_TREE)
5859 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5861 sprintf (tmp_name, "%s%d", "addr2int", i);
5862 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5863 add_referenced_var (addr_tmp);
5864 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5865 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5866 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5867 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5868 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5870 /* The addresses are OR together. */
5872 if (or_tmp_name != NULL_TREE)
5874 /* create: or_tmp = or_tmp | addr_tmp */
5875 sprintf (tmp_name, "%s%d", "orptrs", i);
5876 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5877 add_referenced_var (or_tmp);
5878 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5879 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5880 or_tmp_name, addr_tmp_name);
5881 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5882 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5883 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5884 or_tmp_name = new_or_tmp_name;
5887 or_tmp_name = addr_tmp_name;
5891 mask_cst = build_int_cst (int_ptrsize_type, mask);
5893 /* create: and_tmp = or_tmp & mask */
5894 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5895 add_referenced_var (and_tmp);
5896 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5898 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5899 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5900 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5901 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5903 /* Make and_tmp the left operand of the conditional test against zero.
5904 if and_tmp has a nonzero bit then some address is unaligned. */
5905 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5906 return build2 (EQ_EXPR, boolean_type_node,
5907 and_tmp_name, ptrsize_zero);
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - created vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */
5918 vect_transform_loop (loop_vec_info loop_vinfo)
5920 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5921 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5922 int nbbs = loop->num_nodes;
5923 block_stmt_iterator si, next_si;
5926 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5929 if (vect_print_dump_info (REPORT_DETAILS))
5930 fprintf (vect_dump, "=== vec_transform_loop ===");
5932 /* If the loop has data references that may or may not be aligned then
5933 two versions of the loop need to be generated, one which is vectorized
5934 and one which isn't. A test is then generated to control which of the
5935 loops is executed. The test checks for the alignment of all of the
5936 data references that may or may not be aligned. */
5938 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5942 tree cond_expr_stmt_list = NULL_TREE;
5943 basic_block condition_bb;
5944 block_stmt_iterator cond_exp_bsi;
5945 basic_block merge_bb;
5946 basic_block new_exit_bb;
5948 tree orig_phi, new_phi, arg;
5949 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5951 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5952 &cond_expr_stmt_list);
5953 initialize_original_copy_tables ();
5954 nloop = loop_version (loop, cond_expr, &condition_bb,
5955 prob, prob, REG_BR_PROB_BASE - prob, true);
5956 free_original_copy_tables();
5958 /** Loop versioning violates an assumption we try to maintain during
5959 vectorization - that the loop exit block has a single predecessor.
5960 After versioning, the exit block of both loop versions is the same
5961 basic block (i.e. it has two predecessors). Just in order to simplify
5962 following transformations in the vectorizer, we fix this situation
5963 here by adding a new (empty) block on the exit-edge of the loop,
5964 with the proper loop-exit phis to maintain loop-closed-form. **/
5966 merge_bb = single_exit (loop)->dest;
5967 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5968 new_exit_bb = split_edge (single_exit (loop));
5969 new_exit_e = single_exit (loop);
5970 e = EDGE_SUCC (new_exit_bb, 0);
5972 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5973 orig_phi = PHI_CHAIN (orig_phi))
5975 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5977 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5978 add_phi_arg (new_phi, arg, new_exit_e);
5979 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5982 /** end loop-exit-fixes after versioning **/
5984 update_ssa (TODO_update_ssa);
5985 cond_exp_bsi = bsi_last (condition_bb);
5986 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5989 /* CHECKME: we wouldn't need this if we called update_ssa once
5991 bitmap_zero (vect_memsyms_to_rename);
5993 /* Peel the loop if there are data refs with unknown alignment.
5994 Only one data ref with unknown store is allowed. */
5996 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5997 vect_do_peeling_for_alignment (loop_vinfo);
5999 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6000 compile time constant), or it is a constant that doesn't divide by the
6001 vectorization factor, then an epilog loop needs to be created.
6002 We therefore duplicate the loop: the original loop will be vectorized,
6003 and will compute the first (n/VF) iterations. The second copy of the loop
6004 will remain scalar and will compute the remaining (n%VF) iterations.
6005 (VF is the vectorization factor). */
6007 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
6008 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
6009 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
6010 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
6012 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6013 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6015 /* 1) Make sure the loop header has exactly two entries
6016 2) Make sure we have a preheader basic block. */
6018 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6020 split_edge (loop_preheader_edge (loop));
6022 /* FORNOW: the vectorizer supports only loops which body consist
6023 of one basic block (header + empty latch). When the vectorizer will
6024 support more involved loop forms, the order by which the BBs are
6025 traversed need to be reconsidered. */
6027 for (i = 0; i < nbbs; i++)
6029 basic_block bb = bbs[i];
6030 stmt_vec_info stmt_info;
6033 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
6035 if (vect_print_dump_info (REPORT_DETAILS))
6037 fprintf (vect_dump, "------>vectorizing phi: ");
6038 print_generic_expr (vect_dump, phi, TDF_SLIM);
6040 stmt_info = vinfo_for_stmt (phi);
6043 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6044 && !STMT_VINFO_LIVE_P (stmt_info))
6047 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6048 != (unsigned HOST_WIDE_INT) vectorization_factor)
6049 && vect_print_dump_info (REPORT_DETAILS))
6050 fprintf (vect_dump, "multiple-types.");
6052 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6054 if (vect_print_dump_info (REPORT_DETAILS))
6055 fprintf (vect_dump, "transform phi.");
6056 vect_transform_stmt (phi, NULL, NULL);
6060 for (si = bsi_start (bb); !bsi_end_p (si);)
6062 tree stmt = bsi_stmt (si);
6065 if (vect_print_dump_info (REPORT_DETAILS))
6067 fprintf (vect_dump, "------>vectorizing statement: ");
6068 print_generic_expr (vect_dump, stmt, TDF_SLIM);
6070 stmt_info = vinfo_for_stmt (stmt);
6071 gcc_assert (stmt_info);
6072 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6073 && !STMT_VINFO_LIVE_P (stmt_info))
6079 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
6080 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6081 != (unsigned HOST_WIDE_INT) vectorization_factor)
6082 && vect_print_dump_info (REPORT_DETAILS))
6083 fprintf (vect_dump, "multiple-types.");
6085 /* -------- vectorize statement ------------ */
6086 if (vect_print_dump_info (REPORT_DETAILS))
6087 fprintf (vect_dump, "transform statement.");
6089 strided_store = false;
6090 is_store = vect_transform_stmt (stmt, &si, &strided_store);
6094 if (DR_GROUP_FIRST_DR (stmt_info))
6096 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6097 interleaving chain was completed - free all the stores in
6099 tree next = DR_GROUP_FIRST_DR (stmt_info);
6101 stmt_vec_info next_stmt_info;
6105 next_si = bsi_for_stmt (next);
6106 next_stmt_info = vinfo_for_stmt (next);
6107 /* Free the attached stmt_vec_info and remove the stmt. */
6108 ann = stmt_ann (next);
6109 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6110 free (next_stmt_info);
6111 set_stmt_info (ann, NULL);
6112 bsi_remove (&next_si, true);
6115 bsi_remove (&si, true);
6120 /* Free the attached stmt_vec_info and remove the stmt. */
6121 ann = stmt_ann (stmt);
6123 set_stmt_info (ann, NULL);
6124 bsi_remove (&si, true);
6132 slpeel_make_loop_iterate_ntimes (loop, ratio);
6134 mark_set_for_renaming (vect_memsyms_to_rename);
6136 /* The memory tags and pointers in vectorized statements need to
6137 have their SSA forms updated. FIXME, why can't this be delayed
6138 until all the loops have been transformed? */
6139 update_ssa (TODO_update_ssa);
6141 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6142 fprintf (vect_dump, "LOOP VECTORIZED.");