1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
46 /* Function vect_mark_relevant.
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST.
   RELEVANT and LIVE_P are merged into the marks already recorded: the live
   flag is OR-ed in, and the relevance is replaced only when RELEVANT
   compares greater than the stored value. When STMT is in a pattern, the
   marks examined and updated are those of its related pattern stmt. */
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Merge the requested marks: liveness accumulates and relevance is never
   lowered. */
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* Both marks still equal the values saved before the merge. */
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
93 VEC_safe_push (gimple, heap, *worklist, stmt);
97 /* Function vect_stmt_relevant_p.
99 Return true if STMT in loop that is represented by LOOP_VINFO is
100 "relevant for vectorization".
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - it is a control stmt in the loop (except for the exit condition).
    *RELEVANT starts out as vect_unused_in_scope and is set to
    vect_used_in_scope for control stmts other than the loop exit condition
    and for non-phi stmts that have a vdef. The value returned is
    (*LIVE_P || *RELEVANT).
107 CHECKME: what other side effects would the vectorizer allow? */
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
115 imm_use_iterator imm_iter;
119 *relevant = vect_unused_in_scope;
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
128 /* changing memory. */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
137 /* uses outside the loop. */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
148 /* We expect all such uses to be in the loop exit phis
149 (because of loop closed form) */
150 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
151 gcc_assert (bb == single_exit (loop)->dest);
158 return (*live_p || *relevant);
162 /* Function exist_non_indexing_operands_for_use_p
164 USE is one of the uses attached to STMT. Check if USE is
165 used in STMT for anything other than indexing an array. */
168 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
173 /* USE corresponds to some operand in STMT. If there is no data
174 reference in STMT, then any operand that corresponds to USE
175 is not indexing an array. */
176 if (!STMT_VINFO_DATA_REF (stmt_info))
179 /* STMT has a data_ref. FORNOW this means that it is of one of
183 (This should have been verified in analyze_data_refs).
185 'var' in the second case corresponds to a def, not a use,
186 so USE cannot correspond to any operands that are not used
189 Therefore, all we need to check is if STMT falls into the
190 first case, and whether var corresponds to USE. */
192 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
195 if (!gimple_assign_copy_p (stmt))
197 operand = gimple_assign_rhs1 (stmt);
199 if (TREE_CODE (operand) != SSA_NAME)
210 Function process_use.
213 - a USE in STMT in a loop represented by LOOP_VINFO
214 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
215 that defined USE. This is done by calling mark_relevant and passing it
216 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
219 Generally, LIVE_P and RELEVANT are used to define the liveness and
220 relevance info of the DEF_STMT of this USE:
221 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
222 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
224 - case 1: If USE is used only for address computations (e.g. array indexing),
225 which does not need to be directly vectorized, then the liveness/relevance
226 of the respective DEF_STMT is left unchanged.
227 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
228 skip DEF_STMT because it has already been processed.
229 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
230 be modified accordingly.
232 Return true if everything is as expected. Return false otherwise. */
235 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
236 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
239 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
240 stmt_vec_info dstmt_vinfo;
241 basic_block bb, def_bb;
244 enum vect_def_type dt;
246 /* case 1: we are only interested in uses that need to be vectorized. Uses
247 that are used for address computation are not considered relevant. */
248 if (!exist_non_indexing_operands_for_use_p (use, stmt))
251 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
254 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
258 if (!def_stmt || gimple_nop_p (def_stmt))
261 def_bb = gimple_bb (def_stmt);
262 if (!flow_bb_inside_loop_p (loop, def_bb))
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "def_stmt is out of loop.");
269 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
270 DEF_STMT must have already been processed, because this should be the
271 only way that STMT, which is a reduction-phi, was put in the worklist,
272 as there should be no other uses for DEF_STMT in the loop. So we just
273 check that everything is as expected, and we are done. */
274 dstmt_vinfo = vinfo_for_stmt (def_stmt);
275 bb = gimple_bb (stmt);
276 if (gimple_code (stmt) == GIMPLE_PHI
277 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
278 && gimple_code (def_stmt) != GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
280 && bb->loop_father == def_bb->loop_father)
282 if (vect_print_dump_info (REPORT_DETAILS))
283 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If DEF_STMT is in a pattern, the marks checked by the assertions below
   are those of its related pattern stmt. */
284 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
285 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
286 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
287 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
288 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
292 /* case 3a: outer-loop stmt defining an inner-loop stmt:
293 outer-loop-header-bb:
299 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
301 if (vect_print_dump_info (REPORT_DETAILS))
302 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
305 case vect_unused_in_scope:
306 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
307 vect_used_by_reduction : vect_unused_in_scope;
309 case vect_used_in_outer_by_reduction:
310 relevant = vect_used_by_reduction;
312 case vect_used_in_outer:
313 relevant = vect_used_in_scope;
315 case vect_used_by_reduction:
316 case vect_used_in_scope:
324 /* case 3b: inner-loop stmt defining an outer-loop stmt:
325 outer-loop-header-bb:
331 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
333 if (vect_print_dump_info (REPORT_DETAILS))
334 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
337 case vect_unused_in_scope:
338 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
339 vect_used_in_outer_by_reduction : vect_unused_in_scope;
342 case vect_used_in_outer_by_reduction:
343 case vect_used_in_outer:
346 case vect_used_by_reduction:
347 relevant = vect_used_in_outer_by_reduction;
350 case vect_used_in_scope:
351 relevant = vect_used_in_outer;
359 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
364 /* Function vect_mark_stmts_to_be_vectorized.
366 Not all stmts in the loop need to be vectorized. For example:
375 Stmt 1 and 3 do not need to be vectorized, because loop control and
376 addressing of vectorized data-refs are handled differently.
378 This pass detects such stmts.
    The worklist is first seeded with every phi and stmt of the loop's
    basic blocks for which vect_stmt_relevant_p returns true. Each stmt
    popped from the worklist then has its relevance/liveness passed to
    process_use for every one of its uses. The worklist is released with
    VEC_free when an unsupported reduction use is found, when process_use
    fails, and after the worklist has been drained. */
381 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
383 VEC(gimple,heap) *worklist;
384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
385 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
386 unsigned int nbbs = loop->num_nodes;
387 gimple_stmt_iterator si;
390 stmt_vec_info stmt_vinfo;
394 enum vect_relevant relevant;
396 if (vect_print_dump_info (REPORT_DETAILS))
397 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
399 worklist = VEC_alloc (gimple, heap, 64);
401 /* 1. Init worklist. */
402 for (i = 0; i < nbbs; i++)
405 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
408 if (vect_print_dump_info (REPORT_DETAILS))
410 fprintf (vect_dump, "init: phi relevant? ");
411 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
414 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
415 vect_mark_relevant (&worklist, phi, relevant, live_p);
417 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
419 stmt = gsi_stmt (si);
420 if (vect_print_dump_info (REPORT_DETAILS))
422 fprintf (vect_dump, "init: stmt relevant? ");
423 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
426 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
427 vect_mark_relevant (&worklist, stmt, relevant, live_p);
431 /* 2. Process the worklist. */
432 while (VEC_length (gimple, worklist) > 0)
437 stmt = VEC_pop (gimple, worklist);
438 if (vect_print_dump_info (REPORT_DETAILS))
440 fprintf (vect_dump, "worklist: examine stmt: ");
441 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
444 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
445 (DEF_STMT) as relevant/irrelevant and live/dead according to the
446 liveness and relevance properties of STMT. */
447 stmt_vinfo = vinfo_for_stmt (stmt);
448 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
449 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
451 /* Generally, the liveness and relevance properties of STMT are
452 propagated as is to the DEF_STMTs of its USEs:
453 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
454 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
456 One exception is when STMT has been identified as defining a reduction
457 variable; in this case we set the liveness/relevance as follows:
459 relevant = vect_used_by_reduction
460 This is because we distinguish between two kinds of relevant stmts -
461 those that are used by a reduction computation, and those that are
462 (also) used by a regular computation. This allows us later on to
463 identify stmts that are used solely by a reduction, and therefore the
464 order of the results that they produce does not have to be kept.
466 Reduction phis are expected to be used by a reduction stmt, or by a stmt
467 in an outer loop; other reduction stmts are expected to be
468 in the loop, and possibly used by a stmt in an outer loop.
469 Here are the expected values of "relevant" for reduction phis/stmts:
472 vect_unused_in_scope ok
473 vect_used_in_outer_by_reduction ok ok
474 vect_used_in_outer ok ok
475 vect_used_by_reduction ok
476 vect_used_in_scope */
478 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
480 enum vect_relevant tmp_relevant = relevant;
481 switch (tmp_relevant)
483 case vect_unused_in_scope:
484 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
485 relevant = vect_used_by_reduction;
488 case vect_used_in_outer_by_reduction:
489 case vect_used_in_outer:
490 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
491 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
492 && (gimple_assign_rhs_code (stmt)
496 case vect_used_by_reduction:
497 if (gimple_code (stmt) == GIMPLE_PHI)
500 case vect_used_in_scope:
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "unsupported use of reduction.");
504 VEC_free (gimple, heap, worklist);
510 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
512 tree op = USE_FROM_PTR (use_p);
513 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
515 VEC_free (gimple, heap, worklist);
519 } /* while worklist */
521 VEC_free (gimple, heap, worklist);
527 cost_for_stmt (gimple stmt)
529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Scalar cost of STMT, selected by its STMT_VINFO_TYPE: loads and stores
   have their own costs, the other kinds listed below share
   TARG_SCALAR_STMT_COST. */
531 switch (STMT_VINFO_TYPE (stmt_info))
533 case load_vec_info_type:
534 return TARG_SCALAR_LOAD_COST;
535 case store_vec_info_type:
536 return TARG_SCALAR_STORE_COST;
537 case op_vec_info_type:
538 case condition_vec_info_type:
539 case assignment_vec_info_type:
540 case reduc_vec_info_type:
541 case induc_vec_info_type:
542 case type_promotion_vec_info_type:
543 case type_demotion_vec_info_type:
544 case type_conversion_vec_info_type:
545 case call_vec_info_type:
546 return TARG_SCALAR_STMT_COST;
547 case undef_vec_info_type:
553 /* Function vect_model_simple_cost.
555 Models cost for simple operations, i.e. those that only emit ncopies of a
556 single op. Right now, this does not account for multiple insns that could
557 be generated for the single vector op. We will handle that shortly.
    The inside-of-loop cost is NCOPIES * TARG_VEC_STMT_COST. DT holds the
    def types of the operands; only DT[0] and DT[1] are examined, and each
    one that is a constant or external def adds TARG_SCALAR_TO_VEC_COST to
    the outside-of-loop cost. Both costs are recorded through the
    stmt_vinfo_set_*_of_loop_cost setters, which are given both STMT_INFO
    and SLP_NODE. */
560 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
561 enum vect_def_type *dt, slp_tree slp_node)
564 int inside_cost = 0, outside_cost = 0;
566 /* The SLP costs were already calculated during SLP tree build. */
567 if (PURE_SLP_STMT (stmt_info))
570 inside_cost = ncopies * TARG_VEC_STMT_COST;
572 /* FORNOW: Assuming maximum 2 args per stmts. */
573 for (i = 0; i < 2; i++)
575 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
576 outside_cost += TARG_SCALAR_TO_VEC_COST;
579 if (vect_print_dump_info (REPORT_COST))
580 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
581 "outside_cost = %d .", inside_cost, outside_cost);
583 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
584 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
585 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
589 /* Function vect_cost_strided_group_size
591 For strided load or store, return the group_size only if it is the first
592 load or store of a group, else return 1. This ensures that group size is
593 only returned once per group. */
596 vect_cost_strided_group_size (stmt_vec_info stmt_info)
598 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
/* Only the stmt recorded as the first data-ref of its own group reports
   the group size. */
600 if (first_stmt == STMT_VINFO_STMT (stmt_info))
601 return DR_GROUP_SIZE (stmt_info);
607 /* Function vect_model_store_cost
609 Models cost for stores. In the case of strided accesses, one access
610 has the overhead of the strided access attributed to it.
    If DT is a constant or external def, TARG_SCALAR_TO_VEC_COST is charged
    as outside-of-loop cost. The inside-of-loop cost is
    NCOPIES * TARG_VEC_STORE_COST, plus a permute term of
    NCOPIES * exact_log2 (group_size) * group_size * TARG_VEC_STMT_COST
    for strided groups. */
613 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
614 enum vect_def_type dt, slp_tree slp_node)
617 int inside_cost = 0, outside_cost = 0;
619 /* The SLP costs were already calculated during SLP tree build. */
620 if (PURE_SLP_STMT (stmt_info))
623 if (dt == vect_constant_def || dt == vect_external_def)
624 outside_cost = TARG_SCALAR_TO_VEC_COST;
626 /* Strided access? */
627 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
628 group_size = vect_cost_strided_group_size (stmt_info);
629 /* Not a strided access. */
633 /* Is this an access in a group of stores, which provide strided access?
634 If so, add in the cost of the permutes. */
637 /* Uses a high and low interleave operation for each needed permute. */
638 inside_cost = ncopies * exact_log2(group_size) * group_size
639 * TARG_VEC_STMT_COST;
641 if (vect_print_dump_info (REPORT_COST))
642 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
647 /* Costs of the stores. */
648 inside_cost += ncopies * TARG_VEC_STORE_COST;
650 if (vect_print_dump_info (REPORT_COST))
651 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
652 "outside_cost = %d .", inside_cost, outside_cost);
654 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
655 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
656 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
660 /* Function vect_model_load_cost
662 Models cost for loads. In the case of strided accesses, the last access
663 has the overhead of the strided access attributed to it. Since unaligned
664 accesses are supported for loads, we also account for the costs of the
665 access scheme chosen.
    NOTE(review): the group size used below comes from
    vect_cost_strided_group_size, whose comment says it reports the size
    for the first access of a group, not the last - confirm which access
    is meant above. */
668 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
672 int alignment_support_cheme;
674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
675 int inside_cost = 0, outside_cost = 0;
677 /* The SLP costs were already calculated during SLP tree build. */
678 if (PURE_SLP_STMT (stmt_info))
681 /* Strided accesses? */
682 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
683 if (first_stmt && !slp_node)
685 group_size = vect_cost_strided_group_size (stmt_info);
686 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
688 /* Not a strided access. */
695 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
697 /* Is this an access in a group of loads providing strided access?
698 If so, add in the cost of the permutes. */
701 /* Uses even and odd extract operations for each needed permute. */
702 inside_cost = ncopies * exact_log2(group_size) * group_size
703 * TARG_VEC_STMT_COST;
705 if (vect_print_dump_info (REPORT_COST))
706 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
711 /* The loads themselves. */
712 switch (alignment_support_cheme)
716 inside_cost += ncopies * TARG_VEC_LOAD_COST;
718 if (vect_print_dump_info (REPORT_COST))
719 fprintf (vect_dump, "vect_model_load_cost: aligned.");
723 case dr_unaligned_supported:
725 /* Here, we assign an additional cost for the unaligned load. */
726 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
734 case dr_explicit_realign:
736 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
738 /* FIXME: If the misalignment remains fixed across the iterations of
739 the containing loop, the following cost should be added to the
741 if (targetm.vectorize.builtin_mask_for_load)
742 inside_cost += TARG_VEC_STMT_COST;
746 case dr_explicit_realign_optimized:
748 if (vect_print_dump_info (REPORT_COST))
749 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
752 /* Unaligned software pipeline has a load of an address, an initial
753 load, and possibly a mask operation to "prime" the loop. However,
754 if this is an access in a group of loads, which provide strided
755 access, then the above cost should only be considered for one
756 access in the group. Inside the loop, there is a load op
757 and a realignment op. */
759 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
761 outside_cost = 2*TARG_VEC_STMT_COST;
762 if (targetm.vectorize.builtin_mask_for_load)
763 outside_cost += TARG_VEC_STMT_COST;
766 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
775 if (vect_print_dump_info (REPORT_COST))
776 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
777 "outside_cost = %d .", inside_cost, outside_cost);
779 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
780 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
781 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
785 /* Function vect_init_vector.
787 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
788 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
789 is not NULL. Otherwise, place the initialization at the loop preheader.
790 Return the DEF of INIT_STMT.
791 It will be used in the vectorization of STMT.
    The new variable has type VECTOR_TYPE. Insertion at the preheader is
    done on the loop's preheader edge and is asserted not to create a new
    basic block. */
794 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
795 gimple_stmt_iterator *gsi)
797 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
805 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
806 add_referenced_var (new_var);
807 init_stmt = gimple_build_assign (new_var, vector_var);
808 new_temp = make_ssa_name (new_var, init_stmt);
809 gimple_assign_set_lhs (init_stmt, new_temp);
812 vect_finish_stmt_generation (stmt, init_stmt, gsi);
815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
816 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
818 if (nested_in_vect_loop_p (loop, stmt))
820 pe = loop_preheader_edge (loop);
821 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
822 gcc_assert (!new_bb);
825 if (vect_print_dump_info (REPORT_DETAILS))
827 fprintf (vect_dump, "created new init_stmt: ");
828 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
831 vec_oprnd = gimple_assign_lhs (init_stmt);
835 /* Function vect_get_vec_def_for_operand.
837 OP is an operand in STMT. This function returns a (vector) def that will be
838 used in the vectorized stmt for STMT.
840 In the case that OP is an SSA_NAME which is defined in the loop, then
841 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
843 In case OP is an invariant or constant, a new stmt that creates a vector def
844 needs to be introduced.
    OP must be a simple use according to vect_is_simple_use (this is
    asserted); the def type it reports selects one of the cases below. */
847 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
852 stmt_vec_info def_stmt_info = NULL;
853 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
854 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
855 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
862 enum vect_def_type dt;
866 if (vect_print_dump_info (REPORT_DETAILS))
868 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
869 print_generic_expr (vect_dump, op, TDF_SLIM);
872 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
873 gcc_assert (is_simple_use);
874 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "def = ");
879 print_generic_expr (vect_dump, def, TDF_SLIM);
883 fprintf (vect_dump, " def_stmt = ");
884 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
890 /* Case 1: operand is a constant. */
891 case vect_constant_def:
893 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
894 gcc_assert (vector_type);
899 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
900 if (vect_print_dump_info (REPORT_DETAILS))
901 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
903 for (i = nunits - 1; i >= 0; --i)
905 t = tree_cons (NULL_TREE, op, t);
907 vec_cst = build_vector (vector_type, t);
908 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
911 /* Case 2: operand is defined outside the loop - loop invariant. */
912 case vect_external_def:
914 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
915 gcc_assert (vector_type);
916 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
921 /* Create 'vec_inv = {inv,inv,..,inv}' */
922 if (vect_print_dump_info (REPORT_DETAILS))
923 fprintf (vect_dump, "Create vector_inv.");
925 for (i = nunits - 1; i >= 0; --i)
927 t = tree_cons (NULL_TREE, def, t);
930 /* FIXME: use build_constructor directly. */
931 vec_inv = build_constructor_from_list (vector_type, t);
932 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
935 /* Case 3: operand is defined inside the loop. */
936 case vect_internal_def:
939 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
941 /* Get the def from the vectorized stmt. */
942 def_stmt_info = vinfo_for_stmt (def_stmt);
943 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
944 gcc_assert (vec_stmt);
945 if (gimple_code (vec_stmt) == GIMPLE_PHI)
946 vec_oprnd = PHI_RESULT (vec_stmt);
947 else if (is_gimple_call (vec_stmt))
948 vec_oprnd = gimple_call_lhs (vec_stmt);
950 vec_oprnd = gimple_assign_lhs (vec_stmt);
954 /* Case 4: operand is defined by a loop header phi - reduction */
955 case vect_reduction_def:
959 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
960 loop = (gimple_bb (def_stmt))->loop_father;
962 /* Get the def before the loop */
963 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
964 return get_initial_def_for_reduction (stmt, op, scalar_def);
967 /* Case 5: operand is defined by loop-header phi - induction. */
968 case vect_induction_def:
970 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
972 /* Get the def from the vectorized stmt. */
973 def_stmt_info = vinfo_for_stmt (def_stmt);
974 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
975 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
976 vec_oprnd = PHI_RESULT (vec_stmt);
986 /* Function vect_get_vec_def_for_stmt_copy
988 Return a vector-def for an operand. This function is used when the
989 vectorized stmt to be created (by the caller to this function) is a "copy"
990 created in case the vectorized result cannot fit in one vector, and several
991 copies of the vector-stmt are required. In this case the vector-def is
992 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
993 of the stmt that defines VEC_OPRND.
994 DT is the type of the vector def VEC_OPRND.
997 In case the vectorization factor (VF) is bigger than the number
998 of elements that can fit in a vectype (nunits), we have to generate
999 more than one vector stmt to vectorize the scalar stmt. This situation
1000 arises when there are multiple data-types operated upon in the loop; the
1001 smallest data-type determines the VF, and as a result, when vectorizing
1002 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1003 vector stmt (each computing a vector of 'nunits' results, and together
1004 computing 'VF' results in each iteration). This function is called when
1005 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1006 which VF=16 and nunits=4, so the number of copies required is 4):
1008 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1010 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1011 VS1.1: vx.1 = memref1 VS1.2
1012 VS1.2: vx.2 = memref2 VS1.3
1013 VS1.3: vx.3 = memref3
1015 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1016 VSnew.1: vz1 = vx.1 + ... VSnew.2
1017 VSnew.2: vz2 = vx.2 + ... VSnew.3
1018 VSnew.3: vz3 = vx.3 + ...
1020 The vectorization of S1 is explained in vectorizable_load.
1021 The vectorization of S2:
1022 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1023 the function 'vect_get_vec_def_for_operand' is called to
1024 get the relevant vector-def for each operand of S2. For operand x it
1025 returns the vector-def 'vx.0'.
1027 To create the remaining copies of the vector-stmt (VSnew.j), this
1028 function is called to get the relevant vector-def for each operand. It is
1029 obtained from the respective VS1.j stmt, which is recorded in the
1030 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1032 For example, to obtain the vector-def 'vx.1' in order to create the
1033 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1034 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1035 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1036 and return its def ('vx.1').
1037 Overall, to create the above sequence this function will be called 3 times:
1038 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1039 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1040 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1043 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1045 gimple vec_stmt_for_operand;
1046 stmt_vec_info def_stmt_info;
1048 /* Do nothing; can reuse same def. */
1049 if (dt == vect_external_def || dt == vect_constant_def )
1052 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1053 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1054 gcc_assert (def_stmt_info);
1055 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1056 gcc_assert (vec_stmt_for_operand);
/* NOTE(review): this assignment looks redundant - vec_oprnd is assigned
   again below before it is used. */
1057 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1058 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1059 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1061 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1066 /* Get vectorized definitions for the operands to create a copy of an original
1067 stmt. See vect_get_vec_def_for_stmt_copy() for details.
     The last element of *VEC_OPRNDS0 is popped, replaced by the def of its
     next copy (using DT[0]) and pushed back. The same is done for
     *VEC_OPRNDS1 with DT[1], but only when VEC_OPRNDS1 is non-NULL and
     points to a non-NULL vector. */
1070 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1071 VEC(tree,heap) **vec_oprnds0,
1072 VEC(tree,heap) **vec_oprnds1)
1074 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1076 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1077 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1079 if (vec_oprnds1 && *vec_oprnds1)
1081 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1082 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1083 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1088 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL.
     For SLP the defs are obtained from vect_get_slp_defs. Otherwise each
     result vector is freshly allocated with room for one element and
     receives the def returned by vect_get_vec_def_for_operand. */
1091 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1092 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1096 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1101 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1102 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1103 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1107 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1108 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1109 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1115 /* Function vect_finish_stmt_generation.
1117 Insert a new stmt.
     VEC_STMT is inserted before the stmt GSI points to (GSI_SAME_STMT), is
     given a new stmt_vec_info built for the loop_vec_info of STMT, and
     gets the location of the stmt at GSI. STMT must not be a label
     (asserted). */
1120 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1121 gimple_stmt_iterator *gsi)
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1124 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1126 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1128 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1130 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
1132 if (vect_print_dump_info (REPORT_DETAILS))
1134 fprintf (vect_dump, "add new stmt: ");
1135 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1138 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1141 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1142 a function declaration if the target has a vectorized version
1143 of the function, or NULL_TREE if the function cannot be vectorized. */
/* VECTYPE_OUT and VECTYPE_IN are the vector types for the call's result and
   its arguments.  The decision is delegated to the target hook
   targetm.vectorize.builtin_vectorized_function, which is queried with the
   builtin's function code.  */
1146 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1148 tree fndecl = gimple_call_fndecl (call);
1149 enum built_in_function code;
1151 /* We only handle functions that do not read or clobber memory -- i.e.
1152 const or novops ones. */
1153 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* The callee must be a FUNCTION_DECL for a builtin; only then is there a
   function code to hand to the target hook.  */
1157 || TREE_CODE (fndecl) != FUNCTION_DECL
1158 || !DECL_BUILT_IN (fndecl))
1161 code = DECL_FUNCTION_CODE (fndecl);
1162 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1166 /* Function vectorizable_call.
1168 Check if STMT performs a function call that can be vectorized.
1169 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1170 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1171 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Only calls with one or two arguments of identical scalar type are
   considered, and only when vectorizable_function finds a target builtin
   for the input/output vector types.  NCOPIES is the vectorization factor
   divided by the number of elements of the output vector type when
   MODIFIER is NARROW, and of the input vector type otherwise.  When
   VEC_STMT is NULL only the stmt type and the cost are recorded.  After
   the vector calls have been emitted, the scalar call is replaced by an
   assignment of zero to its LHS.  */
1174 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1179 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1181 tree vectype_out, vectype_in;
1184 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1185 tree fndecl, new_temp, def, rhs_type, lhs_type;
/* One def type per call argument; filled by the argument loop below.  */
1187 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1190 VEC(tree, heap) *vargs = NULL;
1191 enum { NARROW, NONE, WIDEN } modifier;
1194 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1197 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1200 /* FORNOW: SLP not supported. */
1201 if (STMT_SLP_TYPE (stmt_info))
1204 /* Is STMT a vectorizable call? */
1205 if (!is_gimple_call (stmt))
1208 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1211 /* Process function arguments. */
1212 rhs_type = NULL_TREE;
1213 nargs = gimple_call_num_args (stmt);
1215 /* Bail out if the function has more than two arguments, we
1216 do not have interesting builtin functions to vectorize with
1217 more than two arguments. No arguments is also not good. */
1218 if (nargs == 0 || nargs > 2)
1221 for (i = 0; i < nargs; i++)
1223 op = gimple_call_arg (stmt, i);
1225 /* We can only handle calls with arguments of the same type. */
1227 && rhs_type != TREE_TYPE (op))
1229 if (vect_print_dump_info (REPORT_DETAILS))
1230 fprintf (vect_dump, "argument types differ.");
1233 rhs_type = TREE_TYPE (op);
/* Record the def type of argument I in DT[I].  */
1235 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
1237 if (vect_print_dump_info (REPORT_DETAILS))
1238 fprintf (vect_dump, "use not simple.");
1243 vectype_in = get_vectype_for_scalar_type (rhs_type);
1246 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1248 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1249 vectype_out = get_vectype_for_scalar_type (lhs_type);
1252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Compare the element counts of the input and output vector types.
   NOTE(review): the statements selected by these tests are not visible in
   this listing; presumably they set MODIFIER -- confirm.  */
1255 if (nunits_in == nunits_out / 2)
1257 else if (nunits_out == nunits_in)
1259 else if (nunits_out == nunits_in / 2)
1264 /* For now, we only vectorize functions if a target specific builtin
1265 is available. TODO -- in some cases, it might be profitable to
1266 insert the calls for pieces of the vector, in order to be able
1267 to vectorize other operations in the loop. */
1268 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1269 if (fndecl == NULL_TREE)
1271 if (vect_print_dump_info (REPORT_DETAILS))
1272 fprintf (vect_dump, "function is not vectorizable.");
/* The call is not expected to have a virtual use at this point.  */
1277 gcc_assert (!gimple_vuse (stmt));
1279 if (modifier == NARROW)
1280 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1282 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1284 /* Sanity check: make sure that at least one copy of the vectorized stmt
1285 needs to be generated. */
1286 gcc_assert (ncopies >= 1);
1288 if (!vec_stmt) /* transformation not required. */
1290 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1291 if (vect_print_dump_info (REPORT_DETAILS))
1292 fprintf (vect_dump, "=== vectorizable_call ===");
1293 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1299 if (vect_print_dump_info (REPORT_DETAILS))
1300 fprintf (vect_dump, "transform operation.");
1303 scalar_dest = gimple_call_lhs (stmt);
1304 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1306 prev_stmt_info = NULL;
/* First transform loop: one vector argument is pushed per scalar
   argument.  */
1310 for (j = 0; j < ncopies; ++j)
1312 /* Build argument list for the vectorized call. */
1314 vargs = VEC_alloc (tree, heap, nargs);
1316 VEC_truncate (tree, vargs, 0);
1318 for (i = 0; i < nargs; i++)
/* The def type of argument I was stored in DT[I] by the analysis loop.
   DT has two elements and NARGS is 1 or 2, so DT[NARGS] would be a slot
   that was never filled or one past the end of the array; use DT[I].
   NOTE(review): VEC_OPRND0 is shared by all arguments, so for a
   two-argument call the value passed for a later copy may be the def of
   the other argument -- confirm against the lines not visible here.  */
1320 op = gimple_call_arg (stmt, i);
1323 = vect_get_vec_def_for_operand (op, stmt, NULL);
1326 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1328 VEC_quick_push (tree, vargs, vec_oprnd0);
1331 new_stmt = gimple_build_call_vec (fndecl, vargs);
1332 new_temp = make_ssa_name (vec_dest, new_stmt);
1333 gimple_call_set_lhs (new_stmt, new_temp);
1335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* The copies are linked through STMT_VINFO_RELATED_STMT; the stmt's
   STMT_VINFO_VEC_STMT holds the head of that chain.  */
1338 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1340 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1342 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Second transform loop: two vector arguments are pushed per scalar
   argument, hence the argument vector of size NARGS * 2.  */
1348 for (j = 0; j < ncopies; ++j)
1350 /* Build argument list for the vectorized call. */
1352 vargs = VEC_alloc (tree, heap, nargs * 2);
1354 VEC_truncate (tree, vargs, 0);
1356 for (i = 0; i < nargs; i++)
/* As in the first loop, the def type of argument I is DT[I], not
   DT[NARGS].  */
1358 op = gimple_call_arg (stmt, i);
1362 = vect_get_vec_def_for_operand (op, stmt, NULL);
1364 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1369 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1371 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1374 VEC_quick_push (tree, vargs, vec_oprnd0);
1375 VEC_quick_push (tree, vargs, vec_oprnd1);
1378 new_stmt = gimple_build_call_vec (fndecl, vargs);
1379 new_temp = make_ssa_name (vec_dest, new_stmt);
1380 gimple_call_set_lhs (new_stmt, new_temp);
1382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1385 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1387 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1389 prev_stmt_info = vinfo_for_stmt (new_stmt);
1392 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1397 /* No current target implements this case. */
1401 VEC_free (tree, heap, vargs);
1403 /* Update the exception handling table with the vector stmt if necessary. */
1404 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1405 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1407 /* The call in STMT might prevent it from being removed in dce.
1408 We however cannot remove it here, due to the way the ssa name
1409 it defines is mapped to the new definition. So just replace
1410 rhs of the statement with something harmless. */
1412 type = TREE_TYPE (scalar_dest);
1413 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1414 fold_convert (type, integer_zero_node));
/* Move the stmt_vec_info from the old call to the replacement assignment
   and make the SSA name point at its new defining stmt.  */
1415 set_vinfo_for_stmt (new_stmt, stmt_info);
1416 set_vinfo_for_stmt (stmt, NULL);
1417 STMT_VINFO_STMT (stmt_info) = new_stmt;
1418 gsi_replace (gsi, new_stmt, false);
1419 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1425 /* Function vect_gen_widened_results_half
1427 Create a vector stmt whose code, type, number of arguments, and result
1428 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1429 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1430 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1431 needs to be created (DECL is a function-decl of a target-builtin).
1432 STMT is the original scalar stmt that we are vectorizing. */
/* Whichever form is built, the new stmt's LHS is a fresh SSA name made from
   VEC_DEST and the stmt is inserted through vect_finish_stmt_generation.  */
1435 vect_gen_widened_results_half (enum tree_code code,
1437 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1438 tree vec_dest, gimple_stmt_iterator *gsi,
1444 /* Generate half of the widened result: */
1445 if (code == CALL_EXPR)
1447 /* Target specific support */
/* A binary operation passes both operands to DECL, otherwise only
   VEC_OPRND0 is passed.  */
1448 if (op_type == binary_op)
1449 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1451 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1452 new_temp = make_ssa_name (vec_dest, new_stmt);
1453 gimple_call_set_lhs (new_stmt, new_temp);
1457 /* Generic support */
/* OP_TYPE must agree with the operand count of CODE.
   NOTE(review): the statement controlled by the OP_TYPE != binary_op test
   is not visible in this listing; presumably it clears VEC_OPRND1 for
   non-binary codes -- confirm.  */
1458 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1459 if (op_type != binary_op)
1461 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1463 new_temp = make_ssa_name (vec_dest, new_stmt);
1464 gimple_assign_set_lhs (new_stmt, new_temp);
1466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1472 /* Check if STMT performs a conversion operation, that can be vectorized.
1473 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1474 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1475 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Only FIX_TRUNC_EXPR and FLOAT_EXPR assignments are examined, and the
   stmt is rejected when the source and destination scalar types are both
   integral or both non-integral.  Target support is queried through
   targetm.vectorize.builtin_conversion, supportable_widening_operation or
   supportable_narrowing_operation depending on MODIFIER.  The transform
   part has three code-generation loops, each running NCOPIES times.  */
1478 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1479 gimple *vec_stmt, slp_tree slp_node)
1484 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1485 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1486 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1487 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1488 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1492 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1493 gimple new_stmt = NULL;
1494 stmt_vec_info prev_stmt_info;
1497 tree vectype_out, vectype_in;
1500 tree rhs_type, lhs_type;
1502 enum { NARROW, NONE, WIDEN } modifier;
1504 VEC(tree,heap) *vec_oprnds0 = NULL;
1507 VEC(tree,heap) *dummy = NULL;
1510 /* Is STMT a vectorizable conversion? */
1512 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1515 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1518 if (!is_gimple_assign (stmt))
1521 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1524 code = gimple_assign_rhs_code (stmt);
1525 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1528 /* Check types of lhs and rhs. */
1529 op0 = gimple_assign_rhs1 (stmt);
1530 rhs_type = TREE_TYPE (op0);
1531 vectype_in = get_vectype_for_scalar_type (rhs_type);
1534 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1536 scalar_dest = gimple_assign_lhs (stmt);
1537 lhs_type = TREE_TYPE (scalar_dest);
1538 vectype_out = get_vectype_for_scalar_type (lhs_type);
1541 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Compare the element counts of the input and output vector types.
   NOTE(review): the statements selected by these tests are not visible in
   this listing; presumably they set MODIFIER -- confirm.  */
1544 if (nunits_in == nunits_out / 2)
1546 else if (nunits_out == nunits_in)
1548 else if (nunits_out == nunits_in / 2)
1553 if (modifier == NONE)
1554 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1556 /* Bail out if the types are both integral or non-integral. */
1557 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1558 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* INTEGRAL_TYPE is the vector type on the integer side of the conversion;
   it is what the builtin_conversion hook is queried with.  */
1561 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1563 if (modifier == NARROW)
1564 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1566 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1568 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1569 this, so we can safely override NCOPIES with 1 here. */
1573 /* Sanity check: make sure that at least one copy of the vectorized stmt
1574 needs to be generated. */
1575 gcc_assert (ncopies >= 1);
1577 /* Check the operands of the operation. */
1578 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1580 if (vect_print_dump_info (REPORT_DETAILS))
1581 fprintf (vect_dump, "use not simple.");
1585 /* Supportable by target? */
1586 if ((modifier == NONE
1587 && !targetm.vectorize.builtin_conversion (code, integral_type))
1588 || (modifier == WIDEN
1589 && !supportable_widening_operation (code, stmt, vectype_in,
1592 &dummy_int, &dummy))
1593 || (modifier == NARROW
1594 && !supportable_narrowing_operation (code, stmt, vectype_in,
1595 &code1, &dummy_int, &dummy)))
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "conversion not supported by target.");
1602 if (modifier != NONE)
1604 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1605 /* FORNOW: SLP not supported. */
1606 if (STMT_SLP_TYPE (stmt_info))
1610 if (!vec_stmt) /* transformation not required. */
1612 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1617 if (vect_print_dump_info (REPORT_DETAILS))
1618 fprintf (vect_dump, "transform conversion.");
1621 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1623 if (modifier == NONE && !slp_node)
1624 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1626 prev_stmt_info = NULL;
/* First loop: each def in VEC_OPRNDS0 becomes the single argument of a call
   to the builtin returned by the builtin_conversion hook.  */
1630 for (j = 0; j < ncopies; j++)
1633 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1635 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1638 targetm.vectorize.builtin_conversion (code, integral_type);
1639 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1641 /* Arguments are ready. create the new vector stmt. */
1642 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1643 new_temp = make_ssa_name (vec_dest, new_stmt);
1644 gimple_call_set_lhs (new_stmt, new_temp);
1645 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1647 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1651 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1653 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1654 prev_stmt_info = vinfo_for_stmt (new_stmt);
1659 /* In case the vectorization factor (VF) is bigger than the number
1660 of elements that we can fit in a vectype (nunits), we have to
1661 generate more than one vector stmt - i.e - we need to "unroll"
1662 the vector stmt by a factor VF/nunits. */
1663 for (j = 0; j < ncopies; j++)
1666 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1668 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1670 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1672 /* Generate first half of the widened result: */
1674 = vect_gen_widened_results_half (code1, decl1,
1675 vec_oprnd0, vec_oprnd1,
1676 unary_op, vec_dest, gsi, stmt);
1678 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1680 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1681 prev_stmt_info = vinfo_for_stmt (new_stmt);
1683 /* Generate second half of the widened result: */
1685 = vect_gen_widened_results_half (code2, decl2,
1686 vec_oprnd0, vec_oprnd1,
1687 unary_op, vec_dest, gsi, stmt);
1688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1689 prev_stmt_info = vinfo_for_stmt (new_stmt);
1694 /* In case the vectorization factor (VF) is bigger than the number
1695 of elements that we can fit in a vectype (nunits), we have to
1696 generate more than one vector stmt - i.e - we need to "unroll"
1697 the vector stmt by a factor VF/nunits. */
1698 for (j = 0; j < ncopies; j++)
/* Two input defs are needed per output stmt: VEC_OPRND0 and the copy that
   follows it, VEC_OPRND1.  */
1703 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1704 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1708 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1709 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1712 /* Arguments are ready. Create the new vector stmt. */
/* NOTE(review): EXPR is not used by any statement visible in this listing;
   the stmt is built directly by gimple_build_assign_with_ops -- check
   whether this build2 call is dead.  */
1713 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1714 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1716 new_temp = make_ssa_name (vec_dest, new_stmt);
1717 gimple_assign_set_lhs (new_stmt, new_temp);
1718 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1721 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1723 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1725 prev_stmt_info = vinfo_for_stmt (new_stmt);
1728 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
/* Release the operand vector used by the code-generation loops.  */
1732 VEC_free (tree, heap, vec_oprnds0);
1736 /* Function vectorizable_assignment.
1738 Check if STMT performs an assignment (copy) that can be vectorized.
1739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1740 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1741 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The stmt must be an assignment to an SSA name whose RHS is a single
   operand or a PAREN_EXPR, and that operand must pass vect_is_simple_use.
   When VEC_STMT is NULL only the stmt type and cost are recorded;
   otherwise one vector copy is emitted per def obtained for the operand.  */
1744 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1745 gimple *vec_stmt, slp_tree slp_node)
1750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1751 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1752 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1756 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1757 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1760 VEC(tree,heap) *vec_oprnds = NULL;
1763 /* Multiple types in SLP are handled by creating the appropriate number of
1764 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1769 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1771 gcc_assert (ncopies >= 1);
1773 return false; /* FORNOW */
1775 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1778 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1781 /* Is vectorizable assignment? */
1782 if (!is_gimple_assign (stmt))
1785 scalar_dest = gimple_assign_lhs (stmt);
1786 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* Only single-operand assignments and PAREN_EXPR are treated as copies; OP
   is the value being copied.  */
1789 if (gimple_assign_single_p (stmt)
1790 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1791 op = gimple_assign_rhs1 (stmt);
1795 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
1797 if (vect_print_dump_info (REPORT_DETAILS))
1798 fprintf (vect_dump, "use not simple.");
1802 if (!vec_stmt) /* transformation not required. */
1804 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1805 if (vect_print_dump_info (REPORT_DETAILS))
1806 fprintf (vect_dump, "=== vectorizable_assignment ===");
1807 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1812 if (vect_print_dump_info (REPORT_DETAILS))
1813 fprintf (vect_dump, "transform assignment.");
1816 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1819 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1821 /* Arguments are ready. create the new vector stmt. */
/* Each iteration overwrites *VEC_STMT and STMT_VINFO_VEC_STMT, so after the
   loop both refer to the last copy generated.  */
1822 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1824 *vec_stmt = gimple_build_assign (vec_dest, vop);
1825 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1826 gimple_assign_set_lhs (*vec_stmt, new_temp);
1827 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1828 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1831 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1834 VEC_free (tree, heap, vec_oprnds);
1838 /* Function vectorizable_operation.
1840 Check if STMT performs a binary or unary operation that can be vectorized.
1841 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1842 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1843 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The input and output vector types are compared by element count, and only
   codes with one or two operands are accepted.  For shifts and rotates the
   optab is chosen from the def type of the shift amount (vector/vector or
   vector/scalar); for all other codes the default optab is used.  When
   VEC_STMT is NULL only the stmt type and cost are recorded; otherwise
   NCOPIES rounds of vector stmts are generated and linked through
   STMT_VINFO_RELATED_STMT.  */
1846 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1847 gimple *vec_stmt, slp_tree slp_node)
1851 tree op0, op1 = NULL;
1852 tree vec_oprnd1 = NULL_TREE;
1853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1856 enum tree_code code;
1857 enum machine_mode vec_mode;
1862 enum machine_mode optab_op2_mode;
1865 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1866 gimple new_stmt = NULL;
1867 stmt_vec_info prev_stmt_info;
1868 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1873 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1876 bool shift_p = false;
1877 bool scalar_shift_arg = false;
1879 /* Multiple types in SLP are handled by creating the appropriate number of
1880 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1885 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1887 gcc_assert (ncopies >= 1);
1889 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1892 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1895 /* Is STMT a vectorizable binary/unary operation? */
1896 if (!is_gimple_assign (stmt))
1899 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1902 scalar_dest = gimple_assign_lhs (stmt);
1903 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1906 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1907 if (nunits_out != nunits_in)
1910 code = gimple_assign_rhs_code (stmt);
1912 /* For pointer addition, we should use the normal plus for
1913 the vector addition. */
1914 if (code == POINTER_PLUS_EXPR)
1917 /* Support only unary or binary operations. */
1918 op_type = TREE_CODE_LENGTH (code);
1919 if (op_type != unary_op && op_type != binary_op)
1921 if (vect_print_dump_info (REPORT_DETAILS))
1922 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* DT[0] receives the def type of operand 0 and, for binary operations,
   DT[1] that of operand 1.  */
1926 op0 = gimple_assign_rhs1 (stmt);
1927 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1929 if (vect_print_dump_info (REPORT_DETAILS))
1930 fprintf (vect_dump, "use not simple.");
1934 if (op_type == binary_op)
1936 op1 = gimple_assign_rhs2 (stmt);
1937 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
1939 if (vect_print_dump_info (REPORT_DETAILS))
1940 fprintf (vect_dump, "use not simple.");
1945 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1946 or scalar. If the shift/rotate amount is a vector, use the vector/vector
1948 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1949 || code == RROTATE_EXPR)
1953 /* vector shifted by vector */
1954 if (dt[1] == vect_internal_def)
1956 optab = optab_for_tree_code (code, vectype, optab_vector);
1957 if (vect_print_dump_info (REPORT_DETAILS))
1958 fprintf (vect_dump, "vector/vector shift/rotate found.");
1961 /* See if the machine has a vector shifted by scalar insn and if not
1962 then see if it has a vector shifted by vector insn */
1963 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
1965 optab = optab_for_tree_code (code, vectype, optab_scalar);
1967 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1968 != CODE_FOR_nothing))
1970 scalar_shift_arg = true;
1971 if (vect_print_dump_info (REPORT_DETAILS))
1972 fprintf (vect_dump, "vector/scalar shift/rotate found.");
1976 optab = optab_for_tree_code (code, vectype, optab_vector);
1977 if (vect_print_dump_info (REPORT_DETAILS)
1979 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1980 != CODE_FOR_nothing))
1981 fprintf (vect_dump, "vector/vector shift/rotate found.");
/* Reached when the shift amount has none of the def types tested above.  */
1987 if (vect_print_dump_info (REPORT_DETAILS))
1988 fprintf (vect_dump, "operand mode requires invariant argument.");
/* Codes other than shifts and rotates use the default optab.  */
1993 optab = optab_for_tree_code (code, vectype, optab_default);
1995 /* Supportable by target? */
1998 if (vect_print_dump_info (REPORT_DETAILS))
1999 fprintf (vect_dump, "no optab.");
2002 vec_mode = TYPE_MODE (vectype);
2003 icode = (int) optab_handler (optab, vec_mode)->insn_code;
/* No insn for VEC_MODE.  The test that follows looks at the mode size
   versus UNITS_PER_WORD and at the vectorization factor versus
   vect_min_worthwhile_factor; per the dump message, when the stmt is not
   rejected the operation proceeds in word mode.
   NOTE(review): part of that condition and the statement it controls are
   not visible in this listing.  */
2004 if (icode == CODE_FOR_nothing)
2006 if (vect_print_dump_info (REPORT_DETAILS))
2007 fprintf (vect_dump, "op not supported by target.");
2008 /* Check only during analysis. */
2009 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2010 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2011 < vect_min_worthwhile_factor (code)
2014 if (vect_print_dump_info (REPORT_DETAILS))
2015 fprintf (vect_dump, "proceeding using word mode.");
2018 /* Worthwhile without SIMD support? Check only during analysis. */
2019 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2020 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2021 < vect_min_worthwhile_factor (code)
2024 if (vect_print_dump_info (REPORT_DETAILS))
2025 fprintf (vect_dump, "not worthwhile without SIMD support.");
2029 if (!vec_stmt) /* transformation not required. */
2031 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2032 if (vect_print_dump_info (REPORT_DETAILS))
2033 fprintf (vect_dump, "=== vectorizable_operation ===");
2034 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2040 if (vect_print_dump_info (REPORT_DETAILS))
2041 fprintf (vect_dump, "transform binary/unary operation.");
2044 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2046 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2047 created in the previous stages of the recursion, so no allocation is
2048 needed, except for the case of shift with scalar shift argument. In that
2049 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2050 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2051 In case of loop-based vectorization we allocate VECs of size 1. We
2052 allocate VEC_OPRNDS1 only in case of binary operation. */
2055 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2056 if (op_type == binary_op)
2057 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2059 else if (scalar_shift_arg)
2060 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2062 /* In case the vectorization factor (VF) is bigger than the number
2063 of elements that we can fit in a vectype (nunits), we have to generate
2064 more than one vector stmt - i.e - we need to "unroll" the
2065 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2066 from one copy of the vector stmt to the next, in the field
2067 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2068 stages to find the correct vector defs to be used when vectorizing
2069 stmts that use the defs of the current stmt. The example below illustrates
2070 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2071 4 vectorized stmts):
2073 before vectorization:
2074 RELATED_STMT VEC_STMT
2078 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2080 RELATED_STMT VEC_STMT
2081 VS1_0: vx0 = memref0 VS1_1 -
2082 VS1_1: vx1 = memref1 VS1_2 -
2083 VS1_2: vx2 = memref2 VS1_3 -
2084 VS1_3: vx3 = memref3 - -
2085 S1: x = load - VS1_0
2088 step2: vectorize stmt S2 (done here):
2089 To vectorize stmt S2 we first need to find the relevant vector
2090 def for the first operand 'x'. This is, as usual, obtained from
2091 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2092 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2093 relevant vector def 'vx0'. Having found 'vx0' we can generate
2094 the vector stmt VS2_0, and as usual, record it in the
2095 STMT_VINFO_VEC_STMT of stmt S2.
2096 When creating the second copy (VS2_1), we obtain the relevant vector
2097 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2098 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2099 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2100 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2101 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2102 chain of stmts and pointers:
2103 RELATED_STMT VEC_STMT
2104 VS1_0: vx0 = memref0 VS1_1 -
2105 VS1_1: vx1 = memref1 VS1_2 -
2106 VS1_2: vx2 = memref2 VS1_3 -
2107 VS1_3: vx3 = memref3 - -
2108 S1: x = load - VS1_0
2109 VS2_0: vz0 = vx0 + v1 VS2_1 -
2110 VS2_1: vz1 = vx1 + v1 VS2_2 -
2111 VS2_2: vz2 = vx2 + v1 VS2_3 -
2112 VS2_3: vz3 = vx3 + v1 - -
2113 S2: z = x + 1 - VS2_0 */
2115 prev_stmt_info = NULL;
2116 for (j = 0; j < ncopies; j++)
2121 if (op_type == binary_op && scalar_shift_arg)
2123 /* Vector shl and shr insn patterns can be defined with scalar
2124 operand 2 (shift operand). In this case, use constant or loop
2125 invariant op1 directly, without extending it to vector mode
2127 optab_op2_mode = insn_data[icode].operand[2].mode;
2128 if (!VECTOR_MODE_P (optab_op2_mode))
2130 if (vect_print_dump_info (REPORT_DETAILS))
2131 fprintf (vect_dump, "operand 1 using scalar mode.");
2133 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2136 /* Store vec_oprnd1 for every vector stmt to be created
2137 for SLP_NODE. We check during the analysis that all the
2138 shift arguments are the same.
2139 TODO: Allow different constants for different vector
2140 stmts generated for an SLP instance. */
2141 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2142 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2147 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2148 (a special case for certain kind of vector shifts); otherwise,
2149 operand 1 should be of a vector type (the usual case). */
2150 if (op_type == binary_op && !vec_oprnd1)
2151 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2154 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2158 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2160 /* Arguments are ready. Create the new vector stmt. */
/* One vector stmt per def in VEC_OPRNDS0; for binary operations the second
   operand is the entry at the same index of VEC_OPRNDS1.  */
2161 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2163 vop1 = ((op_type == binary_op)
2164 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2165 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2166 new_temp = make_ssa_name (vec_dest, new_stmt);
2167 gimple_assign_set_lhs (new_stmt, new_temp);
2168 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2170 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Record the new stmt either as the stmt's vectorized form or as the
   successor of the previous copy.
   NOTE(review): the tests choosing between these two assignments are not
   visible in this listing.  */
2177 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2179 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2180 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release the operand vectors used by the transform loop.  */
2183 VEC_free (tree, heap, vec_oprnds0);
2185 VEC_free (tree, heap, vec_oprnds1);
2191 /* Get vectorized definitions for loop-based vectorization. For the first
2192 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2193 scalar operand), and for the rest we get a copy with
2194 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2195 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2196 The vectors are collected into VEC_OPRNDS. */
/* Every invocation pushes two defs onto *VEC_OPRNDS.  MULTI_STEP_CVT
   controls the recursion: the recursive call visible at the end passes
   MULTI_STEP_CVT - 1.
   NOTE(review): the test guarding the recursive call and any update of
   *OPRND are not visible in this listing.  */
2199 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2200 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2204 /* Get first vector operand. */
2205 /* All the vector operands except the very first one (that is scalar oprnd)
2207 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2208 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2210 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2212 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2214 /* Get second vector operand. */
2215 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2216 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2220 /* For conversion in multiple steps, continue to get operands
2223 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2227 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2228 For multi-step conversions store the resulting vectors and call the function
2232 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2233 int multi_step_cvt, gimple stmt,
2234 VEC (tree, heap) *vec_dsts,
2235 gimple_stmt_iterator *gsi,
2236 slp_tree slp_node, enum tree_code code,
2237 stmt_vec_info *prev_stmt_info)
2240 tree vop0, vop1, new_tmp, vec_dest;
2242 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2244 vec_dest = VEC_pop (tree, vec_dsts);
2246 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2248 /* Create demotion operation. */
/* The operands are consumed pairwise: entries I and I + 1 are combined by
   CODE into one new vector whose SSA name is made from VEC_DEST.  */
2249 vop0 = VEC_index (tree, *vec_oprnds, i);
2250 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2251 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2252 new_tmp = make_ssa_name (vec_dest, new_stmt);
2253 gimple_assign_set_lhs (new_stmt, new_tmp);
2254 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2257 /* Store the resulting vector for next recursive call. */
/* Pair I, I + 1 produces result slot I / 2, so the results occupy the front
   of *VEC_OPRNDS.  */
2258 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2261 /* This is the last step of the conversion sequence. Store the
2262 vectors in SLP_NODE or in vector info of the scalar statement
2263 (or in STMT_VINFO_RELATED_STMT chain). */
2265 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* The first stmt generated becomes the scalar stmt's vectorized form; each
   later one is linked after the stmt recorded in *PREV_STMT_INFO.  */
2268 if (!*prev_stmt_info)
2269 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2271 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2273 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2278 /* For multi-step demotion operations we first generate demotion operations
2279 from the source type to the intermediate types, and then combine the
2280 results (stored in VEC_OPRNDS) in demotion operation to the destination
2284 /* At each level of recursion we have half of the operands we had at the
2286 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2287 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2288 stmt, vec_dsts, gsi, slp_node,
2289 code, prev_stmt_info);
2294 /* Function vectorizable_type_demotion
2296 Check if STMT performs a binary or unary operation that involves
2297 type demotion, and if it can be vectorized.
2298 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2299 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
SLP_NODE is forwarded to vect_get_slp_defs and to
vect_create_vectorized_demotion_stmts (see below).
2300 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2303 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2304 gimple *vec_stmt, slp_tree slp_node)
2309 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2310 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2311 enum tree_code code, code1 = ERROR_MARK;
2314 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2315 stmt_vec_info prev_stmt_info;
/* MULTI_STEP_CVT and INTERM_TYPES are outputs of
   supportable_narrowing_operation, called further down.  */
2322 int multi_step_cvt = 0;
2323 VEC (tree, heap) *vec_oprnds0 = NULL;
2324 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2325 tree last_oprnd, intermediate_type;
/* Preconditions on STMT.  NOTE(review): the statements guarded by the
   conditions below are not visible in this excerpt; presumably each one
   rejects STMT -- confirm against the full source.  */
2327 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2330 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2333 /* Is STMT a vectorizable type-demotion operation? */
2334 if (!is_gimple_assign (stmt))
2337 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2340 code = gimple_assign_rhs_code (stmt);
2341 if (!CONVERT_EXPR_CODE_P (code))
/* Vector types and element counts for the conversion source (OP0) and
   for the result (SCALAR_DEST).  */
2344 op0 = gimple_assign_rhs1 (stmt);
2345 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2348 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2350 scalar_dest = gimple_assign_lhs (stmt);
2351 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2354 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* NOTE(review): presumably bails out unless the output vector type holds
   more elements than the input one -- the guarded statement is not
   visible here.  */
2355 if (nunits_in >= nunits_out)
2358 /* Multiple types in SLP are handled by creating the appropriate number of
2359 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2364 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2366 gcc_assert (ncopies >= 1);
2368 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2369 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2370 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2371 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2372 && CONVERT_EXPR_CODE_P (code))))
2375 /* Check the operands of the operation. */
2376 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "use not simple.");
2383 /* Supportable by target? */
2384 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2385 &multi_step_cvt, &interm_types))
/* Record the input vector type in STMT's vectorization info.  */
2388 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2390 if (!vec_stmt) /* transformation not required. */
/* Analysis only: tag the statement kind and feed NCOPIES to the cost
   model.  NOTE(review): the only visible release of INTERM_TYPES is at
   the end of the transform path; confirm it is not leaked when this
   analysis-only path returns.  */
2392 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "=== vectorizable_demotion ===");
2395 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2404 /* In case of multi-step demotion, we first generate demotion operations to
2405 the intermediate types, and then from those types to the final one.
2406 We create vector destinations for the intermediate type (TYPES) received
2407 from supportable_narrowing_operation, and store them in the correct order
2408 for future use in vect_create_vectorized_demotion_stmts(). */
2410 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2412 vec_dsts = VEC_alloc (tree, heap, 1);
/* The final destination is pushed first, so it is the last one that
   VEC_pop hands out.  */
2414 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2415 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Intermediate destinations are pushed walking INTERM_TYPES from its last
   element down to its first.  */
2419 for (i = VEC_length (tree, interm_types) - 1;
2420 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2422 vec_dest = vect_create_destination_var (scalar_dest,
2424 VEC_quick_push (tree, vec_dsts, vec_dest);
2428 /* In case the vectorization factor (VF) is bigger than the number
2429 of elements that we can fit in a vectype (nunits), we have to generate
2430 more than one vector stmt - i.e - we need to "unroll" the
2431 vector stmt by a factor VF/nunits. */
2433 prev_stmt_info = NULL;
2434 for (j = 0; j < ncopies; j++)
2438 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
/* Operand vector is rebuilt for this copy; capacity is
   2 * vect_pow2 (MULTI_STEP_CVT) for a multi-step conversion, else 2.  */
2441 VEC_free (tree, heap, vec_oprnds0);
2442 vec_oprnds0 = VEC_alloc (tree, heap,
2443 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2444 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2445 vect_pow2 (multi_step_cvt) - 1);
2448 /* Arguments are ready. Create the new vector stmts. */
/* The callee pops from the destination list, so it is given a duplicate
   of VEC_DSTS.  NOTE(review): TMP_VEC_DSTS is reassigned here while only
   one VEC_free of it is visible further down -- check for a leak when
   NCOPIES > 1.  */
2449 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2450 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2451 multi_step_cvt, stmt, tmp_vec_dsts,
2452 gsi, slp_node, code1,
/* Release the temporary vectors used by the transformation.  */
2456 VEC_free (tree, heap, vec_oprnds0);
2457 VEC_free (tree, heap, vec_dsts);
2458 VEC_free (tree, heap, tmp_vec_dsts);
2459 VEC_free (tree, heap, interm_types);
/* Report the statement stored in STMT_VINFO_VEC_STMT back to the caller.  */
2461 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2466 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2467 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2468 the resulting vectors and call the function recursively. */
2471 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2472 VEC (tree, heap) **vec_oprnds1,
2473 int multi_step_cvt, gimple stmt,
2474 VEC (tree, heap) *vec_dsts,
2475 gimple_stmt_iterator *gsi,
2476 slp_tree slp_node, enum tree_code code1,
2477 enum tree_code code2, tree decl1,
2478 tree decl2, int op_type,
2479 stmt_vec_info *prev_stmt_info)
2482 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2483 gimple new_stmt1, new_stmt2;
2484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2485 VEC (tree, heap) *vec_tmp;
2487 vec_dest = VEC_pop (tree, vec_dsts);
2488 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2490 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2492 if (op_type == binary_op)
2493 vop1 = VEC_index (tree, *vec_oprnds1, i);
2497 /* Generate the two halves of promotion operation. */
2498 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2499 op_type, vec_dest, gsi, stmt);
2500 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2501 op_type, vec_dest, gsi, stmt);
2502 if (is_gimple_call (new_stmt1))
2504 new_tmp1 = gimple_call_lhs (new_stmt1);
2505 new_tmp2 = gimple_call_lhs (new_stmt2);
2509 new_tmp1 = gimple_assign_lhs (new_stmt1);
2510 new_tmp2 = gimple_assign_lhs (new_stmt2);
2515 /* Store the results for the recursive call. */
2516 VEC_quick_push (tree, vec_tmp, new_tmp1);
2517 VEC_quick_push (tree, vec_tmp, new_tmp2);
2521 /* Last step of promotion sequience - store the results. */
2524 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2525 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2529 if (!*prev_stmt_info)
2530 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2532 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2534 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2535 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2536 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2543 /* For multi-step promotion operation we first generate we call the
2544 function recurcively for every stage. We start from the input type,
2545 create promotion operations to the intermediate types, and then
2546 create promotions to the output type. */
2547 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2548 VEC_free (tree, heap, vec_tmp);
2549 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2550 multi_step_cvt - 1, stmt,
2551 vec_dsts, gsi, slp_node, code1,
2552 code2, decl2, decl2, op_type,
2558 /* Function vectorizable_type_promotion
2560 Check if STMT performs a binary or unary operation that involves
2561 type promotion, and if it can be vectorized.
2562 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2563 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
SLP_NODE is forwarded to vect_get_slp_defs and to
vect_create_vectorized_promotion_stmts (see below).
2564 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2567 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2568 gimple *vec_stmt, slp_tree slp_node)
2572 tree op0, op1 = NULL;
2573 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2574 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2575 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* CODE1/CODE2 and DECL1/DECL2 are outputs of
   supportable_widening_operation, as are MULTI_STEP_CVT and INTERM_TYPES
   below.  */
2576 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2577 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2581 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2582 stmt_vec_info prev_stmt_info;
2589 tree intermediate_type = NULL_TREE;
2590 int multi_step_cvt = 0;
2591 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2592 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
/* Preconditions on STMT.  NOTE(review): the statements guarded by the
   conditions below are not visible in this excerpt; presumably each one
   rejects STMT -- confirm against the full source.  */
2594 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2597 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2600 /* Is STMT a vectorizable type-promotion operation? */
2601 if (!is_gimple_assign (stmt))
2604 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2607 code = gimple_assign_rhs_code (stmt);
/* The test below is false only for conversion codes and
   WIDEN_MULT_EXPR.  */
2608 if (!CONVERT_EXPR_CODE_P (code)
2609 && code != WIDEN_MULT_EXPR)
/* Vector types and element counts for the first operand (OP0) and for
   the result (SCALAR_DEST).  */
2612 op0 = gimple_assign_rhs1 (stmt);
2613 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2616 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2618 scalar_dest = gimple_assign_lhs (stmt);
2619 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2622 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* NOTE(review): presumably bails out unless the input vector type holds
   more elements than the output one -- the guarded statement is not
   visible here.  */
2623 if (nunits_in <= nunits_out)
2626 /* Multiple types in SLP are handled by creating the appropriate number of
2627 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2632 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2634 gcc_assert (ncopies >= 1);
2636 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2637 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2638 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2639 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2640 && CONVERT_EXPR_CODE_P (code))))
2643 /* Check the operands of the operation. */
2644 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2646 if (vect_print_dump_info (REPORT_DETAILS))
2647 fprintf (vect_dump, "use not simple.");
/* The second operand is examined only for binary operations.  */
2651 op_type = TREE_CODE_LENGTH (code);
2652 if (op_type == binary_op)
2654 op1 = gimple_assign_rhs2 (stmt);
2655 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
2657 if (vect_print_dump_info (REPORT_DETAILS))
2658 fprintf (vect_dump, "use not simple.");
2663 /* Supportable by target? */
2664 if (!supportable_widening_operation (code, stmt, vectype_in,
2665 &decl1, &decl2, &code1, &code2,
2666 &multi_step_cvt, &interm_types))
2669 /* Binary widening operation can only be supported directly by the
2671 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2673 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2675 if (!vec_stmt) /* transformation not required. */
/* Analysis only: tag the statement kind and feed 2*NCOPIES to the cost
   model.  NOTE(review): the only visible release of INTERM_TYPES is at
   the end of the transform path; confirm it is not leaked when this
   analysis-only path returns.  */
2677 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2678 if (vect_print_dump_info (REPORT_DETAILS))
2679 fprintf (vect_dump, "=== vectorizable_promotion ===");
2680 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2686 if (vect_print_dump_info (REPORT_DETAILS))
2687 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2691 /* In case of multi-step promotion, we first generate promotion operations
2692 to the intermediate types, and then from those types to the final one.
2693 We store vector destination in VEC_DSTS in the correct order for
2694 recursive creation of promotion operations in
2695 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2696 according to TYPES received from supportable_widening_operation(). */
2698 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2700 vec_dsts = VEC_alloc (tree, heap, 1);
/* The final destination is pushed first, so it is the last one that
   VEC_pop hands out.  */
2702 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2703 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Intermediate destinations are pushed walking INTERM_TYPES from its last
   element down to its first.  */
2707 for (i = VEC_length (tree, interm_types) - 1;
2708 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2710 vec_dest = vect_create_destination_var (scalar_dest,
2712 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Operand vectors: capacity vect_pow2 (MULTI_STEP_CVT) for a multi-step
   conversion, else 1; the second one exists only for binary operations.  */
2718 vec_oprnds0 = VEC_alloc (tree, heap,
2719 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2720 if (op_type == binary_op)
2721 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2724 /* In case the vectorization factor (VF) is bigger than the number
2725 of elements that we can fit in a vectype (nunits), we have to generate
2726 more than one vector stmt - i.e - we need to "unroll" the
2727 vector stmt by a factor VF/nunits. */
2729 prev_stmt_info = NULL;
2730 for (j = 0; j < ncopies; j++)
2736 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
/* Vector defs obtained directly from the scalar operands and pushed onto
   the operand vectors.  */
2739 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2740 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2741 if (op_type == binary_op)
2743 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2744 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
/* Vector defs derived from the previous defs; only slot 0 of each operand
   vector is overwritten.  NOTE(review):
   vect_create_vectorized_promotion_stmts reassigns *VEC_OPRNDS0 to a
   longer vector when MULTI_STEP_CVT is nonzero -- confirm that stale
   entries beyond slot 0 cannot be revisited here.  */
2750 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2751 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2752 if (op_type == binary_op)
2754 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2755 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2759 /* Arguments are ready. Create the new vector stmts. */
/* The callee pops from the destination list, so it is given a duplicate
   of VEC_DSTS.  NOTE(review): TMP_VEC_DSTS is reassigned here while only
   one VEC_free of it is visible further down -- check for a leak when
   NCOPIES > 1.  */
2760 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2761 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2762 multi_step_cvt, stmt,
2764 gsi, slp_node, code1, code2,
2765 decl1, decl2, op_type,
/* Release the temporary vectors used by the transformation.  */
2769 VEC_free (tree, heap, vec_dsts);
2770 VEC_free (tree, heap, tmp_vec_dsts);
2771 VEC_free (tree, heap, interm_types);
2772 VEC_free (tree, heap, vec_oprnds0);
2773 VEC_free (tree, heap, vec_oprnds1);
/* Report the statement stored in STMT_VINFO_VEC_STMT back to the caller.  */
2775 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2780 /* Function vectorizable_store.
2782 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2784 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2785 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2786 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2789 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2795 tree vec_oprnd = NULL_TREE;
2796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2797 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2798 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2799 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2801 enum machine_mode vec_mode;
2803 enum dr_alignment_support alignment_support_scheme;
2806 enum vect_def_type dt;
2807 stmt_vec_info prev_stmt_info = NULL;
2808 tree dataref_ptr = NULL_TREE;
2809 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2812 gimple next_stmt, first_stmt = NULL;
2813 bool strided_store = false;
2814 unsigned int group_size, i;
/* DR_CHAIN and OPRNDS hold one vector def per store of the group;
   RESULT_CHAIN is handed to vect_permute_store_chain.  */
2815 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2817 VEC(tree,heap) *vec_oprnds = NULL;
2818 bool slp = (slp_node != NULL);
2819 stmt_vec_info first_stmt_vinfo;
2820 unsigned int vec_num;
2822 /* Multiple types in SLP are handled by creating the appropriate number of
2823 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2828 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2830 gcc_assert (ncopies >= 1);
2832 /* FORNOW. This restriction should be relaxed. */
2833 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2835 if (vect_print_dump_info (REPORT_DETAILS))
2836 fprintf (vect_dump, "multiple types in nested loop.");
/* Preconditions on STMT.  NOTE(review): the statements guarded by the
   conditions below are not visible in this excerpt; presumably each one
   rejects STMT -- confirm against the full source.  */
2840 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2843 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2846 /* Is vectorizable store? */
2848 if (!is_gimple_assign (stmt))
/* The test below is false when the lhs is an ARRAY_REF or INDIRECT_REF,
   or when STMT is marked as a strided access.  */
2851 scalar_dest = gimple_assign_lhs (stmt);
2852 if (TREE_CODE (scalar_dest) != ARRAY_REF
2853 && TREE_CODE (scalar_dest) != INDIRECT_REF
2854 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
/* The stored value (the single rhs) must be a simple use; DT receives
   its definition type.  */
2857 gcc_assert (gimple_assign_single_p (stmt));
2858 op = gimple_assign_rhs1 (stmt);
2859 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2861 if (vect_print_dump_info (REPORT_DETAILS))
2862 fprintf (vect_dump, "use not simple.");
2866 /* The scalar rhs type needs to be trivially convertible to the vector
2867 component type. This should always be the case. */
2868 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2870 if (vect_print_dump_info (REPORT_DETAILS))
2871 fprintf (vect_dump, "??? operands of different types");
2875 vec_mode = TYPE_MODE (vectype);
2876 /* FORNOW. In some cases can vectorize even if data-type not supported
2877 (e.g. - array initialization with 0). */
2878 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2881 if (!STMT_VINFO_DATA_REF (stmt_info))
2884 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
/* Strided store: FIRST_STMT is taken from DR_GROUP_FIRST_DR.  The test
   below holds only when the target lacks strided-store support and the
   statement is neither pure-SLP nor being handled through SLP_NODE.  */
2886 strided_store = true;
2887 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2888 if (!vect_strided_store_supported (vectype)
2889 && !PURE_SLP_STMT (stmt_info) && !slp)
2892 if (first_stmt == stmt)
2894 /* STMT is the leader of the group. Check the operands of all the
2895 stmts of the group. */
2896 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2899 gcc_assert (gimple_assign_single_p (next_stmt));
2900 op = gimple_assign_rhs1 (next_stmt);
2901 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2903 if (vect_print_dump_info (REPORT_DETAILS))
2904 fprintf (vect_dump, "use not simple.");
2907 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2912 if (!vec_stmt) /* transformation not required. */
/* Analysis only: tag the statement kind and feed NCOPIES and DT to the
   store cost model.  */
2914 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2915 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
/* Group bookkeeping is read from the vectorization info of FIRST_STMT.  */
2923 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2924 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
/* Count this store towards its group.  */
2926 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2929 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
2931 /* We vectorize all the stmts of the interleaving group when we
2932 reach the last stmt in the group. */
2933 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2934 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2942 strided_store = false;
2944 /* VEC_NUM is the number of vect stmts to be created for this group. */
2946 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2948 vec_num = group_size;
2954 group_size = vec_num = 1;
2955 first_stmt_vinfo = stmt_info;
2958 if (vect_print_dump_info (REPORT_DETAILS))
2959 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2961 dr_chain = VEC_alloc (tree, heap, group_size);
2962 oprnds = VEC_alloc (tree, heap, group_size);
/* Only the dr_aligned scheme is accepted here (second assert).  */
2964 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
2965 gcc_assert (alignment_support_scheme);
2966 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2968 /* In case the vectorization factor (VF) is bigger than the number
2969 of elements that we can fit in a vectype (nunits), we have to generate
2970 more than one vector stmt - i.e - we need to "unroll" the
2971 vector stmt by a factor VF/nunits. For more details see documentation in
2972 vect_get_vec_def_for_stmt_copy. */
2974 /* In case of interleaving (non-unit strided access):
2981 We create vectorized stores starting from base address (the access of the
2982 first stmt in the chain (S2 in the above example), when the last store stmt
2983 of the chain (S4) is reached:
2986 VS2: &base + vec_size*1 = vx0
2987 VS3: &base + vec_size*2 = vx1
2988 VS4: &base + vec_size*3 = vx3
2990 Then permutation statements are generated:
2992 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2993 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2996 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2997 (the order of the data-refs in the output of vect_permute_store_chain
2998 corresponds to the order of scalar stmts in the interleaving chain - see
2999 the documentation of vect_permute_store_chain()).
3001 In case of both multiple types and interleaving, above vector stores and
3002 permutation stmts are created for every copy. The result vector stmts are
3003 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3004 STMT_VINFO_RELATED_STMT for the next copies.
3007 prev_stmt_info = NULL;
3008 for (j = 0; j < ncopies; j++)
3017 /* Get vectorized arguments for SLP_NODE. */
3018 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3020 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3024 /* For interleaved stores we collect vectorized defs for all the
3025 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3026 used as an input to vect_permute_store_chain(), and OPRNDS as
3027 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3029 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3030 OPRNDS are of size 1. */
3031 next_stmt = first_stmt;
3032 for (i = 0; i < group_size; i++)
3034 /* Since gaps are not supported for interleaved stores,
3035 GROUP_SIZE is the exact number of stmts in the chain.
3036 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3037 there is no interleaving, GROUP_SIZE is 1, and only one
3038 iteration of the loop will be executed. */
3039 gcc_assert (next_stmt
3040 && gimple_assign_single_p (next_stmt));
3041 op = gimple_assign_rhs1 (next_stmt);
3043 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3045 VEC_quick_push(tree, dr_chain, vec_oprnd);
3046 VEC_quick_push(tree, oprnds, vec_oprnd);
3047 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3051 /* We should have caught mismatched types earlier. */
3052 gcc_assert (useless_type_conversion_p (vectype,
3053 TREE_TYPE (vec_oprnd)));
/* Create the pointer used for the vector stores, based on FIRST_STMT;
   PTR_INCR receives the increment later passed to bump_vector_ptr.  */
3054 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3055 &dummy, &ptr_incr, false,
3057 gcc_assert (!inv_p);
3061 /* For interleaved stores we created vectorized defs for all the
3062 defs stored in OPRNDS in the previous iteration (previous copy).
3063 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3064 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
next copy.
3066 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3067 OPRNDS are of size 1. */
3068 for (i = 0; i < group_size; i++)
3070 op = VEC_index (tree, oprnds, i);
/* Called only to refresh DT for OP; the boolean result is not used.  */
3071 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
3072 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3073 VEC_replace(tree, dr_chain, i, vec_oprnd);
3074 VEC_replace(tree, oprnds, i, vec_oprnd);
3077 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3082 result_chain = VEC_alloc (tree, heap, group_size);
3084 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
/* Emit VEC_NUM vector stores for this copy, walking the group from
   FIRST_STMT.  */
3089 next_stmt = first_stmt;
3090 for (i = 0; i < vec_num; i++)
3093 /* Bump the vector pointer. */
3094 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3098 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3099 else if (strided_store)
3100 /* For strided stores vectorized defs are interleaved in
3101 vect_permute_store_chain(). */
3102 vec_oprnd = VEC_index (tree, result_chain, i);
3104 data_ref = build_fold_indirect_ref (dataref_ptr);
3105 /* If accesses through a pointer to vectype do not alias the original
3106 memory reference we have a problem. This should never happen. */
3107 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3108 get_alias_set (gimple_assign_lhs (stmt))));
3110 /* Arguments are ready. Create the new vector stmt. */
3111 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3113 mark_symbols_for_renaming (new_stmt);
/* Record NEW_STMT: either as the vector statement of STMT (also reported
   through VEC_STMT) or as the RELATED_STMT of the previous one.  */
3119 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3121 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3123 prev_stmt_info = vinfo_for_stmt (new_stmt);
3124 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
/* Release the temporary operand chains.  */
3130 VEC_free (tree, heap, dr_chain);
3131 VEC_free (tree, heap, oprnds);
3133 VEC_free (tree, heap, result_chain);
3138 /* vectorizable_load.
3140 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3142 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3143 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3144 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3147 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3148 slp_tree slp_node, slp_instance slp_node_instance)
3151 tree vec_dest = NULL;
3152 tree data_ref = NULL;
3153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3154 stmt_vec_info prev_stmt_info;
3155 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3156 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3157 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3158 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3159 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3160 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3163 gimple new_stmt = NULL;
3165 enum dr_alignment_support alignment_support_scheme;
3166 tree dataref_ptr = NULL_TREE;
3168 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3170 int i, j, group_size;
3171 tree msq = NULL_TREE, lsq;
3172 tree offset = NULL_TREE;
3173 tree realignment_token = NULL_TREE;
3175 VEC(tree,heap) *dr_chain = NULL;
3176 bool strided_load = false;
3180 bool compute_in_loop = false;
3181 struct loop *at_loop;
3183 bool slp = (slp_node != NULL);
3184 bool slp_perm = false;
3185 enum tree_code code;
3187 /* Multiple types in SLP are handled by creating the appropriate number of
3188 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3195 gcc_assert (ncopies >= 1);
3197 /* FORNOW. This restriction should be relaxed. */
3198 if (nested_in_vect_loop && ncopies > 1)
3200 if (vect_print_dump_info (REPORT_DETAILS))
3201 fprintf (vect_dump, "multiple types in nested loop.");
3205 if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3208 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3211 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3214 /* Is vectorizable load? */
3215 if (!is_gimple_assign (stmt))
3218 scalar_dest = gimple_assign_lhs (stmt);
3219 if (TREE_CODE (scalar_dest) != SSA_NAME)
3222 code = gimple_assign_rhs_code (stmt);
3223 if (code != ARRAY_REF
3224 && code != INDIRECT_REF
3225 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3228 if (!STMT_VINFO_DATA_REF (stmt_info))
3231 scalar_type = TREE_TYPE (DR_REF (dr));
3232 mode = (int) TYPE_MODE (vectype);
3234 /* FORNOW. In some cases can vectorize even if data-type not supported
3235 (e.g. - data copies). */
3236 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3238 if (vect_print_dump_info (REPORT_DETAILS))
3239 fprintf (vect_dump, "Aligned load, but unsupported type.");
3243 /* The vector component type needs to be trivially convertible to the
3244 scalar lhs. This should always be the case. */
3245 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3247 if (vect_print_dump_info (REPORT_DETAILS))
3248 fprintf (vect_dump, "??? operands of different types");
3252 /* Check if the load is a part of an interleaving chain. */
3253 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3255 strided_load = true;
3257 gcc_assert (! nested_in_vect_loop);
3259 /* Check if interleaving is supported. */
3260 if (!vect_strided_load_supported (vectype)
3261 && !PURE_SLP_STMT (stmt_info) && !slp)
3265 if (!vec_stmt) /* transformation not required. */
3267 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3268 vect_model_load_cost (stmt_info, ncopies, NULL);
3272 if (vect_print_dump_info (REPORT_DETAILS))
3273 fprintf (vect_dump, "transform load.");
3279 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3280 /* Check if the chain of loads is already vectorized. */
3281 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3283 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3286 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3287 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3289 /* VEC_NUM is the number of vect stmts to be created for this group. */
3292 strided_load = false;
3293 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3296 vec_num = group_size;
3298 dr_chain = VEC_alloc (tree, heap, vec_num);
3304 group_size = vec_num = 1;
3307 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3308 gcc_assert (alignment_support_scheme);
3310 /* In case the vectorization factor (VF) is bigger than the number
3311 of elements that we can fit in a vectype (nunits), we have to generate
3312 more than one vector stmt - i.e - we need to "unroll" the
3313 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3314 from one copy of the vector stmt to the next, in the field
3315 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3316 stages to find the correct vector defs to be used when vectorizing
3317 stmts that use the defs of the current stmt. The example below illustrates
3318 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3319 4 vectorized stmts):
3321 before vectorization:
3322 RELATED_STMT VEC_STMT
3326 step 1: vectorize stmt S1:
3327 We first create the vector stmt VS1_0, and, as usual, record a
3328 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3329 Next, we create the vector stmt VS1_1, and record a pointer to
3330 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3331 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3333 RELATED_STMT VEC_STMT
3334 VS1_0: vx0 = memref0 VS1_1 -
3335 VS1_1: vx1 = memref1 VS1_2 -
3336 VS1_2: vx2 = memref2 VS1_3 -
3337 VS1_3: vx3 = memref3 - -
3338 S1: x = load - VS1_0
3341 See the documentation in vect_get_vec_def_for_stmt_copy for how the
3342 information we recorded in RELATED_STMT field is used to vectorize
3345 /* In case of interleaving (non-unit strided access):
3352 Vectorized loads are created in the order of memory accesses
3353 starting from the access of the first stmt of the chain:
3356 VS2: vx1 = &base + vec_size*1
3357 VS3: vx3 = &base + vec_size*2
3358 VS4: vx4 = &base + vec_size*3
3360 Then permutation statements are generated:
3362 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3363 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3366 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3367 (the order of the data-refs in the output of vect_permute_load_chain
3368 corresponds to the order of scalar stmts in the interleaving chain - see
3369 the documentation of vect_permute_load_chain()).
3370 The generation of permutation stmts and recording them in
3371 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3373 In case of both multiple types and interleaving, the vector loads and
3374 permutation stmts above are created for every copy. The result vector stmts
3375 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3376 STMT_VINFO_RELATED_STMT for the next copies. */
3378 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3379 on a target that supports unaligned accesses (dr_unaligned_supported)
3380 we generate the following code:
3384 p = p + indx * vectype_size;
3389 Otherwise, the data reference is potentially unaligned on a target that
3390 does not support unaligned accesses (dr_explicit_realign_optimized) -
3391 then generate the following code, in which the data in each iteration is
3392 obtained by two vector loads, one from the previous iteration, and one
3393 from the current iteration:
3395 msq_init = *(floor(p1))
3396 p2 = initial_addr + VS - 1;
3397 realignment_token = call target_builtin;
3400 p2 = p2 + indx * vectype_size
3402 vec_dest = realign_load (msq, lsq, realignment_token)
3407 /* If the misalignment remains the same throughout the execution of the
3408 loop, we can create the init_addr and permutation mask at the loop
3409 preheader. Otherwise, it needs to be created inside the loop.
3410 This can only occur when vectorizing memory accesses in the inner-loop
3411 nested within an outer-loop that is being vectorized. */
3413 if (nested_in_vect_loop_p (loop, stmt)
3414 && (TREE_INT_CST_LOW (DR_STEP (dr))
3415 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3417 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3418 compute_in_loop = true;
3421 if ((alignment_support_scheme == dr_explicit_realign_optimized
3422 || alignment_support_scheme == dr_explicit_realign)
3423 && !compute_in_loop)
3425 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3426 alignment_support_scheme, NULL_TREE,
3428 if (alignment_support_scheme == dr_explicit_realign_optimized)
3430 phi = SSA_NAME_DEF_STMT (msq);
3431 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3437 prev_stmt_info = NULL;
3438 for (j = 0; j < ncopies; j++)
3440 /* 1. Create the vector pointer update chain. */
3442 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3444 &dummy, &ptr_incr, false,
3448 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3450 for (i = 0; i < vec_num; i++)
3453 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3456 /* 2. Create the vector-load in the loop. */
3457 switch (alignment_support_scheme)
3460 gcc_assert (aligned_access_p (first_dr));
3461 data_ref = build_fold_indirect_ref (dataref_ptr);
3463 case dr_unaligned_supported:
3465 int mis = DR_MISALIGNMENT (first_dr);
3466 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3468 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3470 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3473 case dr_explicit_realign:
3476 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3478 if (compute_in_loop)
3479 msq = vect_setup_realignment (first_stmt, gsi,
3481 dr_explicit_realign,
3484 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3485 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3486 new_stmt = gimple_build_assign (vec_dest, data_ref);
3487 new_temp = make_ssa_name (vec_dest, new_stmt);
3488 gimple_assign_set_lhs (new_stmt, new_temp);
3489 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3490 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3494 bump = size_binop (MULT_EXPR, vs_minus_1,
3495 TYPE_SIZE_UNIT (scalar_type));
3496 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3497 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3500 case dr_explicit_realign_optimized:
3501 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3506 /* If accesses through a pointer to vectype do not alias the original
3507 memory reference we have a problem. This should never happen. */
3508 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3509 get_alias_set (gimple_assign_rhs1 (stmt))));
3510 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3511 new_stmt = gimple_build_assign (vec_dest, data_ref);
3512 new_temp = make_ssa_name (vec_dest, new_stmt);
3513 gimple_assign_set_lhs (new_stmt, new_temp);
3514 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3515 mark_symbols_for_renaming (new_stmt);
3517 /* 3. Handle explicit realignment if necessary/supported. Create in
3518 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3519 if (alignment_support_scheme == dr_explicit_realign_optimized
3520 || alignment_support_scheme == dr_explicit_realign)
3524 lsq = gimple_assign_lhs (new_stmt);
3525 if (!realignment_token)
3526 realignment_token = dataref_ptr;
3527 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3528 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3530 new_stmt = gimple_build_assign (vec_dest, tmp);
3531 new_temp = make_ssa_name (vec_dest, new_stmt);
3532 gimple_assign_set_lhs (new_stmt, new_temp);
3533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3535 if (alignment_support_scheme == dr_explicit_realign_optimized)
3538 if (i == vec_num - 1 && j == ncopies - 1)
3539 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3544 /* 4. Handle invariant-load. */
3547 gcc_assert (!strided_load);
3548 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3553 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3555 /* CHECKME: bitpos depends on endianness? */
3556 bitpos = bitsize_zero_node;
3557 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3560 vect_create_destination_var (scalar_dest, NULL_TREE);
3561 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3562 new_temp = make_ssa_name (vec_dest, new_stmt);
3563 gimple_assign_set_lhs (new_stmt, new_temp);
3564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3566 for (k = nunits - 1; k >= 0; --k)
3567 t = tree_cons (NULL_TREE, new_temp, t);
3568 /* FIXME: use build_constructor directly. */
3569 vec_inv = build_constructor_from_list (vectype, t);
3570 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3571 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3574 gcc_unreachable (); /* FORNOW. */
3577 /* Collect vector loads and later create their permutation in
3578 vect_transform_strided_load (). */
3579 if (strided_load || slp_perm)
3580 VEC_quick_push (tree, dr_chain, new_temp);
3582 /* Store vector loads in the corresponding SLP_NODE. */
3583 if (slp && !slp_perm)
3584 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3587 if (slp && !slp_perm)
3592 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
3593 LOOP_VINFO_VECT_FACTOR (loop_vinfo),
3594 slp_node_instance, false))
3596 VEC_free (tree, heap, dr_chain);
3604 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3607 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3608 VEC_free (tree, heap, dr_chain);
3609 dr_chain = VEC_alloc (tree, heap, group_size);
3614 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3617 prev_stmt_info = vinfo_for_stmt (new_stmt);
3623 VEC_free (tree, heap, dr_chain);
3628 /* Function vect_is_simple_cond.
3631 LOOP_VINFO - vectorizer loop info, forwarded to vect_is_simple_use.
3632 COND - Condition that is checked for simple use.
3634 Returns whether a COND can be vectorized. Checks whether
3635 condition operands are supportable using vect_is_simple_use.

COND has to be a comparison; its two operands are then checked one by one.
An SSA_NAME operand is handed to vect_is_simple_use, any other operand is
compared against INTEGER_CST, REAL_CST and FIXED_CST.
NOTE(review): the statements executed when a check fails are not visible
in this copy of the file. */
3638 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3642 enum vect_def_type dt;
3644 if (!COMPARISON_CLASS_P (cond))
/* Split the comparison into its left and right operands. */
3647 lhs = TREE_OPERAND (cond, 0);
3648 rhs = TREE_OPERAND (cond, 1);
/* Left operand: an SSA_NAME is checked through vect_is_simple_use,
anything else is tested against the three constant codes. */
3650 if (TREE_CODE (lhs) == SSA_NAME)
3652 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3653 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3656 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3657 && TREE_CODE (lhs) != FIXED_CST)
/* Right operand: same test as for the left operand. */
3660 if (TREE_CODE (rhs) == SSA_NAME)
3662 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3663 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3666 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3667 && TREE_CODE (rhs) != FIXED_CST)
3673 /* vectorizable_condition.
3675 Check if STMT is conditional modify expression that can be vectorized.
3676 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3677 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
at GSI (through vect_finish_stmt_generation).
3680 Return FALSE if not a vectorizable STMT, TRUE otherwise.

Checks visible below: STMT must be relevant, have def type
vect_internal_def, not be an SLP stmt, not be live, and be a GIMPLE
assignment whose rhs code is COND_EXPR. The condition is validated with
vect_is_simple_cond; the then/else clauses must each be an SSA_NAME
accepted by vect_is_simple_use or an INTEGER_CST, REAL_CST or FIXED_CST.
Target support is queried with expand_vec_cond_expr_p.
NOTE(review): several guard lines and the failure actions are missing
from this copy of the file, so the exact control flow cannot be confirmed
from here. */
3683 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3686 tree scalar_dest = NULL_TREE;
3687 tree vec_dest = NULL_TREE;
3688 tree op = NULL_TREE;
3689 tree cond_expr, then_clause, else_clause;
3690 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3691 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3692 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3693 tree vec_compare, vec_cond_expr;
3695 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3696 enum machine_mode vec_mode;
3698 enum vect_def_type dt;
3699 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Vectorization factor divided by the number of elements in VECTYPE. */
3700 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3701 enum tree_code code;
3703 gcc_assert (ncopies >= 1);
3705 return false; /* FORNOW */
3707 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3710 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3713 /* FORNOW: SLP not supported. */
3714 if (STMT_SLP_TYPE (stmt_info))
3717 /* FORNOW: not yet supported. */
3718 if (STMT_VINFO_LIVE_P (stmt_info))
3720 if (vect_print_dump_info (REPORT_DETAILS))
3721 fprintf (vect_dump, "value used after loop.");
3725 /* Is vectorizable conditional operation? */
3726 if (!is_gimple_assign (stmt))
3729 code = gimple_assign_rhs_code (stmt);
3731 if (code != COND_EXPR)
/* The COND_EXPR is the single rhs operand; operands 0, 1 and 2 are the
condition, the then-value and the else-value. */
3734 gcc_assert (gimple_assign_single_p (stmt));
3735 op = gimple_assign_rhs1 (stmt);
3736 cond_expr = TREE_OPERAND (op, 0);
3737 then_clause = TREE_OPERAND (op, 1);
3738 else_clause = TREE_OPERAND (op, 2);
3740 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3743 /* We do not handle two different vector types for the condition
3745 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3748 if (TREE_CODE (then_clause) == SSA_NAME)
3750 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3751 if (!vect_is_simple_use (then_clause, loop_vinfo,
3752 &then_def_stmt, &def, &dt))
3755 else if (TREE_CODE (then_clause) != INTEGER_CST
3756 && TREE_CODE (then_clause) != REAL_CST
3757 && TREE_CODE (then_clause) != FIXED_CST)
3760 if (TREE_CODE (else_clause) == SSA_NAME)
3762 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3763 if (!vect_is_simple_use (else_clause, loop_vinfo,
3764 &else_def_stmt, &def, &dt))
3767 else if (TREE_CODE (else_clause) != INTEGER_CST
3768 && TREE_CODE (else_clause) != REAL_CST
3769 && TREE_CODE (else_clause) != FIXED_CST)
3773 vec_mode = TYPE_MODE (vectype);
3777 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3778 return expand_vec_cond_expr_p (op, vec_mode);
3784 scalar_dest = gimple_assign_lhs (stmt);
3785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3787 /* Handle cond expr. */
3789 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3791 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3792 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3793 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3795 /* Arguments are ready. Create the new vector stmt. */
3796 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3797 vec_cond_lhs, vec_cond_rhs);
3798 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3799 vec_compare, vec_then_clause, vec_else_clause);
/* Assign the VEC_COND_EXPR to a fresh SSA name based on VEC_DEST and
emit the statement. */
3801 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3802 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3803 gimple_assign_set_lhs (*vec_stmt, new_temp);
3804 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3810 /* Make sure the statement is vectorizable.

STMT is the statement to analyze. *NEED_TO_VECTORIZE is set to true when
STMT is relevant. The analysis calls each vectorizable_* routine with
NULL for the iterator and vec_stmt arguments, and additionally calls
vectorizable_live_operation for live statements that are not reductions.
Statements that are not pure SLP are also checked for strided-access
groups whose size is not a power of 2.
NOTE(review): the return statements are missing from this copy of the
file, so the result of each branch is not visible here. */
3813 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize)
3815 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3816 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3819 if (vect_print_dump_info (REPORT_DETAILS))
3821 fprintf (vect_dump, "==> examining statement: ");
3822 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3825 /* Skip stmts that do not need to be vectorized. In loops this is expected
3827 - the COND_EXPR which is the loop exit condition
3828 - any LABEL_EXPRs in the loop
3829 - computations that are used only for array indexing or loop control.
3830 In basic blocks we only analyze statements that are a part of some SLP
3831 instance, therefore, all the statements are relevant. */
3833 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3834 && !STMT_VINFO_LIVE_P (stmt_info))
3836 if (vect_print_dump_info (REPORT_DETAILS))
3837 fprintf (vect_dump, "irrelevant.");
/* Sanity-check the def type. For a reduction def the relevance must be
one of the three values asserted below. */
3842 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3844 case vect_internal_def:
3847 case vect_reduction_def:
3848 gcc_assert (relevance == vect_used_in_outer
3849 || relevance == vect_used_in_outer_by_reduction
3850 || relevance == vect_unused_in_scope);
3853 case vect_induction_def:
3854 case vect_constant_def:
3855 case vect_external_def:
3856 case vect_unknown_def_type:
/* A relevant stmt must not already have a vector mode and must have a
vectype recorded. */
3861 if (STMT_VINFO_RELEVANT_P (stmt_info))
3863 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3864 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3865 *need_to_vectorize = true;
/* Try the vectorizable_* routines in order; the || chain stops at the
first one that accepts STMT. */
3869 if (STMT_VINFO_RELEVANT_P (stmt_info)
3870 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
3871 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3872 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3873 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3874 || vectorizable_operation (stmt, NULL, NULL, NULL)
3875 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3876 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3877 || vectorizable_call (stmt, NULL, NULL)
3878 || vectorizable_store (stmt, NULL, NULL, NULL)
3879 || vectorizable_condition (stmt, NULL, NULL)
3880 || vectorizable_reduction (stmt, NULL, NULL));
3884 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3886 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3887 fprintf (vect_dump, "supported: ");
3888 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3894 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
3895 need extra handling, except for vectorizable reductions. */
3896 if (STMT_VINFO_LIVE_P (stmt_info)
3897 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
3898 ok = vectorizable_live_operation (stmt, NULL, NULL);
3902 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3904 fprintf (vect_dump, "not vectorized: live stmt not ");
3905 fprintf (vect_dump, "supported: ");
3906 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3912 if (!PURE_SLP_STMT (stmt_info))
3914 /* Groups of strided accesses whose size is not a power of 2 are not
3915 vectorizable yet using loop-vectorization. Therefore, if this stmt
3916 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
3917 loop-based vectorized), the loop cannot be vectorized. */
/* exact_log2 yields -1 when the group size is not a power of 2. */
3918 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
3919 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
3920 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
3922 if (vect_print_dump_info (REPORT_DETAILS))
3924 fprintf (vect_dump, "not vectorized: the size of group "
3925 "of strided accesses is not a power of 2");
3926 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3937 /* Function vect_transform_stmt.
3939 Create a vectorized stmt to replace STMT, and insert it at GSI.

The routine to call is selected by STMT_VINFO_TYPE (stmt_info).
*STRIDED_STORE is set to true for a strided store that is not handled
through an SLP node. SLP_NODE is forwarded to the routines that accept
one; the induction, condition, call and reduction cases assert that it is
NULL. The resulting vector stmt is recorded in STMT_VINFO_VEC_STMT of
STMT and, for pattern stmts, of the original stmt of the pattern as
well. */
3942 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
3943 bool *strided_store, slp_tree slp_node,
3944 slp_instance slp_node_instance)
3946 bool is_store = false;
3947 gimple vec_stmt = NULL;
3948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3949 gimple orig_stmt_in_pattern;
3951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Each case calls the matching vectorizable_* routine, this time with
GSI and the address of VEC_STMT supplied. */
3954 switch (STMT_VINFO_TYPE (stmt_info))
3956 case type_demotion_vec_info_type:
3957 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
3961 case type_promotion_vec_info_type:
3962 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
3966 case type_conversion_vec_info_type:
3967 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
3971 case induc_vec_info_type:
3972 gcc_assert (!slp_node);
3973 done = vectorizable_induction (stmt, gsi, &vec_stmt);
3977 case op_vec_info_type:
3978 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
3982 case assignment_vec_info_type:
3983 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
3987 case load_vec_info_type:
3988 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
3993 case store_vec_info_type:
3994 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
3996 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
3998 /* In case of interleaving, the whole chain is vectorized when the
3999 last store in the chain is reached. Store stmts before the last
4000 one are skipped, and their vec_stmt_info shouldn't be freed
4002 *strided_store = true;
4003 if (STMT_VINFO_VEC_STMT (stmt_info))
4010 case condition_vec_info_type:
4011 gcc_assert (!slp_node);
4012 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4016 case call_vec_info_type:
4017 gcc_assert (!slp_node);
4018 done = vectorizable_call (stmt, gsi, &vec_stmt);
4021 case reduc_vec_info_type:
4022 gcc_assert (!slp_node);
4023 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4028 if (!STMT_VINFO_LIVE_P (stmt_info))
4030 if (vect_print_dump_info (REPORT_DETAILS))
4031 fprintf (vect_dump, "stmt not supported.");
4036 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4037 is being vectorized, but outside the immediately enclosing loop. */
4039 && nested_in_vect_loop_p (loop, stmt)
4040 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4041 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4042 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
4044 struct loop *innerloop = loop->inner;
4045 imm_use_iterator imm_iter;
4046 use_operand_p use_p;
4050 if (vect_print_dump_info (REPORT_DETAILS))
4051 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4053 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4054 (to be used when vectorizing outer-loop stmts that use the DEF of
4056 if (gimple_code (stmt) == GIMPLE_PHI)
4057 scalar_dest = PHI_RESULT (stmt);
4059 scalar_dest = gimple_assign_lhs (stmt);
4061 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4063 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4065 exit_phi = USE_STMT (use_p);
4066 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4071 /* Handle stmts whose DEF is used outside the loop-nest that is
4072 being vectorized. */
4073 if (STMT_VINFO_LIVE_P (stmt_info)
4074 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4076 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
/* Record the vector stmt on STMT itself, then look for a related stmt. */
4082 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4083 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4084 if (orig_stmt_in_pattern)
4086 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4087 /* STMT was inserted by the vectorizer to replace a computation idiom.
4088 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4089 computed this idiom. We need to record a pointer to VEC_STMT in
4090 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4091 documentation of vect_pattern_recog. */
4092 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4094 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4095 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
/* vect_remove_stores: starting at FIRST_STMT, remove store stmts with
gsi_remove and release their stmt_vec_info with free_stmt_vec_info. The
successor of each stmt is read through DR_GROUP_NEXT_DR before its
stmt_vec_info is freed.
NOTE(review): the original header comment that follows is unterminated in
this copy of the file, and the loop construct is not visible. */
4104 /* Remove a group of stores (for SLP or interleaving), free their
4108 vect_remove_stores (gimple first_stmt)
4110 gimple next = first_stmt;
4112 gimple_stmt_iterator next_si;
4116 /* Free the attached stmt_vec_info and remove the stmt. */
4117 next_si = gsi_for_stmt (next);
4118 gsi_remove (&next_si, true);
4119 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4120 free_stmt_vec_info (next);
4126 /* Function new_stmt_vec_info.
4128 Create and initialize a new stmt_vec_info struct for STMT.

The struct is allocated zero-filled with xcalloc and every field is then
set explicitly. LOOP_VINFO is stored in STMT_VINFO_LOOP_VINFO. */
4131 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
4134 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
/* General per-stmt state: not yet classified, not relevant, not live. */
4136 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4137 STMT_VINFO_STMT (res) = stmt;
4138 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4139 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4140 STMT_VINFO_LIVE_P (res) = false;
4141 STMT_VINFO_VECTYPE (res) = NULL;
4142 STMT_VINFO_VEC_STMT (res) = NULL;
4143 STMT_VINFO_IN_PATTERN_P (res) = false;
4144 STMT_VINFO_RELATED_STMT (res) = NULL;
4145 STMT_VINFO_DATA_REF (res) = NULL;
/* Data-reference description fields start out empty. */
4147 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4148 STMT_VINFO_DR_OFFSET (res) = NULL;
4149 STMT_VINFO_DR_INIT (res) = NULL;
4150 STMT_VINFO_DR_STEP (res) = NULL;
4151 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
/* A PHI in a loop-header block gets vect_unknown_def_type;
vect_internal_def is the other value assigned (presumably on the else
branch, which is not visible in this copy). */
4153 if (gimple_code (stmt) == GIMPLE_PHI
4154 && is_loop_header_bb_p (gimple_bb (stmt)))
4155 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4157 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
/* Same-alignment refs vector (initial capacity 5), cost counters, SLP
kind and interleaving-group fields. */
4159 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4160 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4161 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4162 STMT_SLP_TYPE (res) = loop_vect;
4163 DR_GROUP_FIRST_DR (res) = NULL;
4164 DR_GROUP_NEXT_DR (res) = NULL;
4165 DR_GROUP_SIZE (res) = 0;
4166 DR_GROUP_STORE_COUNT (res) = 0;
4167 DR_GROUP_GAP (res) = 0;
4168 DR_GROUP_SAME_DR_STMT (res) = NULL;
4169 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4175 /* Create the vector (a heap VEC of vec_void_p with initial capacity 50)
that holds the stmt_vec_info entries. Asserts that the vector has not
been created already. */
4178 init_stmt_vec_info_vec (void)
4180 gcc_assert (!stmt_vec_info_vec);
4181 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4185 /* Free the vector that holds the stmt_vec_info entries. Asserts that
the vector exists. */
4188 free_stmt_vec_info_vec (void)
4190 gcc_assert (stmt_vec_info_vec);
4191 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4195 /* Free stmt vectorization related info.

Releases the STMT_VINFO_SAME_ALIGN_REFS vector of STMT's stmt_vec_info
and clears STMT's association by calling set_vinfo_for_stmt with NULL.
NOTE(review): some lines of this function are missing from this copy of
the file; any NULL check or release of the struct itself is not visible
here. */
4198 free_stmt_vec_info (gimple stmt)
4200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4205 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4206 set_vinfo_for_stmt (stmt, NULL);
/* get_vectype_for_scalar_type: build a vector type for SCALAR_TYPE.
NBYTES is the size of SCALAR_TYPE's machine mode; the element count is
UNITS_PER_SIMD_WORD (inner_mode) / NBYTES and the type is created with
build_vector_type. A size of zero, or one that is not smaller than
UNITS_PER_SIMD_WORD, is tested for first, and the mode of the resulting
type is tested for being a vector or integral mode ("mode not supported
by target." is dumped otherwise).
NOTE(review): both original comments below are unterminated in this copy
of the file, and the return statements are not visible. */
4211 /* Function get_vectype_for_scalar_type.
4213 Returns the vector type corresponding to SCALAR_TYPE as supported
4217 get_vectype_for_scalar_type (tree scalar_type)
4219 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4220 int nbytes = GET_MODE_SIZE (inner_mode);
4224 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4227 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4229 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4231 vectype = build_vector_type (scalar_type, nunits);
4232 if (vect_print_dump_info (REPORT_DETAILS))
4234 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4235 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4241 if (vect_print_dump_info (REPORT_DETAILS))
4243 fprintf (vect_dump, "vectype: ");
4244 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4247 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4248 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4250 if (vect_print_dump_info (REPORT_DETAILS))
4251 fprintf (vect_dump, "mode not supported by target.");
4258 /* Function vect_is_simple_use.
4261 LOOP_VINFO - the vectorization info of the loop that is being vectorized.
4262 OPERAND - operand of a stmt in LOOP.
4263 DEF - the defining stmt in case OPERAND is an SSA_NAME.
DEF_STMT - receives SSA_NAME_DEF_STMT (OPERAND) for an SSA_NAME operand.
DT - receives the vect_def_type classification of OPERAND.
4265 Returns whether a stmt with OPERAND can be vectorized.
4266 Supportable operands are constants, loop invariants, and operands that are
4267 defined by the current iteration of the loop. Unsupportable operands are
4268 those that are defined by a previous iteration of the loop (as is the case
4269 in reduction/induction computations). */
4272 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
4273 tree *def, enum vect_def_type *dt)
4276 stmt_vec_info stmt_vinfo;
4277 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4282 if (vect_print_dump_info (REPORT_DETAILS))
4284 fprintf (vect_dump, "vect_is_simple_use: operand ");
4285 print_generic_expr (vect_dump, operand, TDF_SLIM);
/* Integer and real constants are classified as vect_constant_def. */
4288 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4290 *dt = vect_constant_def;
/* Any other GIMPLE minimal invariant is classified as external. */
4293 if (is_gimple_min_invariant (operand))
4296 *dt = vect_external_def;
/* Look through a PAREN_EXPR to the expression it wraps. */
4300 if (TREE_CODE (operand) == PAREN_EXPR)
4302 if (vect_print_dump_info (REPORT_DETAILS))
4303 fprintf (vect_dump, "non-associatable copy.");
4304 operand = TREE_OPERAND (operand, 0);
4306 if (TREE_CODE (operand) != SSA_NAME)
4308 if (vect_print_dump_info (REPORT_DETAILS))
4309 fprintf (vect_dump, "not ssa-name.");
4313 *def_stmt = SSA_NAME_DEF_STMT (operand);
4314 if (*def_stmt == NULL)
4316 if (vect_print_dump_info (REPORT_DETAILS))
4317 fprintf (vect_dump, "no def_stmt.");
4321 if (vect_print_dump_info (REPORT_DETAILS))
4323 fprintf (vect_dump, "def_stmt: ");
4324 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4327 /* Empty stmt is expected only in case of a function argument.
4328 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4329 if (gimple_nop_p (*def_stmt))
4332 *dt = vect_external_def;
/* A definition outside LOOP is external; the other assignment to *DT
takes the def type recorded for the defining stmt (presumably on the
else branch, which is not visible in this copy). */
4336 bb = gimple_bb (*def_stmt);
4337 if (!flow_bb_inside_loop_p (loop, bb))
4338 *dt = vect_external_def;
4341 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4342 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4345 if (*dt == vect_unknown_def_type)
4347 if (vect_print_dump_info (REPORT_DETAILS))
4348 fprintf (vect_dump, "Unsupported pattern.");
4352 if (vect_print_dump_info (REPORT_DETAILS))
4353 fprintf (vect_dump, "type of def: %d.",*dt);
/* *DEF is taken from the PHI result, the assignment lhs or the call lhs,
depending on the kind of the defining stmt. */
4355 switch (gimple_code (*def_stmt))
4358 *def = gimple_phi_result (*def_stmt);
4362 *def = gimple_assign_lhs (*def_stmt);
4366 *def = gimple_call_lhs (*def_stmt);
4371 if (vect_print_dump_info (REPORT_DETAILS))
4372 fprintf (vect_dump, "unsupported defining stmt: ");
4380 /* Function supportable_widening_operation
4382 Check whether an operation represented by the code CODE is a
4383 widening operation that is supported by the target platform in
4384 vector form (i.e., when operating on arguments of type VECTYPE).
4386 Widening operations we currently support are NOP (CONVERT), FLOAT
4387 and WIDEN_MULT. This function checks if these operations are supported
4388 by the target platform either directly (via vector tree-codes), or via
the target's builtin_mul_widen_even / builtin_mul_widen_odd hooks.
STMT supplies the scalar result type (gimple_expr_type) from which the
wide vector type is derived.
4392 - CODE1 and CODE2 are codes of vector operations to be used when
4393 vectorizing the operation, if available.
4394 - DECL1 and DECL2 are decls of target builtin functions to be used
4395 when vectorizing the operation, if available. In this case,
4396 CODE1 and CODE2 are CALL_EXPR.
4397 - MULTI_STEP_CVT determines the number of required intermediate steps in
4398 case of multi-step conversion (like char->short->int - in that case
4399 MULTI_STEP_CVT will be 1).
4400 - INTERM_TYPES contains the intermediate type required to perform the
4401 widening operation (short in the above example). */
4404 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4405 tree *decl1, tree *decl2,
4406 enum tree_code *code1, enum tree_code *code2,
4407 int *multi_step_cvt,
4408 VEC (tree, heap) **interm_types)
4410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4411 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4412 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4414 enum machine_mode vec_mode;
4415 enum insn_code icode1, icode2;
4416 optab optab1, optab2;
4417 tree type = gimple_expr_type (stmt);
4418 tree wide_vectype = get_vectype_for_scalar_type (type);
4419 enum tree_code c1, c2;
4421 /* The result of a vectorized widening operation usually requires two vectors
4422 (because the widened results do not fit into one vector). The generated
4423 vector results would normally be expected to be generated in the same
4424 order as in the original scalar computation, i.e. if 8 results are
4425 generated in each vector iteration, they are to be organized as follows:
4426 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4428 However, in the special case that the result of the widening operation is
4429 used in a reduction computation only, the order doesn't matter (because
4430 when vectorizing a reduction we change the order of the computation).
4431 Some targets can take advantage of this and generate more efficient code.
4432 For example, targets like Altivec, that support widen_mult using a sequence
4433 of {mult_even,mult_odd} generate the following vectors:
4434 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4436 When vectorizing outer-loops, we execute the inner-loop sequentially
4437 (each vectorized inner-loop iteration contributes to VF outer-loop
4438 iterations in parallel). We therefore don't allow to change the order
4439 of the computation in the inner-loop during outer-loop vectorization. */
4441 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4442 && !nested_in_vect_loop_p (vect_loop, stmt))
4448 && code == WIDEN_MULT_EXPR
4449 && targetm.vectorize.builtin_mul_widen_even
4450 && targetm.vectorize.builtin_mul_widen_even (vectype)
4451 && targetm.vectorize.builtin_mul_widen_odd
4452 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4454 if (vect_print_dump_info (REPORT_DETAILS))
4455 fprintf (vect_dump, "Unordered widening operation detected.");
4457 *code1 = *code2 = CALL_EXPR;
4458 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4459 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
/* Pick the HI/LO pair of vector tree codes for CODE; which of the two
goes into C1 and which into C2 depends on BYTES_BIG_ENDIAN. */
4465 case WIDEN_MULT_EXPR:
4466 if (BYTES_BIG_ENDIAN)
4468 c1 = VEC_WIDEN_MULT_HI_EXPR;
4469 c2 = VEC_WIDEN_MULT_LO_EXPR;
4473 c2 = VEC_WIDEN_MULT_HI_EXPR;
4474 c1 = VEC_WIDEN_MULT_LO_EXPR;
4479 if (BYTES_BIG_ENDIAN)
4481 c1 = VEC_UNPACK_HI_EXPR;
4482 c2 = VEC_UNPACK_LO_EXPR;
4486 c2 = VEC_UNPACK_HI_EXPR;
4487 c1 = VEC_UNPACK_LO_EXPR;
4492 if (BYTES_BIG_ENDIAN)
4494 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4495 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4499 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4500 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4504 case FIX_TRUNC_EXPR:
4505 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4506 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4507 computing the operation. */
4514 if (code == FIX_TRUNC_EXPR)
4516 /* The signedness is determined from output operand. */
4517 optab1 = optab_for_tree_code (c1, type, optab_default);
4518 optab2 = optab_for_tree_code (c2, type, optab_default);
4522 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4523 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4526 if (!optab1 || !optab2)
/* Both optabs must have an instruction for the mode of VECTYPE. */
4529 vec_mode = TYPE_MODE (vectype);
4530 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4531 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4532 == CODE_FOR_nothing)
4535 /* Check if it's a multi-step conversion that can be done using intermediate
4537 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4538 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4541 tree prev_type = vectype, intermediate_type;
4542 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4543 optab optab3, optab4;
4545 if (!CONVERT_EXPR_CODE_P (code))
4551 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4552 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4553 to get to WIDE_VECTYPE, and fail if we do not. */
4554 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
/* NOTE(review): the loop bound below is the literal 3, while the vector
above is sized with MAX_INTERM_CVT_STEPS and filled with VEC_quick_push;
confirm the two values agree. */
4555 for (i = 0; i < 3; i++)
4557 intermediate_mode = insn_data[icode1].operand[0].mode;
4558 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4559 TYPE_UNSIGNED (prev_type));
4560 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4561 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4563 if (!optab3 || !optab4
4564 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4566 || insn_data[icode1].operand[0].mode != intermediate_mode
4567 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4569 || insn_data[icode2].operand[0].mode != intermediate_mode
4570 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4572 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4573 == CODE_FOR_nothing)
/* Record this intermediate type and count the extra step. */
4576 VEC_quick_push (tree, *interm_types, intermediate_type);
4577 (*multi_step_cvt)++;
4579 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4580 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4583 prev_type = intermediate_type;
4584 prev_mode = intermediate_mode;
4596 /* Function supportable_narrowing_operation
4598 Check whether an operation represented by the code CODE is a
4599 narrowing operation that is supported by the target platform in
4600 vector form (i.e., when operating on arguments of type VECTYPE).
4602 Narrowing operations we currently support are NOP (CONVERT) and
4603 FIX_TRUNC. This function checks if these operations are supported by
4604 the target platform directly via vector tree-codes.
4607 - CODE1 is the code of a vector operation to be used when
4608 vectorizing the operation, if available.
4609 - MULTI_STEP_CVT determines the number of required intermediate steps in
4610 case of multi-step conversion (like int->short->char - in that case
4611 MULTI_STEP_CVT will be 1).
4612 - INTERM_TYPES contains the intermediate types required to perform the
4613 narrowing operation (only short in the above example). */
4616 supportable_narrowing_operation (enum tree_code code,
4617 const_gimple stmt, tree vectype,
4618 enum tree_code *code1, int *multi_step_cvt,
4619 VEC (tree, heap) **interm_types)
4621 enum machine_mode vec_mode;
4622 enum insn_code icode1;
4623 optab optab1, interm_optab;
4624 tree type = gimple_expr_type (stmt);
4625 tree narrow_vectype = get_vectype_for_scalar_type (type);
4627 tree intermediate_type, prev_type;
4633 c1 = VEC_PACK_TRUNC_EXPR;
4636 case FIX_TRUNC_EXPR:
4637 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4641 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4642 tree code and optabs used for computing the operation. */
4649 if (code == FIX_TRUNC_EXPR)
4650 /* The signedness is determined from output operand. */
4651 optab1 = optab_for_tree_code (c1, type, optab_default);
4653 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4658 vec_mode = TYPE_MODE (vectype);
4659 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4660 == CODE_FOR_nothing)
4663 /* Check if it's a multi-step conversion that can be done using intermediate
4665 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4667 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4670 prev_type = vectype;
4671 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4672 intermediate steps in the demotion sequence. We try up to
4673 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
4674 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4675 for (i = 0; i < 3; i++)
4677 intermediate_mode = insn_data[icode1].operand[0].mode;
4678 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4679 TYPE_UNSIGNED (prev_type));
4680 interm_optab = optab_for_tree_code (c1, intermediate_type,
4683 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4685 || insn_data[icode1].operand[0].mode != intermediate_mode
4687 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4688 == CODE_FOR_nothing)
4691 VEC_quick_push (tree, *interm_types, intermediate_type);
4692 (*multi_step_cvt)++;
4694 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4697 prev_type = intermediate_type;
4698 prev_mode = intermediate_mode;