1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
46 /* Function vect_mark_relevant.
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): this listing is elided (gaps in the embedded numbering);
   braces/returns between the numbered lines are not shown.  */
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
/* Snapshot the current marking so we can detect below whether this call
   actually changed anything.  */
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
/* Redirect all further marking to the pattern stmt's vinfo; the assert
   checks the original stmt and pattern stmt point at each other.  */
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Liveness and relevance are upgraded monotonically: live_p can only be
   set, and relevance only increases along the enum ordering.  */
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* If neither property changed, the stmt was already marked at least this
   strongly — presumably it is already on the worklist, so we only log
   (the early exit is in elided lines — TODO confirm).  */
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
/* Newly (or more strongly) marked: queue for use-propagation.  */
93 VEC_safe_push (gimple, heap, *worklist, stmt);
97 /* Function vect_stmt_relevant_p.
99 Return true if STMT in loop that is represented by LOOP_VINFO is
100 "relevant for vectorization".
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - control stmts in the loop (except for the exit condition).
107 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): elided listing — declarations of def_p/use_p/op_iter and
   some control-flow braces fall in the missing lines.  */
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
115 imm_use_iterator imm_iter;
/* Start pessimistic; the checks below only ever strengthen *relevant.  */
119 *relevant = vect_unused_in_scope;
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
128 /* changing memory. */
/* PHIs never have vdefs, so only query gimple_vdef on non-PHI stmts.  */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
137 /* uses outside the loop. */
/* Walk every SSA def of STMT and every immediate use of each def; a use
   in a block outside LOOP means the value is live past the loop.  */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
148 /* We expect all such uses to be in the loop exit phis
149 (because of loop closed form) */
150 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
151 gcc_assert (bb == single_exit (loop)->dest);
/* Relevant iff live outside the loop or used inside it.  */
158 return (*live_p || *relevant);
162 /* Function exist_non_indexing_operands_for_use_p
164 USE is one of the uses attached to STMT. Check if USE is
165 used in STMT for anything other than indexing an array. */
/* NOTE(review): elided listing — the returns and the tail comparison of
   OPERAND against USE fall in the missing lines.  */
168 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
173 /* USE corresponds to some operand in STMT. If there is no data
174 reference in STMT, then any operand that corresponds to USE
175 is not indexing an array. */
176 if (!STMT_VINFO_DATA_REF (stmt_info))
179 /* STMT has a data_ref. FORNOW this means that its of one of
183 (This should have been verified in analyze_data_refs).
185 'var' in the second case corresponds to a def, not a use,
186 so USE cannot correspond to any operands that are not used
189 Therefore, all we need to check is if STMT falls into the
190 first case, and whether var corresponds to USE. */
/* A store (SSA-name LHS rules it out) vs. a load: only a load of the
   form 'var = memref' has a non-indexing SSA use on the RHS.  */
192 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
195 if (!gimple_assign_copy_p (stmt))
197 operand = gimple_assign_rhs1 (stmt);
199 if (TREE_CODE (operand) != SSA_NAME)
210 Function process_use.
213 - a USE in STMT in a loop represented by LOOP_VINFO
214 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
215 that defined USE. This is done by calling mark_relevant and passing it
216 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
219 Generally, LIVE_P and RELEVANT are used to define the liveness and
220 relevance info of the DEF_STMT of this USE:
221 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
222 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
224 - case 1: If USE is used only for address computations (e.g. array indexing),
225 which does not need to be directly vectorized, then the liveness/relevance
226 of the respective DEF_STMT is left unchanged.
227 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
228 skip DEF_STMT cause it had already been processed.
229 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
230 be modified accordingly.
232 Return true if everything is as expected. Return false otherwise. */
/* NOTE(review): elided listing — early returns, switch braces, and the
   default cases of both switches fall in the missing lines.  */
235 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
236 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
239 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
240 stmt_vec_info dstmt_vinfo;
241 basic_block bb, def_bb;
244 enum vect_def_type dt;
246 /* case 1: we are only interested in uses that need to be vectorized. Uses
247 that are used for address computation are not considered relevant. */
248 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* An unsupported use makes the whole loop unvectorizable — report and
   fail (return in elided lines — TODO confirm).  */
251 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
254 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* No defining stmt (e.g. a default definition): nothing to mark.  */
258 if (!def_stmt || gimple_nop_p (def_stmt))
/* Defs outside the loop are handled by the invariant machinery, not by
   relevance marking.  */
261 def_bb = gimple_bb (def_stmt);
262 if (!flow_bb_inside_loop_p (loop, def_bb))
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "def_stmt is out of loop.");
269 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
270 DEF_STMT must have already been processed, because this should be the
271 only way that STMT, which is a reduction-phi, was put in the worklist,
272 as there should be no other uses for DEF_STMT in the loop. So we just
273 check that everything is as expected, and we are done. */
274 dstmt_vinfo = vinfo_for_stmt (def_stmt);
275 bb = gimple_bb (stmt);
276 if (gimple_code (stmt) == GIMPLE_PHI
277 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
278 && gimple_code (def_stmt) != GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
280 && bb->loop_father == def_bb->loop_father)
282 if (vect_print_dump_info (REPORT_DETAILS))
283 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If DEF_STMT was replaced by a pattern stmt, the marking lives on the
   pattern stmt's vinfo — check that one instead.  */
284 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
285 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
286 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
287 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
288 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
292 /* case 3a: outer-loop stmt defining an inner-loop stmt:
293 outer-loop-header-bb:
/* Crossing from outer loop (def) into inner loop (use): translate the
   outer-scope relevance values into their inner-scope equivalents.  */
299 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
301 if (vect_print_dump_info (REPORT_DETAILS))
302 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
305 case vect_unused_in_scope:
306 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
307 vect_used_by_reduction : vect_unused_in_scope;
309 case vect_used_in_outer_by_reduction:
310 relevant = vect_used_by_reduction;
312 case vect_used_in_outer:
313 relevant = vect_used_in_scope;
315 case vect_used_by_reduction:
316 case vect_used_in_scope:
324 /* case 3b: inner-loop stmt defining an outer-loop stmt:
325 outer-loop-header-bb:
/* The converse direction: inner-loop def used in the outer loop —
   translate inner-scope relevance into the *_in_outer variants.  */
331 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
333 if (vect_print_dump_info (REPORT_DETAILS))
334 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
337 case vect_unused_in_scope:
338 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
339 vect_used_in_outer_by_reduction : vect_unused_in_scope;
342 case vect_used_in_outer_by_reduction:
343 case vect_used_in_outer:
346 case vect_used_by_reduction:
347 relevant = vect_used_in_outer_by_reduction;
350 case vect_used_in_scope:
351 relevant = vect_used_in_outer;
/* Finally propagate the (possibly adjusted) marking to DEF_STMT.  */
359 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
364 /* Function vect_mark_stmts_to_be_vectorized.
366 Not all stmts in the loop need to be vectorized. For example:
375 Stmt 1 and 3 do not need to be vectorized, because loop control and
376 addressing of vectorized data-refs are handled differently.
378 This pass detects such stmts. */
/* NOTE(review): elided listing — loop braces, some local declarations
   (bb, phi, stmt, i, live_p, iter, use_p), break statements and the
   final return fall in the missing lines.  */
381 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
383 VEC(gimple,heap) *worklist;
384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
385 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
386 unsigned int nbbs = loop->num_nodes;
387 gimple_stmt_iterator si;
390 stmt_vec_info stmt_vinfo;
394 enum vect_relevant relevant;
396 if (vect_print_dump_info (REPORT_DETAILS))
397 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
399 worklist = VEC_alloc (gimple, heap, 64);
401 /* 1. Init worklist. */
/* Seed the worklist with every stmt/phi that is intrinsically relevant
   (per vect_stmt_relevant_p) across all blocks of the loop.  */
402 for (i = 0; i < nbbs; i++)
405 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
408 if (vect_print_dump_info (REPORT_DETAILS))
410 fprintf (vect_dump, "init: phi relevant? ");
411 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
414 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
415 vect_mark_relevant (&worklist, phi, relevant, live_p);
417 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
419 stmt = gsi_stmt (si);
420 if (vect_print_dump_info (REPORT_DETAILS))
422 fprintf (vect_dump, "init: stmt relevant? ");
423 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
426 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
427 vect_mark_relevant (&worklist, stmt, relevant, live_p);
431 /* 2. Process_worklist */
/* Fixed-point propagation: pop a stmt and push the (possibly adjusted)
   relevance/liveness onto the defs of all its uses via process_use.  */
432 while (VEC_length (gimple, worklist) > 0)
437 stmt = VEC_pop (gimple, worklist);
438 if (vect_print_dump_info (REPORT_DETAILS))
440 fprintf (vect_dump, "worklist: examine stmt: ");
441 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
444 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
445 (DEF_STMT) as relevant/irrelevant and live/dead according to the
446 liveness and relevance properties of STMT. */
447 stmt_vinfo = vinfo_for_stmt (stmt);
448 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
449 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
451 /* Generally, the liveness and relevance properties of STMT are
452 propagated as is to the DEF_STMTs of its USEs:
453 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
454 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
456 One exception is when STMT has been identified as defining a reduction
457 variable; in this case we set the liveness/relevance as follows:
459 relevant = vect_used_by_reduction
460 This is because we distinguish between two kinds of relevant stmts -
461 those that are used by a reduction computation, and those that are
462 (also) used by a regular computation. This allows us later on to
463 identify stmts that are used solely by a reduction, and therefore the
464 order of the results that they produce does not have to be kept.
466 Reduction phis are expected to be used by a reduction stmt, or by
467 in an outer loop; Other reduction stmts are expected to be
468 in the loop, and possibly used by a stmt in an outer loop.
469 Here are the expected values of "relevant" for reduction phis/stmts:
472 vect_unused_in_scope ok
473 vect_used_in_outer_by_reduction ok ok
474 vect_used_in_outer ok ok
475 vect_used_by_reduction ok
476 vect_used_in_scope */
478 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
480 enum vect_relevant tmp_relevant = relevant;
481 switch (tmp_relevant)
483 case vect_unused_in_scope:
484 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
485 relevant = vect_used_by_reduction;
488 case vect_used_in_outer_by_reduction:
489 case vect_used_in_outer:
490 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
491 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
492 && (gimple_assign_rhs_code (stmt)
496 case vect_used_by_reduction:
497 if (gimple_code (stmt) == GIMPLE_PHI)
/* Any other relevance combination for a reduction stmt is unsupported:
   free the worklist and fail (return in elided lines — TODO confirm).  */
500 case vect_used_in_scope:
502 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "unsupported use of reduction.");
504 VEC_free (gimple, heap, worklist);
510 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
512 tree op = USE_FROM_PTR (use_p);
513 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
515 VEC_free (gimple, heap, worklist);
519 } /* while worklist */
521 VEC_free (gimple, heap, worklist);
/* Return the scalar-execution cost of STMT, selected by its
   classification in STMT_VINFO_TYPE.
   NOTE(review): elided listing — the switch braces and the
   undef_vec_info_type / default handling fall in missing lines.  */
527 cost_for_stmt (gimple stmt)
529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
531 switch (STMT_VINFO_TYPE (stmt_info))
533 case load_vec_info_type:
534 return TARG_SCALAR_LOAD_COST;
535 case store_vec_info_type:
536 return TARG_SCALAR_STORE_COST;
/* All remaining computational stmt kinds share the generic scalar
   stmt cost.  */
537 case op_vec_info_type:
538 case condition_vec_info_type:
539 case assignment_vec_info_type:
540 case reduc_vec_info_type:
541 case induc_vec_info_type:
542 case type_promotion_vec_info_type:
543 case type_demotion_vec_info_type:
544 case type_conversion_vec_info_type:
545 case call_vec_info_type:
546 return TARG_SCALAR_STMT_COST;
547 case undef_vec_info_type:
553 /* Function vect_model_simple_cost.
555 Models cost for simple operations, i.e. those that only emit ncopies of a
556 single op. Right now, this does not account for multiple insns that could
557 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): elided listing — the early return for pure-SLP stmts and
   some braces fall in the missing lines.  */
560 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
561 enum vect_def_type *dt, slp_tree slp_node)
564 int inside_cost = 0, outside_cost = 0;
566 /* The SLP costs were already calculated during SLP tree build. */
567 if (PURE_SLP_STMT (stmt_info))
/* One vector stmt per copy inside the loop.  */
570 inside_cost = ncopies * TARG_VEC_STMT_COST;
572 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constants/invariants must be broadcast into a vector once, outside
   the loop.  */
573 for (i = 0; i < 2; i++)
575 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
576 outside_cost += TARG_SCALAR_TO_VEC_COST;
579 if (vect_print_dump_info (REPORT_COST))
580 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
581 "outside_cost = %d .", inside_cost, outside_cost);
583 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
584 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
585 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
589 /* Function vect_cost_strided_group_size
591 For strided load or store, return the group_size only if it is the first
592 load or store of a group, else return 1. This ensures that group size is
593 only returned once per group. */
/* NOTE(review): elided listing — the fall-through 'return 1' for
   non-first group members is in the missing lines.  */
596 vect_cost_strided_group_size (stmt_vec_info stmt_info)
598 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
/* Only the first stmt of an interleaving group reports the full group
   size, so the group overhead is charged exactly once per group.  */
600 if (first_stmt == STMT_VINFO_STMT (stmt_info))
601 return DR_GROUP_SIZE (stmt_info);
607 /* Function vect_model_store_cost
609 Models cost for stores. In the case of strided accesses, one access
610 has the overhead of the strided access attributed to it. */
/* NOTE(review): elided listing — the pure-SLP early return, the
   group_size=1 else-branch, and some braces/arguments fall in the
   missing lines.  */
613 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
614 enum vect_def_type dt, slp_tree slp_node)
617 int inside_cost = 0, outside_cost = 0;
619 /* The SLP costs were already calculated during SLP tree build. */
620 if (PURE_SLP_STMT (stmt_info))
/* An invariant/constant stored value must be broadcast to a vector
   once, outside the loop.  */
623 if (dt == vect_constant_def || dt == vect_external_def)
624 outside_cost = TARG_SCALAR_TO_VEC_COST;
626 /* Strided access? */
627 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
628 group_size = vect_cost_strided_group_size (stmt_info);
629 /* Not a strided access. */
633 /* Is this an access in a group of stores, which provide strided access?
634 If so, add in the cost of the permutes. */
637 /* Uses a high and low interleave operation for each needed permute. */
/* log2(group_size) interleave stages, each touching group_size vectors,
   per copy.  */
638 inside_cost = ncopies * exact_log2(group_size) * group_size
639 * TARG_VEC_STMT_COST;
641 if (vect_print_dump_info (REPORT_COST))
642 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
647 /* Costs of the stores. */
648 inside_cost += ncopies * TARG_VEC_STORE_COST;
650 if (vect_print_dump_info (REPORT_COST))
651 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
652 "outside_cost = %d .", inside_cost, outside_cost);
654 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
655 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
656 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
660 /* Function vect_model_load_cost
662 Models cost for loads. In the case of strided accesses, the last access
663 has the overhead of the strided access attributed to it. Since unaligned
664 accesses are supported for loads, we also account for the costs of the
665 access scheme chosen. */
/* NOTE(review): elided listing — the pure-SLP early return, the
   non-strided else-branch, switch braces, the dr_aligned case label and
   the default (gcc_unreachable?) fall in the missing lines.  */
668 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
672 int alignment_support_cheme;
674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
675 int inside_cost = 0, outside_cost = 0;
677 /* The SLP costs were already calculated during SLP tree build. */
678 if (PURE_SLP_STMT (stmt_info))
681 /* Strided accesses? */
/* For interleaved loads, alignment is judged against the first data-ref
   of the group, since that is where the vector loads start.  */
682 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
683 if (first_stmt && !slp_node)
685 group_size = vect_cost_strided_group_size (stmt_info);
686 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
688 /* Not a strided access. */
695 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
697 /* Is this an access in a group of loads providing strided access?
698 If so, add in the cost of the permutes. */
701 /* Uses an even and odd extract operations for each needed permute. */
/* log2(group_size) extract stages, each touching group_size vectors,
   per copy.  */
702 inside_cost = ncopies * exact_log2(group_size) * group_size
703 * TARG_VEC_STMT_COST;
705 if (vect_print_dump_info (REPORT_COST))
706 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
711 /* The loads themselves. */
/* Cost the loads according to how the chosen alignment scheme realizes
   them.  */
712 switch (alignment_support_cheme)
716 inside_cost += ncopies * TARG_VEC_LOAD_COST;
718 if (vect_print_dump_info (REPORT_COST))
719 fprintf (vect_dump, "vect_model_load_cost: aligned.");
723 case dr_unaligned_supported:
725 /* Here, we assign an additional cost for the unaligned load. */
726 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
734 case dr_explicit_realign:
/* Explicit realignment: two loads plus a realign stmt per copy.  */
736 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
738 /* FIXME: If the misalignment remains fixed across the iterations of
739 the containing loop, the following cost should be added to the
741 if (targetm.vectorize.builtin_mask_for_load)
742 inside_cost += TARG_VEC_STMT_COST;
746 case dr_explicit_realign_optimized:
748 if (vect_print_dump_info (REPORT_COST))
749 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
752 /* Unaligned software pipeline has a load of an address, an initial
753 load, and possibly a mask operation to "prime" the loop. However,
754 if this is an access in a group of loads, which provide strided
755 access, then the above cost should only be considered for one
756 access in the group. Inside the loop, there is a load op
757 and a realignment op. */
759 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
761 outside_cost = 2*TARG_VEC_STMT_COST;
762 if (targetm.vectorize.builtin_mask_for_load)
763 outside_cost += TARG_VEC_STMT_COST;
766 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
775 if (vect_print_dump_info (REPORT_COST))
776 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
777 "outside_cost = %d .", inside_cost, outside_cost);
779 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
780 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
781 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
785 /* Function vect_init_vector.
787 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
788 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
789 is not NULL. Otherwise, place the initialization at the loop preheader.
790 Return the DEF of INIT_STMT.
791 It will be used in the vectorization of STMT. */
/* NOTE(review): elided listing — local declarations (new_var, init_stmt,
   new_temp, pe, new_bb, vec_oprnd), the if/else around the insertion
   choice and the final return fall in the missing lines.  */
794 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
795 gimple_stmt_iterator *gsi)
797 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
/* Build 'new_temp = VECTOR_VAR' with a fresh vector variable and a
   fresh SSA name as LHS.  */
805 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
806 add_referenced_var (new_var);
807 init_stmt = gimple_build_assign (new_var, vector_var);
808 new_temp = make_ssa_name (new_var, init_stmt);
809 gimple_assign_set_lhs (init_stmt, new_temp);
/* GSI given: insert right at the requested point.  */
812 vect_finish_stmt_generation (stmt, init_stmt, gsi);
815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
816 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
818 if (nested_in_vect_loop_p (loop, stmt))
/* No GSI: hoist the init onto the loop preheader edge; the assert
   checks the edge insertion did not have to split a new block.  */
820 pe = loop_preheader_edge (loop);
821 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
822 gcc_assert (!new_bb);
825 if (vect_print_dump_info (REPORT_DETAILS))
827 fprintf (vect_dump, "created new init_stmt: ");
828 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
/* The DEF of the init stmt is the vector def to be used by STMT.  */
831 vec_oprnd = gimple_assign_lhs (init_stmt);
835 /* Function vect_get_vec_def_for_operand.
837 OP is an operand in STMT. This function returns a (vector) def that will be
838 used in the vectorized stmt for STMT.
840 In the case that OP is an SSA_NAME which is defined in the loop, then
841 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
843 In case OP is an invariant or constant, a new stmt that creates a vector def
844 needs to be introduced. */
/* NOTE(review): elided listing — the enclosing switch (dt), several
   local declarations, braces, and per-case returns fall in the missing
   lines.  */
847 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
852 stmt_vec_info def_stmt_info = NULL;
853 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
854 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
855 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
862 enum vect_def_type dt;
866 if (vect_print_dump_info (REPORT_DETAILS))
868 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
869 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP; it was already validated during analysis, hence the
   assert rather than an error path.  */
872 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
873 gcc_assert (is_simple_use);
874 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "def = ");
879 print_generic_expr (vect_dump, def, TDF_SLIM);
883 fprintf (vect_dump, " def_stmt = ");
884 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
890 /* Case 1: operand is a constant. */
891 case vect_constant_def:
896 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
897 if (vect_print_dump_info (REPORT_DETAILS))
898 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Build the element list back-to-front to splat OP nunits times.  */
900 for (i = nunits - 1; i >= 0; --i)
902 t = tree_cons (NULL_TREE, op, t);
904 vec_cst = build_vector (vectype, t);
905 return vect_init_vector (stmt, vec_cst, vectype, NULL);
908 /* Case 2: operand is defined outside the loop - loop invariant. */
909 case vect_external_def:
/* Invariants use a vectype derived from the def's own scalar type,
   which may differ from STMT's vectype.  */
911 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
912 gcc_assert (vector_type);
913 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
918 /* Create 'vec_inv = {inv,inv,..,inv}' */
919 if (vect_print_dump_info (REPORT_DETAILS))
920 fprintf (vect_dump, "Create vector_inv.");
922 for (i = nunits - 1; i >= 0; --i)
924 t = tree_cons (NULL_TREE, def, t);
927 /* FIXME: use build_constructor directly. */
928 vec_inv = build_constructor_from_list (vector_type, t);
929 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
932 /* Case 3: operand is defined inside the loop. */
933 case vect_internal_def:
936 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
938 /* Get the def from the vectorized stmt. */
/* The defining stmt was already vectorized; pick the LHS appropriate
   to the vectorized stmt's kind (phi / call / assign).  */
939 def_stmt_info = vinfo_for_stmt (def_stmt);
940 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
941 gcc_assert (vec_stmt);
942 if (gimple_code (vec_stmt) == GIMPLE_PHI)
943 vec_oprnd = PHI_RESULT (vec_stmt);
944 else if (is_gimple_call (vec_stmt))
945 vec_oprnd = gimple_call_lhs (vec_stmt);
947 vec_oprnd = gimple_assign_lhs (vec_stmt);
951 /* Case 4: operand is defined by a loop header phi - reduction */
952 case vect_reduction_def:
956 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
957 loop = (gimple_bb (def_stmt))->loop_father;
959 /* Get the def before the loop */
960 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
961 return get_initial_def_for_reduction (stmt, op, scalar_def);
964 /* Case 5: operand is defined by loop-header phi - induction. */
965 case vect_induction_def:
967 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
969 /* Get the def from the vectorized stmt. */
970 def_stmt_info = vinfo_for_stmt (def_stmt);
971 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
972 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
973 vec_oprnd = PHI_RESULT (vec_stmt);
983 /* Function vect_get_vec_def_for_stmt_copy
985 Return a vector-def for an operand. This function is used when the
986 vectorized stmt to be created (by the caller to this function) is a "copy"
987 created in case the vectorized result cannot fit in one vector, and several
988 copies of the vector-stmt are required. In this case the vector-def is
989 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
990 of the stmt that defines VEC_OPRND.
991 DT is the type of the vector def VEC_OPRND.
994 In case the vectorization factor (VF) is bigger than the number
995 of elements that can fit in a vectype (nunits), we have to generate
996 more than one vector stmt to vectorize the scalar stmt. This situation
997 arises when there are multiple data-types operated upon in the loop; the
998 smallest data-type determines the VF, and as a result, when vectorizing
999 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1000 vector stmt (each computing a vector of 'nunits' results, and together
1001 computing 'VF' results in each iteration). This function is called when
1002 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1003 which VF=16 and nunits=4, so the number of copies required is 4):
1005 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1007 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1008 VS1.1: vx.1 = memref1 VS1.2
1009 VS1.2: vx.2 = memref2 VS1.3
1010 VS1.3: vx.3 = memref3
1012 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1013 VSnew.1: vz1 = vx.1 + ... VSnew.2
1014 VSnew.2: vz2 = vx.2 + ... VSnew.3
1015 VSnew.3: vz3 = vx.3 + ...
1017 The vectorization of S1 is explained in vectorizable_load.
1018 The vectorization of S2:
1019 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1020 the function 'vect_get_vec_def_for_operand' is called to
1021 get the relevant vector-def for each operand of S2. For operand x it
1022 returns the vector-def 'vx.0'.
1024 To create the remaining copies of the vector-stmt (VSnew.j), this
1025 function is called to get the relevant vector-def for each operand. It is
1026 obtained from the respective VS1.j stmt, which is recorded in the
1027 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1029 For example, to obtain the vector-def 'vx.1' in order to create the
1030 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1031 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1032 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1033 and return its def ('vx.1').
1034 Overall, to create the above sequence this function will be called 3 times:
1035 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1036 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1037 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): elided listing — the early 'return vec_oprnd' for
   constants/invariants and the final return fall in the missing lines.  */
1040 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1042 gimple vec_stmt_for_operand;
1043 stmt_vec_info def_stmt_info;
1045 /* Do nothing; can reuse same def. */
1046 if (dt == vect_external_def || dt == vect_constant_def )
/* Walk from the stmt defining VEC_OPRND to its next copy via
   STMT_VINFO_RELATED_STMT, and return that copy's def.  */
1049 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1050 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1051 gcc_assert (def_stmt_info);
1052 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1053 gcc_assert (vec_stmt_for_operand);
/* NOTE(review): the gimple_get_lhs here is immediately overwritten by
   the phi/non-phi selection below — appears redundant in this listing;
   confirm against the unelided source.  */
1054 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1055 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1056 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1058 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1063 /* Get vectorized definitions for the operands to create a copy of an original
1064 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
/* Replace the last def in each operand vector with the def for the next
   stmt copy (see vect_get_vec_def_for_stmt_copy).  */
1067 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1068 VEC(tree,heap) **vec_oprnds0,
1069 VEC(tree,heap) **vec_oprnds1)
/* Pop the previous def, advance it to the next copy, push it back —
   the vector keeps exactly one def per operand.  */
1071 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1073 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1074 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* Second operand is optional; only advance it when present.  */
1076 if (vec_oprnds1 && *vec_oprnds1)
1078 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1079 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1080 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1085 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
/* NOTE(review): elided listing — the slp_node parameter/branch structure
   and the 'if (op1)' guard fall in the missing lines.  */
1088 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1089 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
/* SLP: defs for the whole node come from the SLP tree.  */
1093 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
/* Non-SLP: one vector def per operand, obtained individually.  */
1098 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1099 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1100 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1104 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1105 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1106 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1112 /* Function vect_finish_stmt_generation.
1114 Insert a new stmt. */
/* Insert VEC_STMT before GSI, give it a stmt_vec_info in the current
   loop's context, and copy STMT's source location onto it.  */
1117 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1118 gimple_stmt_iterator *gsi)
1120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1121 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1123 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1125 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
/* New stmts need vinfo so later vectorizer queries on them work.  */
1127 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
1129 if (vect_print_dump_info (REPORT_DETAILS))
1131 fprintf (vect_dump, "add new stmt: ");
1132 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
/* Inherit the location of the stmt currently at GSI for diagnostics
   and debug info.  */
1135 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1138 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1139 a function declaration if the target has a vectorized version
1140 of the function, or NULL_TREE if the function cannot be vectorized. */
1143 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1145 tree fndecl = gimple_call_fndecl (call);
1146 enum built_in_function code;
1148 /* We only handle functions that do not read or clobber memory -- i.e.
1149 const or novops ones. */
1150 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* Only direct calls to built-in function decls are supported (the first
   operand of this condition is elided in this view — presumably a NULL
   check on fndecl; confirm against the full source).  */
1154 || TREE_CODE (fndecl) != FUNCTION_DECL
1155 || !DECL_BUILT_IN (fndecl))
/* Ask the target hook for a vectorized counterpart of this builtin.  */
1158 code = DECL_FUNCTION_CODE (fndecl);
1159 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1163 /* Function vectorizable_call.
1165 Check if STMT performs a function call that can be vectorized.
1166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1167 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1171 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1176 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1178 tree vectype_out, vectype_in;
1181 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1182 tree fndecl, new_temp, def, rhs_type, lhs_type;
1184 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1187 VEC(tree, heap) *vargs = NULL;
1188 enum { NARROW, NONE, WIDEN } modifier;
/* Only relevant, internal-def stmts are candidates.  */
1191 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1194 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1197 /* FORNOW: SLP not supported. */
1198 if (STMT_SLP_TYPE (stmt_info))
1201 /* Is STMT a vectorizable call? */
1202 if (!is_gimple_call (stmt))
1205 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1208 /* Process function arguments. */
1209 rhs_type = NULL_TREE;
1210 nargs = gimple_call_num_args (stmt);
1212 /* Bail out if the function has more than two arguments, we
1213 do not have interesting builtin functions to vectorize with
1214 more than two arguments. No arguments is also not good. */
1215 if (nargs == 0 || nargs > 2)
1218 for (i = 0; i < nargs; i++)
1220 op = gimple_call_arg (stmt, i);
1222 /* We can only handle calls with arguments of the same type. */
1224 && rhs_type != TREE_TYPE (op))
1226 if (vect_print_dump_info (REPORT_DETAILS))
1227 fprintf (vect_dump, "argument types differ.");
1230 rhs_type = TREE_TYPE (op);
1232 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
1234 if (vect_print_dump_info (REPORT_DETAILS))
1235 fprintf (vect_dump, "use not simple.");
/* Derive the vector types for the argument and result scalar types.  */
1240 vectype_in = get_vectype_for_scalar_type (rhs_type);
1243 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1245 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1246 vectype_out = get_vectype_for_scalar_type (lhs_type);
1249 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Classify the call as widening, plain, or narrowing, based on the ratio
   of output to input vector lanes.  */
1252 if (nunits_in == nunits_out / 2)
1254 else if (nunits_out == nunits_in)
1256 else if (nunits_out == nunits_in / 2)
1261 /* For now, we only vectorize functions if a target specific builtin
1262 is available. TODO -- in some cases, it might be profitable to
1263 insert the calls for pieces of the vector, in order to be able
1264 to vectorize other operations in the loop. */
1265 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1266 if (fndecl == NULL_TREE)
1268 if (vect_print_dump_info (REPORT_DETAILS))
1269 fprintf (vect_dump, "function is not vectorizable.");
/* The ECF_CONST/ECF_NOVOPS filter in vectorizable_function guarantees
   the call has no virtual uses.  */
1274 gcc_assert (!gimple_vuse (stmt));
1276 if (modifier == NARROW)
1277 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1279 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1281 /* Sanity check: make sure that at least one copy of the vectorized stmt
1282 needs to be generated. */
1283 gcc_assert (ncopies >= 1);
1285 if (!vec_stmt) /* transformation not required. */
1287 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1288 if (vect_print_dump_info (REPORT_DETAILS))
1289 fprintf (vect_dump, "=== vectorizable_call ===");
1290 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1296 if (vect_print_dump_info (REPORT_DETAILS))
1297 fprintf (vect_dump, "transform operation.");
1300 scalar_dest = gimple_call_lhs (stmt);
1301 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1303 prev_stmt_info = NULL;
/* NONE-modifier case: one vectorized call per copy, same lane count
   in and out.  */
1307 for (j = 0; j < ncopies; ++j)
1309 /* Build argument list for the vectorized call. */
1311 vargs = VEC_alloc (tree, heap, nargs);
1313 VEC_truncate (tree, vargs, 0);
1315 for (i = 0; i < nargs; i++)
1317 op = gimple_call_arg (stmt, i);
1320 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* NOTE(review): dt is declared with 2 elements (line 1184) but is
   indexed here with dt[nargs]; when nargs == 2 this reads one past
   the end.  dt[i] looks intended — confirm against upstream.  */
1323 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1325 VEC_quick_push (tree, vargs, vec_oprnd0);
1328 new_stmt = gimple_build_call_vec (fndecl, vargs);
1329 new_temp = make_ssa_name (vec_dest, new_stmt);
1330 gimple_call_set_lhs (new_stmt, new_temp);
1332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Chain the copies via STMT_VINFO_RELATED_STMT; the first copy is
   recorded as the stmt's VEC_STMT.  */
1335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1339 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* NARROW-modifier case: each vectorized call consumes two input vector
   defs per scalar argument (hence nargs * 2 operands).  */
1345 for (j = 0; j < ncopies; ++j)
1347 /* Build argument list for the vectorized call. */
1349 vargs = VEC_alloc (tree, heap, nargs * 2);
1351 VEC_truncate (tree, vargs, 0);
1353 for (i = 0; i < nargs; i++)
1355 op = gimple_call_arg (stmt, i);
1359 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* NOTE(review): same dt[nargs] indexing concern as above.  */
1361 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1366 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
1368 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
1371 VEC_quick_push (tree, vargs, vec_oprnd0);
1372 VEC_quick_push (tree, vargs, vec_oprnd1);
1375 new_stmt = gimple_build_call_vec (fndecl, vargs);
1376 new_temp = make_ssa_name (vec_dest, new_stmt);
1377 gimple_call_set_lhs (new_stmt, new_temp);
1379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1382 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1384 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1386 prev_stmt_info = vinfo_for_stmt (new_stmt);
1389 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1394 /* No current target implements this case. */
1398 VEC_free (tree, heap, vargs);
1400 /* Update the exception handling table with the vector stmt if necessary. */
1401 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1402 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1404 /* The call in STMT might prevent it from being removed in dce.
1405 We however cannot remove it here, due to the way the ssa name
1406 it defines is mapped to the new definition. So just replace
1407 rhs of the statement with something harmless. */
1409 type = TREE_TYPE (scalar_dest);
1410 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1411 fold_convert (type, integer_zero_node));
/* Transfer the stmt_vec_info from the old call to the dummy assignment
   and make the dummy the definition of the original lhs.  */
1412 set_vinfo_for_stmt (new_stmt, stmt_info);
1413 set_vinfo_for_stmt (stmt, NULL);
1414 STMT_VINFO_STMT (stmt_info) = new_stmt;
1415 gsi_replace (gsi, new_stmt, false);
1416 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1422 /* Function vect_gen_widened_results_half
1424 Create a vector stmt whose code, type, number of arguments, and result
1425 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1426 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1427 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1428 needs to be created (DECL is a function-decl of a target-builtin).
1429 STMT is the original scalar stmt that we are vectorizing. */
1432 vect_gen_widened_results_half (enum tree_code code,
1434 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1435 tree vec_dest, gimple_stmt_iterator *gsi,
1441 /* Generate half of the widened result: */
1442 if (code == CALL_EXPR)
1444 /* Target specific support */
/* Build a call to the target builtin with 1 or 2 vector args.  */
1445 if (op_type == binary_op)
1446 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1448 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1449 new_temp = make_ssa_name (vec_dest, new_stmt);
1450 gimple_call_set_lhs (new_stmt, new_temp);
1454 /* Generic support */
1455 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1456 if (op_type != binary_op)
1458 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1460 new_temp = make_ssa_name (vec_dest, new_stmt);
1461 gimple_assign_set_lhs (new_stmt, new_temp);
/* Insert the new stmt before STMT's position and record its vinfo.  */
1463 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1469 /* Check if STMT performs a conversion operation, that can be vectorized.
1470 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1471 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1472 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1475 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1476 gimple *vec_stmt, slp_tree slp_node)
1481 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1484 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1485 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1489 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1490 gimple new_stmt = NULL;
1491 stmt_vec_info prev_stmt_info;
1494 tree vectype_out, vectype_in;
1497 tree rhs_type, lhs_type;
1499 enum { NARROW, NONE, WIDEN } modifier;
1501 VEC(tree,heap) *vec_oprnds0 = NULL;
1504 VEC(tree,heap) *dummy = NULL;
1507 /* Is STMT a vectorizable conversion? */
1509 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1512 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1515 if (!is_gimple_assign (stmt))
1518 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Only int<->float conversions are handled here.  */
1521 code = gimple_assign_rhs_code (stmt);
1522 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1525 /* Check types of lhs and rhs. */
1526 op0 = gimple_assign_rhs1 (stmt);
1527 rhs_type = TREE_TYPE (op0);
1528 vectype_in = get_vectype_for_scalar_type (rhs_type);
1531 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1533 scalar_dest = gimple_assign_lhs (stmt);
1534 lhs_type = TREE_TYPE (scalar_dest);
1535 vectype_out = get_vectype_for_scalar_type (lhs_type);
1538 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Classify as widening, plain, or narrowing by the lane-count ratio.  */
1541 if (nunits_in == nunits_out / 2)
1543 else if (nunits_out == nunits_in)
1545 else if (nunits_out == nunits_in / 2)
1550 if (modifier == NONE)
1551 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out)
1553 /* Bail out if the types are both integral or non-integral. */
1554 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1555 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* The integral side of the conversion determines the type passed to
   the target's builtin_conversion hook.  */
1558 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1560 if (modifier == NARROW)
1561 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1563 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1565 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1566 this, so we can safely override NCOPIES with 1 here. */
1570 /* Sanity check: make sure that at least one copy of the vectorized stmt
1571 needs to be generated. */
1572 gcc_assert (ncopies >= 1);
1574 /* Check the operands of the operation. */
1575 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1577 if (vect_print_dump_info (REPORT_DETAILS))
1578 fprintf (vect_dump, "use not simple.");
1582 /* Supportable by target? */
1583 if ((modifier == NONE
1584 && !targetm.vectorize.builtin_conversion (code, integral_type))
1585 || (modifier == WIDEN
1586 && !supportable_widening_operation (code, stmt, vectype_in,
1589 &dummy_int, &dummy))
1590 || (modifier == NARROW
1591 && !supportable_narrowing_operation (code, stmt, vectype_in,
1592 &code1, &dummy_int, &dummy)))
1594 if (vect_print_dump_info (REPORT_DETAILS))
1595 fprintf (vect_dump, "conversion not supported by target.");
1599 if (modifier != NONE)
/* For widening/narrowing, record the input vectype on the stmt.  */
1601 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1602 /* FORNOW: SLP not supported. */
1603 if (STMT_SLP_TYPE (stmt_info))
1607 if (!vec_stmt) /* transformation not required. */
1609 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1614 if (vect_print_dump_info (REPORT_DETAILS))
1615 fprintf (vect_dump, "transform conversion.");
1618 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1620 if (modifier == NONE && !slp_node)
1621 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1623 prev_stmt_info = NULL;
/* NONE case: one target-builtin call per input vector def.  */
1627 for (j = 0; j < ncopies; j++)
1630 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1632 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1635 targetm.vectorize.builtin_conversion (code, integral_type);
1636 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1638 /* Arguments are ready. create the new vector stmt. */
1639 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1640 new_temp = make_ssa_name (vec_dest, new_stmt);
1641 gimple_call_set_lhs (new_stmt, new_temp);
1642 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1644 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1648 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1650 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1651 prev_stmt_info = vinfo_for_stmt (new_stmt);
1656 /* In case the vectorization factor (VF) is bigger than the number
1657 of elements that we can fit in a vectype (nunits), we have to
1658 generate more than one vector stmt - i.e - we need to "unroll"
1659 the vector stmt by a factor VF/nunits. */
1660 for (j = 0; j < ncopies; j++)
1663 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1665 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1667 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1669 /* Generate first half of the widened result: */
1671 = vect_gen_widened_results_half (code1, decl1,
1672 vec_oprnd0, vec_oprnd1,
1673 unary_op, vec_dest, gsi, stmt);
1675 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1677 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1678 prev_stmt_info = vinfo_for_stmt (new_stmt);
1680 /* Generate second half of the widened result: */
1682 = vect_gen_widened_results_half (code2, decl2,
1683 vec_oprnd0, vec_oprnd1,
1684 unary_op, vec_dest, gsi, stmt);
1685 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1686 prev_stmt_info = vinfo_for_stmt (new_stmt);
1691 /* In case the vectorization factor (VF) is bigger than the number
1692 of elements that we can fit in a vectype (nunits), we have to
1693 generate more than one vector stmt - i.e - we need to "unroll"
1694 the vector stmt by a factor VF/nunits. */
1695 for (j = 0; j < ncopies; j++)
/* NARROW case: each output vector is built from two consecutive input
   vector defs; note the deliberate alternation of vec_oprnd0/1 when
   fetching copies.  */
1700 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1701 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1705 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1706 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1709 /* Arguments are ready. Create the new vector stmt. */
1710 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1711 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1713 new_temp = make_ssa_name (vec_dest, new_stmt);
1714 gimple_assign_set_lhs (new_stmt, new_temp);
1715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1718 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1720 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1722 prev_stmt_info = vinfo_for_stmt (new_stmt);
1725 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1729 VEC_free (tree, heap, vec_oprnds0);
1733 /* Function vectorizable_assignment.
1735 Check if STMT performs an assignment (copy) that can be vectorized.
1736 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1737 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1738 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1741 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1742 gimple *vec_stmt, slp_tree slp_node)
1747 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1748 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1749 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1753 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1754 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1757 VEC(tree,heap) *vec_oprnds = NULL;
1760 /* Multiple types in SLP are handled by creating the appropriate number of
1761 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1766 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1768 gcc_assert (ncopies >= 1);
/* Multiple copies of a plain copy stmt are not supported yet.  */
1770 return false; /* FORNOW */
1772 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1775 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1778 /* Is vectorizable assignment? */
1779 if (!is_gimple_assign (stmt))
1782 scalar_dest = gimple_assign_lhs (stmt);
1783 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* Accept plain copies and PAREN_EXPR wrappers; anything else is not
   an assignment this function handles.  */
1786 if (gimple_assign_single_p (stmt)
1787 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1788 op = gimple_assign_rhs1 (stmt);
1792 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
1794 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "use not simple.");
1799 if (!vec_stmt) /* transformation not required. */
1801 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "=== vectorizable_assignment ===");
1804 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1809 if (vect_print_dump_info (REPORT_DETAILS))
1810 fprintf (vect_dump, "transform assignment.");
1813 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1816 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1818 /* Arguments are ready. create the new vector stmt. */
1819 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1821 *vec_stmt = gimple_build_assign (vec_dest, vop);
1822 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1823 gimple_assign_set_lhs (*vec_stmt, new_temp);
1824 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1825 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1828 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1831 VEC_free (tree, heap, vec_oprnds);
1835 /* Function vectorizable_operation.
1837 Check if STMT performs a binary or unary operation that can be vectorized.
1838 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1839 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1840 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1843 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1844 gimple *vec_stmt, slp_tree slp_node)
1848 tree op0, op1 = NULL;
1849 tree vec_oprnd1 = NULL_TREE;
1850 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1851 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1852 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1853 enum tree_code code;
1854 enum machine_mode vec_mode;
1859 enum machine_mode optab_op2_mode;
1862 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1863 gimple new_stmt = NULL;
1864 stmt_vec_info prev_stmt_info;
1865 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1870 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1873 bool shift_p = false;
1874 bool scalar_shift_arg = false;
1876 /* Multiple types in SLP are handled by creating the appropriate number of
1877 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1882 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1884 gcc_assert (ncopies >= 1);
1886 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1892 /* Is STMT a vectorizable binary/unary operation? */
1893 if (!is_gimple_assign (stmt))
1896 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Input and output vectors must have the same number of lanes; this
   pass handles neither widening nor narrowing operations.  */
1899 scalar_dest = gimple_assign_lhs (stmt);
1900 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1903 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1904 if (nunits_out != nunits_in)
1907 code = gimple_assign_rhs_code (stmt);
1909 /* For pointer addition, we should use the normal plus for
1910 the vector addition. */
1911 if (code == POINTER_PLUS_EXPR)
1914 /* Support only unary or binary operations. */
1915 op_type = TREE_CODE_LENGTH (code);
1916 if (op_type != unary_op && op_type != binary_op)
1918 if (vect_print_dump_info (REPORT_DETAILS))
1919 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1923 op0 = gimple_assign_rhs1 (stmt);
1924 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1926 if (vect_print_dump_info (REPORT_DETAILS))
1927 fprintf (vect_dump, "use not simple.");
1931 if (op_type == binary_op)
1933 op1 = gimple_assign_rhs2 (stmt);
1934 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
1936 if (vect_print_dump_info (REPORT_DETAILS))
1937 fprintf (vect_dump, "use not simple.");
1942 /* If this is a shift/rotate, determine whether the shift amount is a vector,
1943 or scalar. If the shift/rotate amount is a vector, use the vector/vector
1945 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1946 || code == RROTATE_EXPR)
1950 /* vector shifted by vector */
1951 if (dt[1] == vect_internal_def)
1953 optab = optab_for_tree_code (code, vectype, optab_vector);
1954 if (vect_print_dump_info (REPORT_DETAILS))
1955 fprintf (vect_dump, "vector/vector shift/rotate found.");
1958 /* See if the machine has a vector shifted by scalar insn and if not
1959 then see if it has a vector shifted by vector insn */
1960 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
1962 optab = optab_for_tree_code (code, vectype, optab_scalar);
1964 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1965 != CODE_FOR_nothing))
1967 scalar_shift_arg = true;
1968 if (vect_print_dump_info (REPORT_DETAILS))
1969 fprintf (vect_dump, "vector/scalar shift/rotate found.");
/* No vector/scalar pattern: fall back to the vector/vector form.  */
1973 optab = optab_for_tree_code (code, vectype, optab_vector);
1974 if (vect_print_dump_info (REPORT_DETAILS)
1976 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1977 != CODE_FOR_nothing))
1978 fprintf (vect_dump, "vector/vector shift/rotate found.");
/* Shift amount is neither internal, constant, nor invariant: reject.  */
1984 if (vect_print_dump_info (REPORT_DETAILS))
1985 fprintf (vect_dump, "operand mode requires invariant argument.");
/* Not a shift/rotate: use the default optab for the tree code.  */
1990 optab = optab_for_tree_code (code, vectype, optab_default);
1992 /* Supportable by target? */
1995 if (vect_print_dump_info (REPORT_DETAILS))
1996 fprintf (vect_dump, "no optab.");
1999 vec_mode = TYPE_MODE (vectype);
2000 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2001 if (icode == CODE_FOR_nothing)
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "op not supported by target.");
2005 /* Check only during analysis. */
2006 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2007 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2008 < vect_min_worthwhile_factor (code)
2011 if (vect_print_dump_info (REPORT_DETAILS))
2012 fprintf (vect_dump, "proceeding using word mode.");
2015 /* Worthwhile without SIMD support? Check only during analysis. */
2016 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2017 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2018 < vect_min_worthwhile_factor (code)
2021 if (vect_print_dump_info (REPORT_DETAILS))
2022 fprintf (vect_dump, "not worthwhile without SIMD support.");
2026 if (!vec_stmt) /* transformation not required. */
2028 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2029 if (vect_print_dump_info (REPORT_DETAILS))
2030 fprintf (vect_dump, "=== vectorizable_operation ===");
2031 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2037 if (vect_print_dump_info (REPORT_DETAILS))
2038 fprintf (vect_dump, "transform binary/unary operation.");
2041 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2043 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2044 created in the previous stages of the recursion, so no allocation is
2045 needed, except for the case of shift with scalar shift argument. In that
2046 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2047 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2048 In case of loop-based vectorization we allocate VECs of size 1. We
2049 allocate VEC_OPRNDS1 only in case of binary operation. */
2052 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2053 if (op_type == binary_op)
2054 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2056 else if (scalar_shift_arg)
2057 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2059 /* In case the vectorization factor (VF) is bigger than the number
2060 of elements that we can fit in a vectype (nunits), we have to generate
2061 more than one vector stmt - i.e - we need to "unroll" the
2062 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2063 from one copy of the vector stmt to the next, in the field
2064 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2065 stages to find the correct vector defs to be used when vectorizing
2066 stmts that use the defs of the current stmt. The example below illustrates
2067 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2068 4 vectorized stmts):
2070 before vectorization:
2071 RELATED_STMT VEC_STMT
2075 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2077 RELATED_STMT VEC_STMT
2078 VS1_0: vx0 = memref0 VS1_1 -
2079 VS1_1: vx1 = memref1 VS1_2 -
2080 VS1_2: vx2 = memref2 VS1_3 -
2081 VS1_3: vx3 = memref3 - -
2082 S1: x = load - VS1_0
2085 step2: vectorize stmt S2 (done here):
2086 To vectorize stmt S2 we first need to find the relevant vector
2087 def for the first operand 'x'. This is, as usual, obtained from
2088 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2089 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2090 relevant vector def 'vx0'. Having found 'vx0' we can generate
2091 the vector stmt VS2_0, and as usual, record it in the
2092 STMT_VINFO_VEC_STMT of stmt S2.
2093 When creating the second copy (VS2_1), we obtain the relevant vector
2094 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2095 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2096 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2097 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2098 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2099 chain of stmts and pointers:
2100 RELATED_STMT VEC_STMT
2101 VS1_0: vx0 = memref0 VS1_1 -
2102 VS1_1: vx1 = memref1 VS1_2 -
2103 VS1_2: vx2 = memref2 VS1_3 -
2104 VS1_3: vx3 = memref3 - -
2105 S1: x = load - VS1_0
2106 VS2_0: vz0 = vx0 + v1 VS2_1 -
2107 VS2_1: vz1 = vx1 + v1 VS2_2 -
2108 VS2_2: vz2 = vx2 + v1 VS2_3 -
2109 VS2_3: vz3 = vx3 + v1 - -
2110 S2: z = x + 1 - VS2_0 */
2112 prev_stmt_info = NULL;
2113 for (j = 0; j < ncopies; j++)
2118 if (op_type == binary_op && scalar_shift_arg)
2120 /* Vector shl and shr insn patterns can be defined with scalar
2121 operand 2 (shift operand). In this case, use constant or loop
2122 invariant op1 directly, without extending it to vector mode
2124 optab_op2_mode = insn_data[icode].operand[2].mode;
2125 if (!VECTOR_MODE_P (optab_op2_mode))
2127 if (vect_print_dump_info (REPORT_DETAILS))
2128 fprintf (vect_dump, "operand 1 using scalar mode.");
2130 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2133 /* Store vec_oprnd1 for every vector stmt to be created
2134 for SLP_NODE. We check during the analysis that all the
2135 shift arguments are the same.
2136 TODO: Allow different constants for different vector
2137 stmts generated for an SLP instance. */
2138 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2139 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2144 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2145 (a special case for certain kind of vector shifts); otherwise,
2146 operand 1 should be of a vector type (the usual case). */
2147 if (op_type == binary_op && !vec_oprnd1)
2148 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2151 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
/* Subsequent copies (j > 0) derive their defs from the previous ones.  */
2155 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2157 /* Arguments are ready. Create the new vector stmt. */
2158 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2160 vop1 = ((op_type == binary_op)
2161 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2162 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2163 new_temp = make_ssa_name (vec_dest, new_stmt);
2164 gimple_assign_set_lhs (new_stmt, new_temp);
2165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2167 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Chain the copies through STMT_VINFO_RELATED_STMT, as documented in
   the big comment above.  */
2174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177 prev_stmt_info = vinfo_for_stmt (new_stmt);
2180 VEC_free (tree, heap, vec_oprnds0);
2182 VEC_free (tree, heap, vec_oprnds1);
2188 /* Get vectorized definitions for loop-based vectorization. For the first
2189 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2190 scalar operand), and for the rest we get a copy with
2191 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2192 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2193 The vectors are collected into VEC_OPRNDS. */
2196 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2197 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2201 /* Get first vector operand. */
2202 /* All the vector operands except the very first one (that is scalar oprnd)
2204 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2205 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2207 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2209 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2211 /* Get second vector operand. */
2212 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2213 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Recursion depth equals the number of remaining conversion steps;
   each level collects two more vector defs.  */
2217 /* For conversion in multiple steps, continue to get operands
2220 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2224 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2225 For multi-step conversions store the resulting vectors and call the function
2229 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2230 int multi_step_cvt, gimple stmt,
2231 VEC (tree, heap) *vec_dsts,
2232 gimple_stmt_iterator *gsi,
2233 slp_tree slp_node, enum tree_code code,
2234 stmt_vec_info *prev_stmt_info)
2237 tree vop0, vop1, new_tmp, vec_dest;
2239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* VEC_DSTS is used as a stack: pop the destination for this step.  */
2241 vec_dest = VEC_pop (tree, vec_dsts);
/* Each demotion combines a pair of input vectors into one output.  */
2243 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2245 /* Create demotion operation. */
2246 vop0 = VEC_index (tree, *vec_oprnds, i);
2247 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2248 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2249 new_tmp = make_ssa_name (vec_dest, new_stmt);
2250 gimple_assign_set_lhs (new_stmt, new_tmp);
2251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 /* Store the resulting vector for next recursive call. */
2255 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2258 /* This is the last step of the conversion sequence. Store the
2259 vectors in SLP_NODE or in vector info of the scalar statement
2260 (or in STMT_VINFO_RELATED_STMT chain). */
2262 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2265 if (!*prev_stmt_info)
2266 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2268 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2270 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2275 /* For multi-step demotion operations we first generate demotion operations
2276 from the source type to the intermediate types, and then combine the
2277 results (stored in VEC_OPRNDS) in demotion operation to the destination
2281 /* At each level of recursion we have have of the operands we had at the
2283 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2284 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2285 stmt, vec_dsts, gsi, slp_node,
2286 code, prev_stmt_info);
2291 /* Function vectorizable_type_demotion
2293 Check if STMT performs a binary or unary operation that involves
2294 type demotion, and if it can be vectorized.
2295 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2296 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2297 Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2300 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2301 gimple *vec_stmt, slp_tree slp_node)
2306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2307 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2308 enum tree_code code, code1 = ERROR_MARK;
2311 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2312 stmt_vec_info prev_stmt_info;
2319 int multi_step_cvt = 0;
2320 VEC (tree, heap) *vec_oprnds0 = NULL;
2321 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2322 tree last_oprnd, intermediate_type;
   /* Only statements marked relevant and internally defined are handled.  */
2324 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2327 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2330 /* Is STMT a vectorizable type-demotion operation?  */
2331 if (!is_gimple_assign (stmt))
2334 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2337 code = gimple_assign_rhs_code (stmt);
2338 if (!CONVERT_EXPR_CODE_P (code))
2341 op0 = gimple_assign_rhs1 (stmt);
2342 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2345 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2347 scalar_dest = gimple_assign_lhs (stmt);
2348 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2351 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   /* A demotion must widen the element count; otherwise bail out.  */
2352 if (nunits_in >= nunits_out)
2355 /* Multiple types in SLP are handled by creating the appropriate number of
2356 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
2361 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2363 gcc_assert (ncopies >= 1);
   /* Both operands must be integral, or both floating-point with an
      explicit conversion.  */
2365 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2366 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2367 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2368 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2369 && CONVERT_EXPR_CODE_P (code))))
2372 /* Check the operands of the operation.  */
2373 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2375 if (vect_print_dump_info (REPORT_DETAILS))
2376 fprintf (vect_dump, "use not simple.");
2380 /* Supportable by target?  */
2381 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2382 &multi_step_cvt, &interm_types))
2385 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2387 if (!vec_stmt) /* transformation not required.  */
2389 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2390 if (vect_print_dump_info (REPORT_DETAILS))
2391 fprintf (vect_dump, "=== vectorizable_demotion ===");
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
   /* Transformation phase.  */
2397 if (vect_print_dump_info (REPORT_DETAILS))
2398 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2401 /* In case of multi-step demotion, we first generate demotion operations to
2402 the intermediate types, and then from those types to the final one.
2403 We create vector destinations for the intermediate type (TYPES) received
2404 from supportable_narrowing_operation, and store them in the correct order
2405 for future use in vect_create_vectorized_demotion_stmts().  */
2407 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2409 vec_dsts = VEC_alloc (tree, heap, 1);
2411 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2412 VEC_quick_push (tree, vec_dsts, vec_dest);
   /* Push intermediate destinations in reverse so the innermost step's
      destination ends up on top of the stack.  */
2416 for (i = VEC_length (tree, interm_types) - 1;
2417 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2419 vec_dest = vect_create_destination_var (scalar_dest,
2421 VEC_quick_push (tree, vec_dsts, vec_dest);
2425 /* In case the vectorization factor (VF) is bigger than the number
2426 of elements that we can fit in a vectype (nunits), we have to generate
2427 more than one vector stmt - i.e - we need to "unroll" the
2428 vector stmt by a factor VF/nunits.  */
2430 prev_stmt_info = NULL;
2431 for (j = 0; j < ncopies; j++)
2435 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2438 VEC_free (tree, heap, vec_oprnds0);
2439 vec_oprnds0 = VEC_alloc (tree, heap,
2440 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2441 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2442 vect_pow2 (multi_step_cvt) - 1);
2445 /* Arguments are ready.  Create the new vector stmts.  */
   /* Copy VEC_DSTS because the callee pops from it on each recursion level
      and we need the original again for the next copy.  */
2446 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2447 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2448 multi_step_cvt, stmt, tmp_vec_dsts,
2449 gsi, slp_node, code1,
2453 VEC_free (tree, heap, vec_oprnds0);
2454 VEC_free (tree, heap, vec_dsts);
2455 VEC_free (tree, heap, tmp_vec_dsts);
2456 VEC_free (tree, heap, interm_types);
2458 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2463 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2464 and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
2465 the resulting vectors and call the function recursively.
   Each source vector yields two wider result vectors (a "high" and a "low"
   half), produced with CODE1/CODE2 or, for builtin-based widening, with
   DECL1/DECL2.  VEC_DSTS holds one destination per step (popped here).  */
2468 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2469 VEC (tree, heap) **vec_oprnds1,
2470 int multi_step_cvt, gimple stmt,
2471 VEC (tree, heap) *vec_dsts,
2472 gimple_stmt_iterator *gsi,
2473 slp_tree slp_node, enum tree_code code1,
2474 enum tree_code code2, tree decl1,
2475 tree decl2, int op_type,
2476 stmt_vec_info *prev_stmt_info)
2479 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2480 gimple new_stmt1, new_stmt2;
2481 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2482 VEC (tree, heap) *vec_tmp;
2484 vec_dest = VEC_pop (tree, vec_dsts);
   /* Promotion doubles the number of vectors.  */
2485 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2487 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2489 if (op_type == binary_op)
2490 vop1 = VEC_index (tree, *vec_oprnds1, i);
2494 /* Generate the two halves of promotion operation.  */
2495 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2496 op_type, vec_dest, gsi, stmt);
2497 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2498 op_type, vec_dest, gsi, stmt);
   /* The generated half may be a call (builtin-based widening) or a
      plain assignment; fetch the lhs accordingly.  */
2499 if (is_gimple_call (new_stmt1))
2501 new_tmp1 = gimple_call_lhs (new_stmt1);
2502 new_tmp2 = gimple_call_lhs (new_stmt2);
2506 new_tmp1 = gimple_assign_lhs (new_stmt1);
2507 new_tmp2 = gimple_assign_lhs (new_stmt2);
2512 /* Store the results for the recursive call.  */
2513 VEC_quick_push (tree, vec_tmp, new_tmp1);
2514 VEC_quick_push (tree, vec_tmp, new_tmp2);
2518 /* Last step of promotion sequence - store the results.  */
2521 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2522 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2526 if (!*prev_stmt_info)
2527 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2529 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2531 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2532 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2533 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2540 /* For multi-step promotion operation we call the function
2541 recursively for every stage.  We start from the input type,
2542 create promotion operations to the intermediate types, and then
2543 create promotions to the output type.  */
2544 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2545 VEC_free (tree, heap, vec_tmp);
   /* NOTE(review): DECL2 is passed for both the DECL1 and DECL2 arguments
      of the recursive call below; DECL1 looks intended for the first.
      Presumably harmless when the multi-step path never uses builtin
      decls, but confirm against supportable_widening_operation.  */
2546 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2547 multi_step_cvt - 1, stmt,
2548 vec_dsts, gsi, slp_node, code1,
2549 code2, decl2, decl2, op_type,
2555 /* Function vectorizable_type_promotion
2557 Check if STMT performs a binary or unary operation that involves
2558 type promotion, and if it can be vectorized.
2559 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2560 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2561 Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2564 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2565 gimple *vec_stmt, slp_tree slp_node)
2569 tree op0, op1 = NULL;
2570 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2571 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2572 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2573 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2574 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2578 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2579 stmt_vec_info prev_stmt_info;
2586 tree intermediate_type = NULL_TREE;
2587 int multi_step_cvt = 0;
2588 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2589 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
   /* Only statements marked relevant and internally defined are handled.  */
2591 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2594 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2597 /* Is STMT a vectorizable type-promotion operation?  */
2598 if (!is_gimple_assign (stmt))
2601 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2604 code = gimple_assign_rhs_code (stmt);
   /* Conversions and widening multiplication are the promoted codes.  */
2605 if (!CONVERT_EXPR_CODE_P (code)
2606 && code != WIDEN_MULT_EXPR)
2609 op0 = gimple_assign_rhs1 (stmt);
2610 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2613 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2615 scalar_dest = gimple_assign_lhs (stmt);
2616 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2619 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
   /* A promotion must narrow the element count; otherwise bail out.  */
2620 if (nunits_in <= nunits_out)
2623 /* Multiple types in SLP are handled by creating the appropriate number of
2624 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
2629 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2631 gcc_assert (ncopies >= 1);
   /* Both operands must be integral, or both floating-point with an
      explicit conversion.  */
2633 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2634 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2635 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2636 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2637 && CONVERT_EXPR_CODE_P (code))))
2640 /* Check the operands of the operation.  */
2641 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2643 if (vect_print_dump_info (REPORT_DETAILS))
2644 fprintf (vect_dump, "use not simple.");
2648 op_type = TREE_CODE_LENGTH (code);
2649 if (op_type == binary_op)
2651 op1 = gimple_assign_rhs2 (stmt);
2652 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
2654 if (vect_print_dump_info (REPORT_DETAILS))
2655 fprintf (vect_dump, "use not simple.");
2660 /* Supportable by target?  */
2661 if (!supportable_widening_operation (code, stmt, vectype_in,
2662 &decl1, &decl2, &code1, &code2,
2663 &multi_step_cvt, &interm_types))
2666 /* Binary widening operation can only be supported directly by the
   architecture (no multi-step lowering).  */
2668 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2670 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2672 if (!vec_stmt) /* transformation not required.  */
2674 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "=== vectorizable_promotion ===");
   /* Each copy produces two wider statements, hence 2*ncopies.  */
2677 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
   /* Transformation phase.  */
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2688 /* In case of multi-step promotion, we first generate promotion operations
2689 to the intermediate types, and then from those types to the final one.
2690 We store vector destination in VEC_DSTS in the correct order for
2691 recursive creation of promotion operations in
2692 vect_create_vectorized_promotion_stmts().  Vector destinations are created
2693 according to TYPES received from supportable_widening_operation().  */
2695 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2697 vec_dsts = VEC_alloc (tree, heap, 1);
2699 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2700 VEC_quick_push (tree, vec_dsts, vec_dest);
   /* Push intermediate destinations in reverse so the innermost step's
      destination ends up on top of the stack.  */
2704 for (i = VEC_length (tree, interm_types) - 1;
2705 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2707 vec_dest = vect_create_destination_var (scalar_dest,
2709 VEC_quick_push (tree, vec_dsts, vec_dest);
2715 vec_oprnds0 = VEC_alloc (tree, heap,
2716 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2717 if (op_type == binary_op)
2718 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2721 /* In case the vectorization factor (VF) is bigger than the number
2722 of elements that we can fit in a vectype (nunits), we have to generate
2723 more than one vector stmt - i.e - we need to "unroll" the
2724 vector stmt by a factor VF/nunits.  */
2726 prev_stmt_info = NULL;
2727 for (j = 0; j < ncopies; j++)
2733 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
   /* First copy: get the initial defs.  */
2736 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2737 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2738 if (op_type == binary_op)
2740 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2741 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
   /* Subsequent copies: chain from the previous copy's defs.  */
2747 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2748 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2749 if (op_type == binary_op)
2751 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2752 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2756 /* Arguments are ready.  Create the new vector stmts.  */
   /* Copy VEC_DSTS because the callee pops from it on each recursion level
      and we need the original again for the next copy.  */
2757 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2758 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2759 multi_step_cvt, stmt,
2761 gsi, slp_node, code1, code2,
2762 decl1, decl2, op_type,
2766 VEC_free (tree, heap, vec_dsts);
2767 VEC_free (tree, heap, tmp_vec_dsts);
2768 VEC_free (tree, heap, interm_types);
2769 VEC_free (tree, heap, vec_oprnds0);
2770 VEC_free (tree, heap, vec_oprnds1);
2772 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2777 /* Function vectorizable_store.
2779 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
2781 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2782 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2783 Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2786 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2792 tree vec_oprnd = NULL_TREE;
2793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2794 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2795 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2796 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2797 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2798 enum machine_mode vec_mode;
2800 enum dr_alignment_support alignment_support_scheme;
2803 enum vect_def_type dt;
2804 stmt_vec_info prev_stmt_info = NULL;
2805 tree dataref_ptr = NULL_TREE;
2806 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2809 gimple next_stmt, first_stmt = NULL;
2810 bool strided_store = false;
2811 unsigned int group_size, i;
2812 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2814 VEC(tree,heap) *vec_oprnds = NULL;
2815 bool slp = (slp_node != NULL);
2816 stmt_vec_info first_stmt_vinfo;
2817 unsigned int vec_num;
2819 /* Multiple types in SLP are handled by creating the appropriate number of
2820 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
2825 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2827 gcc_assert (ncopies >= 1);
2829 /* FORNOW. This restriction should be relaxed.  */
2830 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2832 if (vect_print_dump_info (REPORT_DETAILS))
2833 fprintf (vect_dump, "multiple types in nested loop.");
2837 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2840 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2843 /* Is vectorizable store?  */
2845 if (!is_gimple_assign (stmt))
2848 scalar_dest = gimple_assign_lhs (stmt);
   /* The lhs must be a memory reference (or part of a strided group).  */
2849 if (TREE_CODE (scalar_dest) != ARRAY_REF
2850 && TREE_CODE (scalar_dest) != INDIRECT_REF
2851 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
2854 gcc_assert (gimple_assign_single_p (stmt));
2855 op = gimple_assign_rhs1 (stmt);
2856 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2858 if (vect_print_dump_info (REPORT_DETAILS))
2859 fprintf (vect_dump, "use not simple.");
2863 /* The scalar rhs type needs to be trivially convertible to the vector
2864 component type.  This should always be the case.  */
2865 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2867 if (vect_print_dump_info (REPORT_DETAILS))
2868 fprintf (vect_dump, "??? operands of different types");
2872 vec_mode = TYPE_MODE (vectype);
2873 /* FORNOW. In some cases can vectorize even if data-type not supported
2874 (e.g. - array initialization with 0).  */
2875 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2878 if (!STMT_VINFO_DATA_REF (stmt_info))
2881 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2883 strided_store = true;
2884 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2885 if (!vect_strided_store_supported (vectype)
2886 && !PURE_SLP_STMT (stmt_info) && !slp)
2889 if (first_stmt == stmt)
2891 /* STMT is the leader of the group.  Check the operands of all the
2892 stmts of the group.  */
2893 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2896 gcc_assert (gimple_assign_single_p (next_stmt));
2897 op = gimple_assign_rhs1 (next_stmt);
2898 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2900 if (vect_print_dump_info (REPORT_DETAILS))
2901 fprintf (vect_dump, "use not simple.");
2904 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2909 if (!vec_stmt) /* transformation not required.  */
2911 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2912 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
   /* Transformation phase.  */
2920 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2921 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2923 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2926 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
2928 /* We vectorize all the stmts of the interleaving group when we
2929 reach the last stmt in the group.  */
2930 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2931 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2939 strided_store = false;
2941 /* VEC_NUM is the number of vect stmts to be created for this group.  */
2943 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2945 vec_num = group_size;
   /* Not a strided access: a group of one.  */
2951 group_size = vec_num = 1;
2952 first_stmt_vinfo = stmt_info;
2955 if (vect_print_dump_info (REPORT_DETAILS))
2956 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2958 dr_chain = VEC_alloc (tree, heap, group_size);
2959 oprnds = VEC_alloc (tree, heap, group_size);
2961 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
2962 gcc_assert (alignment_support_scheme);
2963 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2965 /* In case the vectorization factor (VF) is bigger than the number
2966 of elements that we can fit in a vectype (nunits), we have to generate
2967 more than one vector stmt - i.e - we need to "unroll" the
2968 vector stmt by a factor VF/nunits.  For more details see documentation in
2969 vect_get_vec_def_for_copy_stmt.  */
2971 /* In case of interleaving (non-unit strided access):
2978 We create vectorized stores starting from base address (the access of the
2979 first stmt in the chain (S2 in the above example), when the last store stmt
2980 of the chain (S4) is reached:
2983 VS2: &base + vec_size*1 = vx0
2984 VS3: &base + vec_size*2 = vx1
2985 VS4: &base + vec_size*3 = vx3
2987 Then permutation statements are generated:
2989 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2990 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2993 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2994 (the order of the data-refs in the output of vect_permute_store_chain
2995 corresponds to the order of scalar stmts in the interleaving chain - see
2996 the documentation of vect_permute_store_chain()).
2998 In case of both multiple types and interleaving, above vector stores and
2999 permutation stmts are created for every copy.  The result vector stmts are
3000 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3001 STMT_VINFO_RELATED_STMT for the next copies.
   */
3004 prev_stmt_info = NULL;
3005 for (j = 0; j < ncopies; j++)
3014 /* Get vectorized arguments for SLP_NODE.  */
3015 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3017 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3021 /* For interleaved stores we collect vectorized defs for all the
3022 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
3023 used as an input to vect_permute_store_chain(), and OPRNDS as
3024 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3026 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3027 OPRNDS are of size 1.  */
3028 next_stmt = first_stmt;
3029 for (i = 0; i < group_size; i++)
3031 /* Since gaps are not supported for interleaved stores,
3032 GROUP_SIZE is the exact number of stmts in the chain.
3033 Therefore, NEXT_STMT can't be NULL_TREE.  In case that
3034 there is no interleaving, GROUP_SIZE is 1, and only one
3035 iteration of the loop will be executed.  */
3036 gcc_assert (next_stmt
3037 && gimple_assign_single_p (next_stmt));
3038 op = gimple_assign_rhs1 (next_stmt);
3040 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3042 VEC_quick_push(tree, dr_chain, vec_oprnd);
3043 VEC_quick_push(tree, oprnds, vec_oprnd);
3044 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3048 /* We should have caught mismatched types earlier.  */
3049 gcc_assert (useless_type_conversion_p (vectype,
3050 TREE_TYPE (vec_oprnd)));
3051 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3052 &dummy, &ptr_incr, false,
3054 gcc_assert (!inv_p);
   /* Subsequent copies (j > 0).  */
3058 /* For interleaved stores we created vectorized defs for all the
3059 defs stored in OPRNDS in the previous iteration (previous copy).
3060 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3061 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
   next copy.
3063 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3064 OPRNDS are of size 1.  */
3065 for (i = 0; i < group_size; i++)
3067 op = VEC_index (tree, oprnds, i);
3068 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
3069 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3070 VEC_replace(tree, dr_chain, i, vec_oprnd);
3071 VEC_replace(tree, oprnds, i, vec_oprnd);
3074 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3079 result_chain = VEC_alloc (tree, heap, group_size);
3081 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3086 next_stmt = first_stmt;
3087 for (i = 0; i < vec_num; i++)
3090 /* Bump the vector pointer.  */
3091 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3095 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3096 else if (strided_store)
3097 /* For strided stores vectorized defs are interleaved in
3098 vect_permute_store_chain().  */
3099 vec_oprnd = VEC_index (tree, result_chain, i);
3101 data_ref = build_fold_indirect_ref (dataref_ptr);
3102 /* If accesses through a pointer to vectype do not alias the original
3103 memory reference we have a problem.  This should never happen.  */
3104 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3105 get_alias_set (gimple_assign_lhs (stmt))));
3107 /* Arguments are ready.  Create the new vector stmt.  */
3108 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3109 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3110 mark_symbols_for_renaming (new_stmt);
   /* Record the vector stmt: first copy in VEC_STMT, later copies chained
      through STMT_VINFO_RELATED_STMT.  */
3116 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3118 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3120 prev_stmt_info = vinfo_for_stmt (new_stmt);
3121 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3127 VEC_free (tree, heap, dr_chain);
3128 VEC_free (tree, heap, oprnds);
3130 VEC_free (tree, heap, result_chain);
3135 /* vectorizable_load.
3137 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3139 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3140 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3141 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3144 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3145 slp_tree slp_node, slp_instance slp_node_instance)
3148 tree vec_dest = NULL;
3149 tree data_ref = NULL;
3150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3151 stmt_vec_info prev_stmt_info;
3152 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3154 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3155 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3156 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3160 gimple new_stmt = NULL;
3162 enum dr_alignment_support alignment_support_scheme;
3163 tree dataref_ptr = NULL_TREE;
3165 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3167 int i, j, group_size;
3168 tree msq = NULL_TREE, lsq;
3169 tree offset = NULL_TREE;
3170 tree realignment_token = NULL_TREE;
3172 VEC(tree,heap) *dr_chain = NULL;
3173 bool strided_load = false;
3177 bool compute_in_loop = false;
3178 struct loop *at_loop;
3180 bool slp = (slp_node != NULL);
3181 bool slp_perm = false;
3182 enum tree_code code;
3184 /* Multiple types in SLP are handled by creating the appropriate number of
3185 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3190 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3192 gcc_assert (ncopies >= 1);
3194 /* FORNOW. This restriction should be relaxed. */
3195 if (nested_in_vect_loop && ncopies > 1)
3197 if (vect_print_dump_info (REPORT_DETAILS))
3198 fprintf (vect_dump, "multiple types in nested loop.");
3202 if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3205 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3208 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3211 /* Is vectorizable load? */
3212 if (!is_gimple_assign (stmt))
3215 scalar_dest = gimple_assign_lhs (stmt);
3216 if (TREE_CODE (scalar_dest) != SSA_NAME)
3219 code = gimple_assign_rhs_code (stmt);
3220 if (code != ARRAY_REF
3221 && code != INDIRECT_REF
3222 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3225 if (!STMT_VINFO_DATA_REF (stmt_info))
3228 scalar_type = TREE_TYPE (DR_REF (dr));
3229 mode = (int) TYPE_MODE (vectype);
3231 /* FORNOW. In some cases can vectorize even if data-type not supported
3232 (e.g. - data copies). */
3233 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3235 if (vect_print_dump_info (REPORT_DETAILS))
3236 fprintf (vect_dump, "Aligned load, but unsupported type.");
3240 /* The vector component type needs to be trivially convertible to the
3241 scalar lhs. This should always be the case. */
3242 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3244 if (vect_print_dump_info (REPORT_DETAILS))
3245 fprintf (vect_dump, "??? operands of different types");
3249 /* Check if the load is a part of an interleaving chain. */
3250 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3252 strided_load = true;
3254 gcc_assert (! nested_in_vect_loop);
3256 /* Check if interleaving is supported. */
3257 if (!vect_strided_load_supported (vectype)
3258 && !PURE_SLP_STMT (stmt_info) && !slp)
3262 if (!vec_stmt) /* transformation not required. */
3264 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3265 vect_model_load_cost (stmt_info, ncopies, NULL);
3269 if (vect_print_dump_info (REPORT_DETAILS))
3270 fprintf (vect_dump, "transform load.");
3276 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3277 /* Check if the chain of loads is already vectorized. */
3278 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3280 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3283 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3284 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3286 /* VEC_NUM is the number of vect stmts to be created for this group. */
3289 strided_load = false;
3290 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3293 vec_num = group_size;
3295 dr_chain = VEC_alloc (tree, heap, vec_num);
3301 group_size = vec_num = 1;
3304 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3305 gcc_assert (alignment_support_scheme);
3307 /* In case the vectorization factor (VF) is bigger than the number
3308 of elements that we can fit in a vectype (nunits), we have to generate
3309 more than one vector stmt - i.e - we need to "unroll" the
3310 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3311 from one copy of the vector stmt to the next, in the field
3312 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3313 stages to find the correct vector defs to be used when vectorizing
3314 stmts that use the defs of the current stmt. The example below illustrates
3315 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3316 4 vectorized stmts):
3318 before vectorization:
3319 RELATED_STMT VEC_STMT
3323 step 1: vectorize stmt S1:
3324 We first create the vector stmt VS1_0, and, as usual, record a
3325 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3326 Next, we create the vector stmt VS1_1, and record a pointer to
3327 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3328 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3330 RELATED_STMT VEC_STMT
3331 VS1_0: vx0 = memref0 VS1_1 -
3332 VS1_1: vx1 = memref1 VS1_2 -
3333 VS1_2: vx2 = memref2 VS1_3 -
3334 VS1_3: vx3 = memref3 - -
3335 S1: x = load - VS1_0
3338 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3339 information we recorded in RELATED_STMT field is used to vectorize
3342 /* In case of interleaving (non-unit strided access):
3349 Vectorized loads are created in the order of memory accesses
3350 starting from the access of the first stmt of the chain:
3353 VS2: vx1 = &base + vec_size*1
3354 VS3: vx3 = &base + vec_size*2
3355 VS4: vx4 = &base + vec_size*3
3357 Then permutation statements are generated:
3359 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3360 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3363 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3364 (the order of the data-refs in the output of vect_permute_load_chain
3365 corresponds to the order of scalar stmts in the interleaving chain - see
3366 the documentation of vect_permute_load_chain()).
3367 The generation of permutation stmts and recording them in
3368 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3370 In case of both multiple types and interleaving, the vector loads and
3371 permutation stmts above are created for every copy. The result vector stmts
3372 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3373 STMT_VINFO_RELATED_STMT for the next copies. */
3375 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3376 on a target that supports unaligned accesses (dr_unaligned_supported)
3377 we generate the following code:
3381 p = p + indx * vectype_size;
3386 Otherwise, the data reference is potentially unaligned on a target that
3387 does not support unaligned accesses (dr_explicit_realign_optimized) -
3388 then generate the following code, in which the data in each iteration is
3389 obtained by two vector loads, one from the previous iteration, and one
3390 from the current iteration:
3392 msq_init = *(floor(p1))
3393 p2 = initial_addr + VS - 1;
3394 realignment_token = call target_builtin;
3397 p2 = p2 + indx * vectype_size
3399 vec_dest = realign_load (msq, lsq, realignment_token)
3404 /* If the misalignment remains the same throughout the execution of the
3405 loop, we can create the init_addr and permutation mask at the loop
3406 preheader. Otherwise, it needs to be created inside the loop.
3407 This can only occur when vectorizing memory accesses in the inner-loop
3408 nested within an outer-loop that is being vectorized. */
3410 if (nested_in_vect_loop_p (loop, stmt)
3411 && (TREE_INT_CST_LOW (DR_STEP (dr))
3412 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3414 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3415 compute_in_loop = true;
3418 if ((alignment_support_scheme == dr_explicit_realign_optimized
3419 || alignment_support_scheme == dr_explicit_realign)
3420 && !compute_in_loop)
3422 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3423 alignment_support_scheme, NULL_TREE,
3425 if (alignment_support_scheme == dr_explicit_realign_optimized)
3427 phi = SSA_NAME_DEF_STMT (msq);
3428 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3434 prev_stmt_info = NULL;
3435 for (j = 0; j < ncopies; j++)
3437 /* 1. Create the vector pointer update chain. */
3439 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3441 &dummy, &ptr_incr, false,
3445 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3447 for (i = 0; i < vec_num; i++)
3450 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3453 /* 2. Create the vector-load in the loop. */
3454 switch (alignment_support_scheme)
3457 gcc_assert (aligned_access_p (first_dr));
3458 data_ref = build_fold_indirect_ref (dataref_ptr);
3460 case dr_unaligned_supported:
3462 int mis = DR_MISALIGNMENT (first_dr);
3463 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3465 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3467 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3470 case dr_explicit_realign:
3473 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3475 if (compute_in_loop)
3476 msq = vect_setup_realignment (first_stmt, gsi,
3478 dr_explicit_realign,
3481 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3482 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3483 new_stmt = gimple_build_assign (vec_dest, data_ref);
3484 new_temp = make_ssa_name (vec_dest, new_stmt);
3485 gimple_assign_set_lhs (new_stmt, new_temp);
3486 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3487 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3491 bump = size_binop (MULT_EXPR, vs_minus_1,
3492 TYPE_SIZE_UNIT (scalar_type));
3493 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3494 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3497 case dr_explicit_realign_optimized:
3498 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3503 /* If accesses through a pointer to vectype do not alias the original
3504 memory reference we have a problem. This should never happen. */
3505 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3506 get_alias_set (gimple_assign_rhs1 (stmt))));
3507 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3508 new_stmt = gimple_build_assign (vec_dest, data_ref);
3509 new_temp = make_ssa_name (vec_dest, new_stmt);
3510 gimple_assign_set_lhs (new_stmt, new_temp);
3511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3512 mark_symbols_for_renaming (new_stmt);
3514 /* 3. Handle explicit realignment if necessary/supported. Create in
3515 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3516 if (alignment_support_scheme == dr_explicit_realign_optimized
3517 || alignment_support_scheme == dr_explicit_realign)
3521 lsq = gimple_assign_lhs (new_stmt);
3522 if (!realignment_token)
3523 realignment_token = dataref_ptr;
3524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3525 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3527 new_stmt = gimple_build_assign (vec_dest, tmp);
3528 new_temp = make_ssa_name (vec_dest, new_stmt);
3529 gimple_assign_set_lhs (new_stmt, new_temp);
3530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3532 if (alignment_support_scheme == dr_explicit_realign_optimized)
3535 if (i == vec_num - 1 && j == ncopies - 1)
3536 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3541 /* 4. Handle invariant-load. */
3544 gcc_assert (!strided_load);
3545 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3550 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3552 /* CHECKME: bitpos depends on endianness? */
3553 bitpos = bitsize_zero_node;
3554 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3557 vect_create_destination_var (scalar_dest, NULL_TREE);
3558 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3559 new_temp = make_ssa_name (vec_dest, new_stmt);
3560 gimple_assign_set_lhs (new_stmt, new_temp);
3561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3563 for (k = nunits - 1; k >= 0; --k)
3564 t = tree_cons (NULL_TREE, new_temp, t);
3565 /* FIXME: use build_constructor directly. */
3566 vec_inv = build_constructor_from_list (vectype, t);
3567 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3568 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3571 gcc_unreachable (); /* FORNOW. */
3574 /* Collect vector loads and later create their permutation in
3575 vect_transform_strided_load (). */
3576 if (strided_load || slp_perm)
3577 VEC_quick_push (tree, dr_chain, new_temp);
3579 /* Store vector loads in the corresponding SLP_NODE. */
3580 if (slp && !slp_perm)
3581 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3584 if (slp && !slp_perm)
3589 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
3590 LOOP_VINFO_VECT_FACTOR (loop_vinfo),
3591 slp_node_instance, false))
3593 VEC_free (tree, heap, dr_chain);
3601 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3604 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3605 VEC_free (tree, heap, dr_chain);
3606 dr_chain = VEC_alloc (tree, heap, group_size);
3611 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3613 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3614 prev_stmt_info = vinfo_for_stmt (new_stmt);
3620 VEC_free (tree, heap, dr_chain);
3625 /* Function vect_is_simple_cond.
3628 LOOP - the loop that is being vectorized.
3629 COND - Condition that is checked for simple use.
3631 Returns whether a COND can be vectorized. Checks whether
3632 condition operands are supportable using vec_is_simple_use. */
3635 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3639 enum vect_def_type dt;
3641 if (!COMPARISON_CLASS_P (cond))
3644 lhs = TREE_OPERAND (cond, 0);
3645 rhs = TREE_OPERAND (cond, 1);
3647 if (TREE_CODE (lhs) == SSA_NAME)
3649 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3650 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3653 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3654 && TREE_CODE (lhs) != FIXED_CST)
3657 if (TREE_CODE (rhs) == SSA_NAME)
3659 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3660 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3663 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3664 && TREE_CODE (rhs) != FIXED_CST)
3670 /* vectorizable_condition.
3672 Check if STMT is conditional modify expression that can be vectorized.
3673 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3674 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3677 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3680 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3683 tree scalar_dest = NULL_TREE;
3684 tree vec_dest = NULL_TREE;
3685 tree op = NULL_TREE;
3686 tree cond_expr, then_clause, else_clause;
3687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3689 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3690 tree vec_compare, vec_cond_expr;
3692 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3693 enum machine_mode vec_mode;
3695 enum vect_def_type dt;
3696 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3697 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3698 enum tree_code code;
3700 gcc_assert (ncopies >= 1);
3702 return false; /* FORNOW */
3704 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3707 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3710 /* FORNOW: SLP not supported. */
3711 if (STMT_SLP_TYPE (stmt_info))
3714 /* FORNOW: not yet supported. */
3715 if (STMT_VINFO_LIVE_P (stmt_info))
3717 if (vect_print_dump_info (REPORT_DETAILS))
3718 fprintf (vect_dump, "value used after loop.");
3722 /* Is vectorizable conditional operation? */
3723 if (!is_gimple_assign (stmt))
3726 code = gimple_assign_rhs_code (stmt);
3728 if (code != COND_EXPR)
3731 gcc_assert (gimple_assign_single_p (stmt));
3732 op = gimple_assign_rhs1 (stmt);
3733 cond_expr = TREE_OPERAND (op, 0);
3734 then_clause = TREE_OPERAND (op, 1);
3735 else_clause = TREE_OPERAND (op, 2);
3737 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3740 /* We do not handle two different vector types for the condition
3742 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3745 if (TREE_CODE (then_clause) == SSA_NAME)
3747 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3748 if (!vect_is_simple_use (then_clause, loop_vinfo,
3749 &then_def_stmt, &def, &dt))
3752 else if (TREE_CODE (then_clause) != INTEGER_CST
3753 && TREE_CODE (then_clause) != REAL_CST
3754 && TREE_CODE (then_clause) != FIXED_CST)
3757 if (TREE_CODE (else_clause) == SSA_NAME)
3759 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3760 if (!vect_is_simple_use (else_clause, loop_vinfo,
3761 &else_def_stmt, &def, &dt))
3764 else if (TREE_CODE (else_clause) != INTEGER_CST
3765 && TREE_CODE (else_clause) != REAL_CST
3766 && TREE_CODE (else_clause) != FIXED_CST)
3770 vec_mode = TYPE_MODE (vectype);
3774 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3775 return expand_vec_cond_expr_p (op, vec_mode);
3781 scalar_dest = gimple_assign_lhs (stmt);
3782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3784 /* Handle cond expr. */
3786 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3788 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3789 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3790 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3792 /* Arguments are ready. Create the new vector stmt. */
3793 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3794 vec_cond_lhs, vec_cond_rhs);
3795 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3796 vec_compare, vec_then_clause, vec_else_clause);
3798 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3799 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3800 gimple_assign_set_lhs (*vec_stmt, new_temp);
3801 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3807 /* Make sure the statement is vectorizable. */
3810 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize)
3812 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3813 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3816 if (vect_print_dump_info (REPORT_DETAILS))
3818 fprintf (vect_dump, "==> examining statement: ");
3819 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3822 /* Skip stmts that do not need to be vectorized. In loops this is expected
3824 - the COND_EXPR which is the loop exit condition
3825 - any LABEL_EXPRs in the loop
3826 - computations that are used only for array indexing or loop control.
3827 In basic blocks we only analyze statements that are a part of some SLP
3828 instance, therefore, all the statements are relevant. */
3830 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3831 && !STMT_VINFO_LIVE_P (stmt_info))
3833 if (vect_print_dump_info (REPORT_DETAILS))
3834 fprintf (vect_dump, "irrelevant.");
3839 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3841 case vect_internal_def:
3844 case vect_reduction_def:
3845 gcc_assert (relevance == vect_used_in_outer
3846 || relevance == vect_used_in_outer_by_reduction
3847 || relevance == vect_unused_in_scope);
3850 case vect_induction_def:
3851 case vect_constant_def:
3852 case vect_external_def:
3853 case vect_unknown_def_type:
3858 if (STMT_VINFO_RELEVANT_P (stmt_info))
3860 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3861 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3862 *need_to_vectorize = true;
3866 if (STMT_VINFO_RELEVANT_P (stmt_info)
3867 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
3868 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3869 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3870 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3871 || vectorizable_operation (stmt, NULL, NULL, NULL)
3872 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3873 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3874 || vectorizable_call (stmt, NULL, NULL)
3875 || vectorizable_store (stmt, NULL, NULL, NULL)
3876 || vectorizable_condition (stmt, NULL, NULL)
3877 || vectorizable_reduction (stmt, NULL, NULL));
3881 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3883 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3884 fprintf (vect_dump, "supported: ");
3885 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3891 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
3892 need extra handling, except for vectorizable reductions. */
3893 if (STMT_VINFO_LIVE_P (stmt_info)
3894 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
3895 ok = vectorizable_live_operation (stmt, NULL, NULL);
3899 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3901 fprintf (vect_dump, "not vectorized: live stmt not ");
3902 fprintf (vect_dump, "supported: ");
3903 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3909 if (!PURE_SLP_STMT (stmt_info))
3911 /* Groups of strided accesses whose size is not a power of 2 are not
3912 vectorizable yet using loop-vectorization. Therefore, if this stmt
3913 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
3914 loop-based vectorized), the loop cannot be vectorized. */
3915 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
3916 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
3917 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
3919 if (vect_print_dump_info (REPORT_DETAILS))
3921 fprintf (vect_dump, "not vectorized: the size of group "
3922 "of strided accesses is not a power of 2");
3923 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3934 /* Function vect_transform_stmt.
3936 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3939 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
3940 bool *strided_store, slp_tree slp_node,
3941 slp_instance slp_node_instance)
3943 bool is_store = false;
3944 gimple vec_stmt = NULL;
3945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3946 gimple orig_stmt_in_pattern;
3948 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3949 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3951 switch (STMT_VINFO_TYPE (stmt_info))
3953 case type_demotion_vec_info_type:
3954 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
3958 case type_promotion_vec_info_type:
3959 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
3963 case type_conversion_vec_info_type:
3964 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
3968 case induc_vec_info_type:
3969 gcc_assert (!slp_node);
3970 done = vectorizable_induction (stmt, gsi, &vec_stmt);
3974 case op_vec_info_type:
3975 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
3979 case assignment_vec_info_type:
3980 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
3984 case load_vec_info_type:
3985 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
3990 case store_vec_info_type:
3991 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
3993 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
3995 /* In case of interleaving, the whole chain is vectorized when the
3996 last store in the chain is reached. Store stmts before the last
3997 one are skipped, and there vec_stmt_info shouldn't be freed
3999 *strided_store = true;
4000 if (STMT_VINFO_VEC_STMT (stmt_info))
4007 case condition_vec_info_type:
4008 gcc_assert (!slp_node);
4009 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4013 case call_vec_info_type:
4014 gcc_assert (!slp_node);
4015 done = vectorizable_call (stmt, gsi, &vec_stmt);
4018 case reduc_vec_info_type:
4019 gcc_assert (!slp_node);
4020 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4025 if (!STMT_VINFO_LIVE_P (stmt_info))
4027 if (vect_print_dump_info (REPORT_DETAILS))
4028 fprintf (vect_dump, "stmt not supported.");
4033 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4034 is being vectorized, but outside the immediately enclosing loop. */
4036 && nested_in_vect_loop_p (loop, stmt)
4037 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4038 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4039 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
4041 struct loop *innerloop = loop->inner;
4042 imm_use_iterator imm_iter;
4043 use_operand_p use_p;
4047 if (vect_print_dump_info (REPORT_DETAILS))
4048 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4050 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
4051 (to be used when vectorizing outer-loop stmts that use the DEF of
4053 if (gimple_code (stmt) == GIMPLE_PHI)
4054 scalar_dest = PHI_RESULT (stmt);
4056 scalar_dest = gimple_assign_lhs (stmt);
4058 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4060 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4062 exit_phi = USE_STMT (use_p);
4063 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4068 /* Handle stmts whose DEF is used outside the loop-nest that is
4069 being vectorized. */
4070 if (STMT_VINFO_LIVE_P (stmt_info)
4071 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4073 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4079 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4080 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4081 if (orig_stmt_in_pattern)
4083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4084 /* STMT was inserted by the vectorizer to replace a computation idiom.
4085 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4086 computed this idiom. We need to record a pointer to VEC_STMT in
4087 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4088 documentation of vect_pattern_recog. */
4089 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4091 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4092 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4101 /* Remove a group of stores (for SLP or interleaving), free their
4105 vect_remove_stores (gimple first_stmt)
4107 gimple next = first_stmt;
4109 gimple_stmt_iterator next_si;
4113 /* Free the attached stmt_vec_info and remove the stmt. */
4114 next_si = gsi_for_stmt (next);
4115 gsi_remove (&next_si, true);
4116 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4117 free_stmt_vec_info (next);
4123 /* Function new_stmt_vec_info.
4125 Create and initialize a new stmt_vec_info struct for STMT. */
4128 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
4131 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4133 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4134 STMT_VINFO_STMT (res) = stmt;
4135 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4136 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4137 STMT_VINFO_LIVE_P (res) = false;
4138 STMT_VINFO_VECTYPE (res) = NULL;
4139 STMT_VINFO_VEC_STMT (res) = NULL;
4140 STMT_VINFO_IN_PATTERN_P (res) = false;
4141 STMT_VINFO_RELATED_STMT (res) = NULL;
4142 STMT_VINFO_DATA_REF (res) = NULL;
4144 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4145 STMT_VINFO_DR_OFFSET (res) = NULL;
4146 STMT_VINFO_DR_INIT (res) = NULL;
4147 STMT_VINFO_DR_STEP (res) = NULL;
4148 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4150 if (gimple_code (stmt) == GIMPLE_PHI
4151 && is_loop_header_bb_p (gimple_bb (stmt)))
4152 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4154 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4156 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4157 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4158 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4159 STMT_SLP_TYPE (res) = loop_vect;
4160 DR_GROUP_FIRST_DR (res) = NULL;
4161 DR_GROUP_NEXT_DR (res) = NULL;
4162 DR_GROUP_SIZE (res) = 0;
4163 DR_GROUP_STORE_COUNT (res) = 0;
4164 DR_GROUP_GAP (res) = 0;
4165 DR_GROUP_SAME_DR_STMT (res) = NULL;
4166 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4172 /* Create a hash table for stmt_vec_info. */
4175 init_stmt_vec_info_vec (void)
4177 gcc_assert (!stmt_vec_info_vec);
4178 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4182 /* Free hash table for stmt_vec_info. */
4185 free_stmt_vec_info_vec (void)
4187 gcc_assert (stmt_vec_info_vec);
4188 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4192 /* Free stmt vectorization related info. */
4195 free_stmt_vec_info (gimple stmt)
4197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4202 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4203 set_vinfo_for_stmt (stmt, NULL);
4208 /* Function get_vectype_for_scalar_type.
4210 Returns the vector type corresponding to SCALAR_TYPE as supported
4214 get_vectype_for_scalar_type (tree scalar_type)
4216 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4217 int nbytes = GET_MODE_SIZE (inner_mode);
4221 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4224 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4226 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4228 vectype = build_vector_type (scalar_type, nunits);
4229 if (vect_print_dump_info (REPORT_DETAILS))
4231 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4232 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4238 if (vect_print_dump_info (REPORT_DETAILS))
4240 fprintf (vect_dump, "vectype: ");
4241 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4244 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4245 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4247 if (vect_print_dump_info (REPORT_DETAILS))
4248 fprintf (vect_dump, "mode not supported by target.");
4255 /* Function vect_is_simple_use.
4258 LOOP - the loop that is being vectorized.
4259 OPERAND - operand of a stmt in LOOP.
4260 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4262 Returns whether a stmt with OPERAND can be vectorized.
4263 Supportable operands are constants, loop invariants, and operands that are
4264 defined by the current iteration of the loop. Unsupportable operands are
4265 those that are defined by a previous iteration of the loop (as is the case
4266 in reduction/induction computations). */
4269 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
4270 tree *def, enum vect_def_type *dt)
4273 stmt_vec_info stmt_vinfo;
4274 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4279 if (vect_print_dump_info (REPORT_DETAILS))
4281 fprintf (vect_dump, "vect_is_simple_use: operand ");
4282 print_generic_expr (vect_dump, operand, TDF_SLIM);
4285 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4287 *dt = vect_constant_def;
4290 if (is_gimple_min_invariant (operand))
4293 *dt = vect_external_def;
4297 if (TREE_CODE (operand) == PAREN_EXPR)
4299 if (vect_print_dump_info (REPORT_DETAILS))
4300 fprintf (vect_dump, "non-associatable copy.");
4301 operand = TREE_OPERAND (operand, 0);
4303 if (TREE_CODE (operand) != SSA_NAME)
4305 if (vect_print_dump_info (REPORT_DETAILS))
4306 fprintf (vect_dump, "not ssa-name.");
4310 *def_stmt = SSA_NAME_DEF_STMT (operand);
4311 if (*def_stmt == NULL)
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "no def_stmt.");
4318 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "def_stmt: ");
4321 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4324 /* Empty stmt is expected only in case of a function argument.
4325 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4326 if (gimple_nop_p (*def_stmt))
4329 *dt = vect_external_def;
4333 bb = gimple_bb (*def_stmt);
4334 if (!flow_bb_inside_loop_p (loop, bb))
4335 *dt = vect_external_def;
4338 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4339 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4342 if (*dt == vect_unknown_def_type)
4344 if (vect_print_dump_info (REPORT_DETAILS))
4345 fprintf (vect_dump, "Unsupported pattern.");
4349 if (vect_print_dump_info (REPORT_DETAILS))
4350 fprintf (vect_dump, "type of def: %d.",*dt);
4352 switch (gimple_code (*def_stmt))
4355 *def = gimple_phi_result (*def_stmt);
4359 *def = gimple_assign_lhs (*def_stmt);
4363 *def = gimple_call_lhs (*def_stmt);
4368 if (vect_print_dump_info (REPORT_DETAILS))
4369 fprintf (vect_dump, "unsupported defining stmt: ");
4377 /* Function supportable_widening_operation
4379 Check whether an operation represented by the code CODE is a
4380 widening operation that is supported by the target platform in
4381 vector form (i.e., when operating on arguments of type VECTYPE).
4383 Widening operations we currently support are NOP (CONVERT), FLOAT
4384 and WIDEN_MULT. This function checks if these operations are supported
4385 by the target platform either directly (via vector tree-codes), or via
4389 - CODE1 and CODE2 are codes of vector operations to be used when
4390 vectorizing the operation, if available.
4391 - DECL1 and DECL2 are decls of target builtin functions to be used
4392 when vectorizing the operation, if available. In this case,
4393 CODE1 and CODE2 are CALL_EXPR.
4394 - MULTI_STEP_CVT determines the number of required intermediate steps in
4395 case of multi-step conversion (like char->short->int - in that case
4396 MULTI_STEP_CVT will be 1).
4397 - INTERM_TYPES contains the intermediate type required to perform the
4398 widening operation (short in the above example). */
4401 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4402 tree *decl1, tree *decl2,
4403 enum tree_code *code1, enum tree_code *code2,
4404 int *multi_step_cvt,
4405 VEC (tree, heap) **interm_types)
4407 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4408 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4409 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4411 enum machine_mode vec_mode;
4412 enum insn_code icode1, icode2;
4413 optab optab1, optab2;
4414 tree type = gimple_expr_type (stmt);
4415 tree wide_vectype = get_vectype_for_scalar_type (type);
4416 enum tree_code c1, c2;
4418 /* The result of a vectorized widening operation usually requires two vectors
4419 (because the widened results do not fit int one vector). The generated
4420 vector results would normally be expected to be generated in the same
4421 order as in the original scalar computation, i.e. if 8 results are
4422 generated in each vector iteration, they are to be organized as follows:
4423 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4425 However, in the special case that the result of the widening operation is
4426 used in a reduction computation only, the order doesn't matter (because
4427 when vectorizing a reduction we change the order of the computation).
4428 Some targets can take advantage of this and generate more efficient code.
4429 For example, targets like Altivec, that support widen_mult using a sequence
4430 of {mult_even,mult_odd} generate the following vectors:
4431 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4433 When vectorizing outer-loops, we execute the inner-loop sequentially
4434 (each vectorized inner-loop iteration contributes to VF outer-loop
4435 iterations in parallel). We therefore don't allow to change the order
4436 of the computation in the inner-loop during outer-loop vectorization. */
4438 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4439 && !nested_in_vect_loop_p (vect_loop, stmt))
4445 && code == WIDEN_MULT_EXPR
4446 && targetm.vectorize.builtin_mul_widen_even
4447 && targetm.vectorize.builtin_mul_widen_even (vectype)
4448 && targetm.vectorize.builtin_mul_widen_odd
4449 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4451 if (vect_print_dump_info (REPORT_DETAILS))
4452 fprintf (vect_dump, "Unordered widening operation detected.");
4454 *code1 = *code2 = CALL_EXPR;
4455 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4456 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4462 case WIDEN_MULT_EXPR:
4463 if (BYTES_BIG_ENDIAN)
4465 c1 = VEC_WIDEN_MULT_HI_EXPR;
4466 c2 = VEC_WIDEN_MULT_LO_EXPR;
4470 c2 = VEC_WIDEN_MULT_HI_EXPR;
4471 c1 = VEC_WIDEN_MULT_LO_EXPR;
4476 if (BYTES_BIG_ENDIAN)
4478 c1 = VEC_UNPACK_HI_EXPR;
4479 c2 = VEC_UNPACK_LO_EXPR;
4483 c2 = VEC_UNPACK_HI_EXPR;
4484 c1 = VEC_UNPACK_LO_EXPR;
4489 if (BYTES_BIG_ENDIAN)
4491 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4492 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4496 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4497 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4501 case FIX_TRUNC_EXPR:
4502 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4503 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4504 computing the operation. */
4511 if (code == FIX_TRUNC_EXPR)
4513 /* The signedness is determined from output operand. */
4514 optab1 = optab_for_tree_code (c1, type, optab_default);
4515 optab2 = optab_for_tree_code (c2, type, optab_default);
4519 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4520 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4523 if (!optab1 || !optab2)
4526 vec_mode = TYPE_MODE (vectype);
4527 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4528 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4529 == CODE_FOR_nothing)
4532 /* Check if it's a multi-step conversion that can be done using intermediate
4534 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4535 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4538 tree prev_type = vectype, intermediate_type;
4539 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4540 optab optab3, optab4;
4542 if (!CONVERT_EXPR_CODE_P (code))
4548 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4549 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4550 to get to NARROW_VECTYPE, and fail if we do not. */
4551 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4552 for (i = 0; i < 3; i++)
4554 intermediate_mode = insn_data[icode1].operand[0].mode;
4555 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4556 TYPE_UNSIGNED (prev_type));
4557 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4558 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4560 if (!optab3 || !optab4
4561 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4563 || insn_data[icode1].operand[0].mode != intermediate_mode
4564 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4566 || insn_data[icode2].operand[0].mode != intermediate_mode
4567 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4569 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4570 == CODE_FOR_nothing)
4573 VEC_quick_push (tree, *interm_types, intermediate_type);
4574 (*multi_step_cvt)++;
4576 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4577 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4580 prev_type = intermediate_type;
4581 prev_mode = intermediate_mode;
4593 /* Function supportable_narrowing_operation
4595 Check whether an operation represented by the code CODE is a
4596 narrowing operation that is supported by the target platform in
4597 vector form (i.e., when operating on arguments of type VECTYPE).
4599 Narrowing operations we currently support are NOP (CONVERT) and
4600 FIX_TRUNC. This function checks if these operations are supported by
4601 the target platform directly via vector tree-codes.
4604 - CODE1 is the code of a vector operation to be used when
4605 vectorizing the operation, if available.
4606    - MULTI_STEP_CVT determines the number of required intermediate steps in
4607      case of multi-step conversion (e.g., int->short->char; in that case
4608      MULTI_STEP_CVT will be 1).
4609    - INTERM_TYPES contains the intermediate type(s) required to perform the
4610      narrowing operation (short in the above example).   */
4613 supportable_narrowing_operation (enum tree_code code,
4614 const_gimple stmt, tree vectype,
4615 enum tree_code *code1, int *multi_step_cvt,
4616 VEC (tree, heap) **interm_types)
4618 enum machine_mode vec_mode;
4619 enum insn_code icode1;
4620 optab optab1, interm_optab;
4621 tree type = gimple_expr_type (stmt);
4622 tree narrow_vectype = get_vectype_for_scalar_type (type);
4624 tree intermediate_type, prev_type;
4630 c1 = VEC_PACK_TRUNC_EXPR;
4633 case FIX_TRUNC_EXPR:
4634 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4638 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4639 tree code and optabs used for computing the operation. */
4646 if (code == FIX_TRUNC_EXPR)
4647 /* The signedness is determined from output operand. */
4648 optab1 = optab_for_tree_code (c1, type, optab_default);
4650 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4655 vec_mode = TYPE_MODE (vectype);
4656 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4657 == CODE_FOR_nothing)
4660 /* Check if it's a multi-step conversion that can be done using intermediate
4662 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4664 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4667 prev_type = vectype;
4668      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4669         intermediate steps in the narrowing sequence.  We try MAX_INTERM_CVT_STEPS
4670         to get to NARROW_VECTYPE, and fail if we do not.  */
4671 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4672 for (i = 0; i < 3; i++)
4674 intermediate_mode = insn_data[icode1].operand[0].mode;
4675 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4676 TYPE_UNSIGNED (prev_type));
4677 interm_optab = optab_for_tree_code (c1, intermediate_type,
4680 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4682 || insn_data[icode1].operand[0].mode != intermediate_mode
4684 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4685 == CODE_FOR_nothing)
4688 VEC_quick_push (tree, *interm_types, intermediate_type);
4689 (*multi_step_cvt)++;
4691 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4694 prev_type = intermediate_type;
4695 prev_mode = intermediate_mode;