1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
47 /* Function vect_mark_relevant.
49 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
52 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
53 enum vect_relevant relevant, bool live_p)
55 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
56 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
57 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
59 if (vect_print_dump_info (REPORT_DETAILS))
60 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
62 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
66 /* This is the last stmt in a sequence that was detected as a
67 pattern that can potentially be vectorized. Don't mark the stmt
68 as relevant/live because it's not going to be vectorized.
69 Instead mark the pattern-stmt that replaces it. */
71 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73 if (vect_print_dump_info (REPORT_DETAILS))
74 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
75 stmt_info = vinfo_for_stmt (pattern_stmt);
76 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
77 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
78 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
82 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
83 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
84 STMT_VINFO_RELEVANT (stmt_info) = relevant;
86 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
87 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
89 if (vect_print_dump_info (REPORT_DETAILS))
90 fprintf (vect_dump, "already marked relevant/live.");
94 VEC_safe_push (gimple, heap, *worklist, stmt);
98 /* Function vect_stmt_relevant_p.
100 Return true if STMT in loop that is represented by LOOP_VINFO is
101 "relevant for vectorization".
103 A stmt is considered "relevant for vectorization" if:
104 - it has uses outside the loop.
105 - it has vdefs (it alters memory).
106 - control stmts in the loop (except for the exit condition).
108 CHECKME: what other side effects would the vectorizer allow? */
111 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
112 enum vect_relevant *relevant, bool *live_p)
114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
116 imm_use_iterator imm_iter;
120 *relevant = vect_unused_in_scope;
123 /* cond stmt other than loop exit cond. */
124 if (is_ctrl_stmt (stmt)
125 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
126 != loop_exit_ctrl_vec_info_type)
127 *relevant = vect_used_in_scope;
129 /* changing memory. */
130 if (gimple_code (stmt) != GIMPLE_PHI)
131 if (gimple_vdef (stmt))
133 if (vect_print_dump_info (REPORT_DETAILS))
134 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
135 *relevant = vect_used_in_scope;
138 /* uses outside the loop. */
139 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
141 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
143 basic_block bb = gimple_bb (USE_STMT (use_p));
144 if (!flow_bb_inside_loop_p (loop, bb))
146 if (vect_print_dump_info (REPORT_DETAILS))
147 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
149 if (is_gimple_debug (USE_STMT (use_p)))
152 /* We expect all such uses to be in the loop exit phis
153 (because of loop closed form) */
154 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
155 gcc_assert (bb == single_exit (loop)->dest);
162 return (*live_p || *relevant);
166 /* Function exist_non_indexing_operands_for_use_p
168 USE is one of the uses attached to STMT. Check if USE is
169 used in STMT for anything other than indexing an array. */
172 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
177 /* USE corresponds to some operand in STMT. If there is no data
178 reference in STMT, then any operand that corresponds to USE
179 is not indexing an array. */
180 if (!STMT_VINFO_DATA_REF (stmt_info))
183 /* STMT has a data_ref. FORNOW this means that its of one of
187 (This should have been verified in analyze_data_refs).
189 'var' in the second case corresponds to a def, not a use,
190 so USE cannot correspond to any operands that are not used
193 Therefore, all we need to check is if STMT falls into the
194 first case, and whether var corresponds to USE. */
196 if (!gimple_assign_copy_p (stmt))
198 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
200 operand = gimple_assign_rhs1 (stmt);
201 if (TREE_CODE (operand) != SSA_NAME)
212 Function process_use.
215 - a USE in STMT in a loop represented by LOOP_VINFO
216 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
217 that defined USE. This is done by calling mark_relevant and passing it
218 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
221 Generally, LIVE_P and RELEVANT are used to define the liveness and
222 relevance info of the DEF_STMT of this USE:
223 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
224 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
226 - case 1: If USE is used only for address computations (e.g. array indexing),
227 which does not need to be directly vectorized, then the liveness/relevance
228 of the respective DEF_STMT is left unchanged.
229 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
230 skip DEF_STMT cause it had already been processed.
231 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
232 be modified accordingly.
234 Return true if everything is as expected. Return false otherwise. */
237 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
238 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
241 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
242 stmt_vec_info dstmt_vinfo;
243 basic_block bb, def_bb;
246 enum vect_def_type dt;
248 /* case 1: we are only interested in uses that need to be vectorized. Uses
249 that are used for address computation are not considered relevant. */
250 if (!exist_non_indexing_operands_for_use_p (use, stmt))
253 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
255 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
256 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
260 if (!def_stmt || gimple_nop_p (def_stmt))
263 def_bb = gimple_bb (def_stmt);
264 if (!flow_bb_inside_loop_p (loop, def_bb))
266 if (vect_print_dump_info (REPORT_DETAILS))
267 fprintf (vect_dump, "def_stmt is out of loop.");
271 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
272 DEF_STMT must have already been processed, because this should be the
273 only way that STMT, which is a reduction-phi, was put in the worklist,
274 as there should be no other uses for DEF_STMT in the loop. So we just
275 check that everything is as expected, and we are done. */
276 dstmt_vinfo = vinfo_for_stmt (def_stmt);
277 bb = gimple_bb (stmt);
278 if (gimple_code (stmt) == GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
280 && gimple_code (def_stmt) != GIMPLE_PHI
281 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
282 && bb->loop_father == def_bb->loop_father)
284 if (vect_print_dump_info (REPORT_DETAILS))
285 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
286 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
287 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
288 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
289 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
290 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
294 /* case 3a: outer-loop stmt defining an inner-loop stmt:
295 outer-loop-header-bb:
301 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
303 if (vect_print_dump_info (REPORT_DETAILS))
304 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
308 case vect_unused_in_scope:
309 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
310 vect_used_in_scope : vect_unused_in_scope;
313 case vect_used_in_outer_by_reduction:
314 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
315 relevant = vect_used_by_reduction;
318 case vect_used_in_outer:
319 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
320 relevant = vect_used_in_scope;
323 case vect_used_in_scope:
331 /* case 3b: inner-loop stmt defining an outer-loop stmt:
332 outer-loop-header-bb:
336 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
338 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
340 if (vect_print_dump_info (REPORT_DETAILS))
341 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
345 case vect_unused_in_scope:
346 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
347 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
348 vect_used_in_outer_by_reduction : vect_unused_in_scope;
351 case vect_used_by_reduction:
352 relevant = vect_used_in_outer_by_reduction;
355 case vect_used_in_scope:
356 relevant = vect_used_in_outer;
364 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
369 /* Function vect_mark_stmts_to_be_vectorized.
371 Not all stmts in the loop need to be vectorized. For example:
380 Stmt 1 and 3 do not need to be vectorized, because loop control and
381 addressing of vectorized data-refs are handled differently.
383 This pass detects such stmts. */
386 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
388 VEC(gimple,heap) *worklist;
389 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
390 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
391 unsigned int nbbs = loop->num_nodes;
392 gimple_stmt_iterator si;
395 stmt_vec_info stmt_vinfo;
399 enum vect_relevant relevant, tmp_relevant;
400 enum vect_def_type def_type;
402 if (vect_print_dump_info (REPORT_DETAILS))
403 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
405 worklist = VEC_alloc (gimple, heap, 64);
407 /* 1. Init worklist. */
408 for (i = 0; i < nbbs; i++)
411 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
414 if (vect_print_dump_info (REPORT_DETAILS))
416 fprintf (vect_dump, "init: phi relevant? ");
417 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
420 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
421 vect_mark_relevant (&worklist, phi, relevant, live_p);
423 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
425 stmt = gsi_stmt (si);
426 if (vect_print_dump_info (REPORT_DETAILS))
428 fprintf (vect_dump, "init: stmt relevant? ");
429 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
432 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
433 vect_mark_relevant (&worklist, stmt, relevant, live_p);
437 /* 2. Process_worklist */
438 while (VEC_length (gimple, worklist) > 0)
443 stmt = VEC_pop (gimple, worklist);
444 if (vect_print_dump_info (REPORT_DETAILS))
446 fprintf (vect_dump, "worklist: examine stmt: ");
447 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
450 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
451 (DEF_STMT) as relevant/irrelevant and live/dead according to the
452 liveness and relevance properties of STMT. */
453 stmt_vinfo = vinfo_for_stmt (stmt);
454 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
455 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
457 /* Generally, the liveness and relevance properties of STMT are
458 propagated as is to the DEF_STMTs of its USEs:
459 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
460 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
462 One exception is when STMT has been identified as defining a reduction
463 variable; in this case we set the liveness/relevance as follows:
465 relevant = vect_used_by_reduction
466 This is because we distinguish between two kinds of relevant stmts -
467 those that are used by a reduction computation, and those that are
468 (also) used by a regular computation. This allows us later on to
469 identify stmts that are used solely by a reduction, and therefore the
470 order of the results that they produce does not have to be kept. */
472 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
473 tmp_relevant = relevant;
476 case vect_reduction_def:
477 switch (tmp_relevant)
479 case vect_unused_in_scope:
480 relevant = vect_used_by_reduction;
483 case vect_used_by_reduction:
484 if (gimple_code (stmt) == GIMPLE_PHI)
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "unsupported use of reduction.");
492 VEC_free (gimple, heap, worklist);
499 case vect_nested_cycle:
500 if (tmp_relevant != vect_unused_in_scope
501 && tmp_relevant != vect_used_in_outer_by_reduction
502 && tmp_relevant != vect_used_in_outer)
504 if (vect_print_dump_info (REPORT_DETAILS))
505 fprintf (vect_dump, "unsupported use of nested cycle.");
507 VEC_free (gimple, heap, worklist);
514 case vect_double_reduction_def:
515 if (tmp_relevant != vect_unused_in_scope
516 && tmp_relevant != vect_used_by_reduction)
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "unsupported use of double reduction.");
521 VEC_free (gimple, heap, worklist);
532 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
534 tree op = USE_FROM_PTR (use_p);
535 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
537 VEC_free (gimple, heap, worklist);
541 } /* while worklist */
543 VEC_free (gimple, heap, worklist);
548 /* Get cost by calling cost target builtin. */
551 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
553 tree dummy_type = NULL;
556 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
561 /* Get cost for STMT. */
564 cost_for_stmt (gimple stmt)
566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
568 switch (STMT_VINFO_TYPE (stmt_info))
570 case load_vec_info_type:
571 return vect_get_stmt_cost (scalar_load);
572 case store_vec_info_type:
573 return vect_get_stmt_cost (scalar_store);
574 case op_vec_info_type:
575 case condition_vec_info_type:
576 case assignment_vec_info_type:
577 case reduc_vec_info_type:
578 case induc_vec_info_type:
579 case type_promotion_vec_info_type:
580 case type_demotion_vec_info_type:
581 case type_conversion_vec_info_type:
582 case call_vec_info_type:
583 return vect_get_stmt_cost (scalar_stmt);
584 case undef_vec_info_type:
590 /* Function vect_model_simple_cost.
592 Models cost for simple operations, i.e. those that only emit ncopies of a
593 single op. Right now, this does not account for multiple insns that could
594 be generated for the single vector op. We will handle that shortly. */
597 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
598 enum vect_def_type *dt, slp_tree slp_node)
601 int inside_cost = 0, outside_cost = 0;
603 /* The SLP costs were already calculated during SLP tree build. */
604 if (PURE_SLP_STMT (stmt_info))
607 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
609 /* FORNOW: Assuming maximum 2 args per stmts. */
610 for (i = 0; i < 2; i++)
612 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
613 outside_cost += vect_get_stmt_cost (vector_stmt);
616 if (vect_print_dump_info (REPORT_COST))
617 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
618 "outside_cost = %d .", inside_cost, outside_cost);
620 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
621 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
622 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
626 /* Model cost for type demotion and promotion operations. PWR is normally
627 zero for single-step promotions and demotions. It will be one if
628 two-step promotion/demotion is required, and so on. Each additional
629 step doubles the number of instructions required. */
632 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
633 enum vect_def_type *dt, int pwr)
636 int inside_cost = 0, outside_cost = 0, single_stmt_cost;
638 /* The SLP costs were already calculated during SLP tree build. */
639 if (PURE_SLP_STMT (stmt_info))
642 single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
643 for (i = 0; i < pwr + 1; i++)
645 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
647 inside_cost += vect_pow2 (tmp) * single_stmt_cost;
650 /* FORNOW: Assuming maximum 2 args per stmts. */
651 for (i = 0; i < 2; i++)
653 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
654 outside_cost += vect_get_stmt_cost (vector_stmt);
657 if (vect_print_dump_info (REPORT_COST))
658 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
659 "outside_cost = %d .", inside_cost, outside_cost);
661 /* Set the costs in STMT_INFO. */
662 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
663 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
666 /* Function vect_cost_strided_group_size
668 For strided load or store, return the group_size only if it is the first
669 load or store of a group, else return 1. This ensures that group size is
670 only returned once per group. */
673 vect_cost_strided_group_size (stmt_vec_info stmt_info)
675 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
677 if (first_stmt == STMT_VINFO_STMT (stmt_info))
678 return DR_GROUP_SIZE (stmt_info);
684 /* Function vect_model_store_cost
686 Models cost for stores. In the case of strided accesses, one access
687 has the overhead of the strided access attributed to it. */
690 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
691 enum vect_def_type dt, slp_tree slp_node)
694 unsigned int inside_cost = 0, outside_cost = 0;
695 struct data_reference *first_dr;
698 /* The SLP costs were already calculated during SLP tree build. */
699 if (PURE_SLP_STMT (stmt_info))
702 if (dt == vect_constant_def || dt == vect_external_def)
703 outside_cost = vect_get_stmt_cost (scalar_to_vec);
705 /* Strided access? */
706 if (DR_GROUP_FIRST_DR (stmt_info))
710 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
715 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
716 group_size = vect_cost_strided_group_size (stmt_info);
719 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
721 /* Not a strided access. */
725 first_dr = STMT_VINFO_DATA_REF (stmt_info);
728 /* Is this an access in a group of stores, which provide strided access?
729 If so, add in the cost of the permutes. */
732 /* Uses a high and low interleave operation for each needed permute. */
733 inside_cost = ncopies * exact_log2(group_size) * group_size
734 * vect_get_stmt_cost (vec_perm);
736 if (vect_print_dump_info (REPORT_COST))
737 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
742 /* Costs of the stores. */
743 vect_get_store_cost (first_dr, ncopies, &inside_cost);
745 if (vect_print_dump_info (REPORT_COST))
746 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
747 "outside_cost = %d .", inside_cost, outside_cost);
749 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
750 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
751 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
755 /* Calculate cost of DR's memory access. */
757 vect_get_store_cost (struct data_reference *dr, int ncopies,
758 unsigned int *inside_cost)
760 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
762 switch (alignment_support_scheme)
766 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
768 if (vect_print_dump_info (REPORT_COST))
769 fprintf (vect_dump, "vect_model_store_cost: aligned.");
774 case dr_unaligned_supported:
776 gimple stmt = DR_STMT (dr);
777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
778 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
780 /* Here, we assign an additional cost for the unaligned store. */
781 *inside_cost += ncopies
782 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
783 vectype, DR_MISALIGNMENT (dr));
785 if (vect_print_dump_info (REPORT_COST))
786 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
798 /* Function vect_model_load_cost
800 Models cost for loads. In the case of strided accesses, the last access
801 has the overhead of the strided access attributed to it. Since unaligned
802 accesses are supported for loads, we also account for the costs of the
803 access scheme chosen. */
806 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
811 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
812 unsigned int inside_cost = 0, outside_cost = 0;
814 /* The SLP costs were already calculated during SLP tree build. */
815 if (PURE_SLP_STMT (stmt_info))
818 /* Strided accesses? */
819 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
820 if (first_stmt && !slp_node)
822 group_size = vect_cost_strided_group_size (stmt_info);
823 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
825 /* Not a strided access. */
832 /* Is this an access in a group of loads providing strided access?
833 If so, add in the cost of the permutes. */
836 /* Uses an even and odd extract operations for each needed permute. */
837 inside_cost = ncopies * exact_log2(group_size) * group_size
838 * vect_get_stmt_cost (vec_perm);
840 if (vect_print_dump_info (REPORT_COST))
841 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
845 /* The loads themselves. */
846 vect_get_load_cost (first_dr, ncopies,
847 ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
848 &inside_cost, &outside_cost);
850 if (vect_print_dump_info (REPORT_COST))
851 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
852 "outside_cost = %d .", inside_cost, outside_cost);
854 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
855 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
856 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
860 /* Calculate cost of DR's memory access. */
862 vect_get_load_cost (struct data_reference *dr, int ncopies,
863 bool add_realign_cost, unsigned int *inside_cost,
864 unsigned int *outside_cost)
866 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
868 switch (alignment_support_scheme)
872 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
874 if (vect_print_dump_info (REPORT_COST))
875 fprintf (vect_dump, "vect_model_load_cost: aligned.");
879 case dr_unaligned_supported:
881 gimple stmt = DR_STMT (dr);
882 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
885 /* Here, we assign an additional cost for the unaligned load. */
886 *inside_cost += ncopies
887 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
888 vectype, DR_MISALIGNMENT (dr));
889 if (vect_print_dump_info (REPORT_COST))
890 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
895 case dr_explicit_realign:
897 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
898 + vect_get_stmt_cost (vec_perm));
900 /* FIXME: If the misalignment remains fixed across the iterations of
901 the containing loop, the following cost should be added to the
903 if (targetm.vectorize.builtin_mask_for_load)
904 *inside_cost += vect_get_stmt_cost (vector_stmt);
906 if (vect_print_dump_info (REPORT_COST))
907 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
911 case dr_explicit_realign_optimized:
913 if (vect_print_dump_info (REPORT_COST))
914 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
917 /* Unaligned software pipeline has a load of an address, an initial
918 load, and possibly a mask operation to "prime" the loop. However,
919 if this is an access in a group of loads, which provide strided
920 access, then the above cost should only be considered for one
921 access in the group. Inside the loop, there is a load op
922 and a realignment op. */
924 if (add_realign_cost)
926 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
927 if (targetm.vectorize.builtin_mask_for_load)
928 *outside_cost += vect_get_stmt_cost (vector_stmt);
931 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
932 + vect_get_stmt_cost (vec_perm));
934 if (vect_print_dump_info (REPORT_COST))
936 "vect_model_load_cost: explicit realign optimized");
947 /* Function vect_init_vector.
949 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
950 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
951 is not NULL. Otherwise, place the initialization at the loop preheader.
952 Return the DEF of INIT_STMT.
953 It will be used in the vectorization of STMT. */
956 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
957 gimple_stmt_iterator *gsi)
959 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
967 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
968 add_referenced_var (new_var);
969 init_stmt = gimple_build_assign (new_var, vector_var);
970 new_temp = make_ssa_name (new_var, init_stmt);
971 gimple_assign_set_lhs (init_stmt, new_temp);
974 vect_finish_stmt_generation (stmt, init_stmt, gsi);
977 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
981 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
983 if (nested_in_vect_loop_p (loop, stmt))
986 pe = loop_preheader_edge (loop);
987 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
988 gcc_assert (!new_bb);
992 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
994 gimple_stmt_iterator gsi_bb_start;
996 gcc_assert (bb_vinfo);
997 bb = BB_VINFO_BB (bb_vinfo);
998 gsi_bb_start = gsi_after_labels (bb);
999 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1003 if (vect_print_dump_info (REPORT_DETAILS))
1005 fprintf (vect_dump, "created new init_stmt: ");
1006 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1009 vec_oprnd = gimple_assign_lhs (init_stmt);
1014 /* Function vect_get_vec_def_for_operand.
1016 OP is an operand in STMT. This function returns a (vector) def that will be
1017 used in the vectorized stmt for STMT.
1019 In the case that OP is an SSA_NAME which is defined in the loop, then
1020 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1022 In case OP is an invariant or constant, a new stmt that creates a vector def
1023 needs to be introduced. */
1026 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1031 stmt_vec_info def_stmt_info = NULL;
1032 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1033 unsigned int nunits;
1034 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1040 enum vect_def_type dt;
1044 if (vect_print_dump_info (REPORT_DETAILS))
1046 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1047 print_generic_expr (vect_dump, op, TDF_SLIM);
1050 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1052 gcc_assert (is_simple_use);
1053 if (vect_print_dump_info (REPORT_DETAILS))
1057 fprintf (vect_dump, "def = ");
1058 print_generic_expr (vect_dump, def, TDF_SLIM);
1062 fprintf (vect_dump, " def_stmt = ");
1063 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1069 /* Case 1: operand is a constant. */
1070 case vect_constant_def:
1072 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1073 gcc_assert (vector_type);
1074 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1079 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1080 if (vect_print_dump_info (REPORT_DETAILS))
1081 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1083 vec_cst = build_vector_from_val (vector_type, op);
1084 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1087 /* Case 2: operand is defined outside the loop - loop invariant. */
1088 case vect_external_def:
1090 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1091 gcc_assert (vector_type);
1092 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1097 /* Create 'vec_inv = {inv,inv,..,inv}' */
1098 if (vect_print_dump_info (REPORT_DETAILS))
1099 fprintf (vect_dump, "Create vector_inv.");
1101 for (i = nunits - 1; i >= 0; --i)
1103 t = tree_cons (NULL_TREE, def, t);
1106 /* FIXME: use build_constructor directly. */
1107 vec_inv = build_constructor_from_list (vector_type, t);
1108 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1111 /* Case 3: operand is defined inside the loop. */
1112 case vect_internal_def:
1115 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1117 /* Get the def from the vectorized stmt. */
1118 def_stmt_info = vinfo_for_stmt (def_stmt);
1119 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1120 gcc_assert (vec_stmt);
1121 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1122 vec_oprnd = PHI_RESULT (vec_stmt);
1123 else if (is_gimple_call (vec_stmt))
1124 vec_oprnd = gimple_call_lhs (vec_stmt);
1126 vec_oprnd = gimple_assign_lhs (vec_stmt);
1130 /* Case 4: operand is defined by a loop header phi - reduction */
1131 case vect_reduction_def:
1132 case vect_double_reduction_def:
1133 case vect_nested_cycle:
1137 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1138 loop = (gimple_bb (def_stmt))->loop_father;
1140 /* Get the def before the loop */
1141 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1142 return get_initial_def_for_reduction (stmt, op, scalar_def);
1145 /* Case 5: operand is defined by loop-header phi - induction. */
1146 case vect_induction_def:
1148 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1150 /* Get the def from the vectorized stmt. */
1151 def_stmt_info = vinfo_for_stmt (def_stmt);
1152 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1153 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1154 vec_oprnd = PHI_RESULT (vec_stmt);
1156 vec_oprnd = gimple_get_lhs (vec_stmt);
1166 /* Function vect_get_vec_def_for_stmt_copy
1168 Return a vector-def for an operand. This function is used when the
1169 vectorized stmt to be created (by the caller to this function) is a "copy"
1170 created in case the vectorized result cannot fit in one vector, and several
1171 copies of the vector-stmt are required. In this case the vector-def is
1172 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1173 of the stmt that defines VEC_OPRND.
1174 DT is the type of the vector def VEC_OPRND.
1177 In case the vectorization factor (VF) is bigger than the number
1178 of elements that can fit in a vectype (nunits), we have to generate
1179 more than one vector stmt to vectorize the scalar stmt. This situation
1180 arises when there are multiple data-types operated upon in the loop; the
1181 smallest data-type determines the VF, and as a result, when vectorizing
1182 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1183 vector stmt (each computing a vector of 'nunits' results, and together
1184 computing 'VF' results in each iteration). This function is called when
1185 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1186 which VF=16 and nunits=4, so the number of copies required is 4):
1188 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1190 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1191 VS1.1: vx.1 = memref1 VS1.2
1192 VS1.2: vx.2 = memref2 VS1.3
1193 VS1.3: vx.3 = memref3
1195 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1196 VSnew.1: vz1 = vx.1 + ... VSnew.2
1197 VSnew.2: vz2 = vx.2 + ... VSnew.3
1198 VSnew.3: vz3 = vx.3 + ...
1200 The vectorization of S1 is explained in vectorizable_load.
1201 The vectorization of S2:
1202 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1203 the function 'vect_get_vec_def_for_operand' is called to
1204 get the relevant vector-def for each operand of S2. For operand x it
1205 returns the vector-def 'vx.0'.
1207 To create the remaining copies of the vector-stmt (VSnew.j), this
1208 function is called to get the relevant vector-def for each operand. It is
1209 obtained from the respective VS1.j stmt, which is recorded in the
1210 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1212 For example, to obtain the vector-def 'vx.1' in order to create the
1213 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1214 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1215 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1216 and return its def ('vx.1').
1217 Overall, to create the above sequence this function will be called 3 times:
1218 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1219 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1220 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1223 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1225 gimple vec_stmt_for_operand;
1226 stmt_vec_info def_stmt_info;
1228 /* Do nothing; can reuse same def. */
1229 if (dt == vect_external_def || dt == vect_constant_def )
1232 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1233 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1234 gcc_assert (def_stmt_info);
1235 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1236 gcc_assert (vec_stmt_for_operand);
1237 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1238 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1239 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1241 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1246 /* Get vectorized definitions for the operands to create a copy of an original
1247 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1250 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1251 VEC(tree,heap) **vec_oprnds0,
1252 VEC(tree,heap) **vec_oprnds1)
1254 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1256 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1257 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1259 if (vec_oprnds1 && *vec_oprnds1)
1261 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1262 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1263 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1268 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1272 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1273 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1277 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1282 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1283 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1284 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1288 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1289 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1290 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1296 /* Function vect_finish_stmt_generation.
1298 Insert a new stmt. */
1301 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1302 gimple_stmt_iterator *gsi)
1304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1305 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1306 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1308 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1310 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1312 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1315 if (vect_print_dump_info (REPORT_DETAILS))
1317 fprintf (vect_dump, "add new stmt: ");
1318 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1321 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1324 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1325 a function declaration if the target has a vectorized version
1326 of the function, or NULL_TREE if the function cannot be vectorized. */
1329 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1331 tree fndecl = gimple_call_fndecl (call);
1333 /* We only handle functions that do not read or clobber memory -- i.e.
1334 const or novops ones. */
1335 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1339 || TREE_CODE (fndecl) != FUNCTION_DECL
1340 || !DECL_BUILT_IN (fndecl))
1343 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1347 /* Function vectorizable_call.
1349 Check if STMT performs a function call that can be vectorized.
1350 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1351 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1352 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1355 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1360 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1362 tree vectype_out, vectype_in;
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1366 tree fndecl, new_temp, def, rhs_type;
1368 enum vect_def_type dt[3]
1369 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1370 gimple new_stmt = NULL;
1372 VEC(tree, heap) *vargs = NULL;
1373 enum { NARROW, NONE, WIDEN } modifier;
1376 /* FORNOW: unsupported in basic block SLP. */
1377 gcc_assert (loop_vinfo);
1379 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1382 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1385 /* FORNOW: SLP not supported. */
1386 if (STMT_SLP_TYPE (stmt_info))
1389 /* Is STMT a vectorizable call? */
1390 if (!is_gimple_call (stmt))
1393 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1396 if (stmt_can_throw_internal (stmt))
1399 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1401 /* Process function arguments. */
1402 rhs_type = NULL_TREE;
1403 vectype_in = NULL_TREE;
1404 nargs = gimple_call_num_args (stmt);
1406 /* Bail out if the function has more than three arguments, we do not have
1407 interesting builtin functions to vectorize with more than two arguments
1408 except for fma. No arguments is also not good. */
1409 if (nargs == 0 || nargs > 3)
1412 for (i = 0; i < nargs; i++)
1416 op = gimple_call_arg (stmt, i);
1418 /* We can only handle calls with arguments of the same type. */
1420 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1422 if (vect_print_dump_info (REPORT_DETAILS))
1423 fprintf (vect_dump, "argument types differ.");
1427 rhs_type = TREE_TYPE (op);
1429 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1430 &def_stmt, &def, &dt[i], &opvectype))
1432 if (vect_print_dump_info (REPORT_DETAILS))
1433 fprintf (vect_dump, "use not simple.");
1438 vectype_in = opvectype;
1440 && opvectype != vectype_in)
1442 if (vect_print_dump_info (REPORT_DETAILS))
1443 fprintf (vect_dump, "argument vector types differ.");
1447 /* If all arguments are external or constant defs use a vector type with
1448 the same size as the output vector type. */
1450 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1452 gcc_assert (vectype_in);
1455 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "no vectype for scalar type ");
1458 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1465 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1466 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1467 if (nunits_in == nunits_out / 2)
1469 else if (nunits_out == nunits_in)
1471 else if (nunits_out == nunits_in / 2)
1476 /* For now, we only vectorize functions if a target specific builtin
1477 is available. TODO -- in some cases, it might be profitable to
1478 insert the calls for pieces of the vector, in order to be able
1479 to vectorize other operations in the loop. */
1480 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1481 if (fndecl == NULL_TREE)
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "function is not vectorizable.");
1489 gcc_assert (!gimple_vuse (stmt));
1491 if (modifier == NARROW)
1492 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1494 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1496 /* Sanity check: make sure that at least one copy of the vectorized stmt
1497 needs to be generated. */
1498 gcc_assert (ncopies >= 1);
1500 if (!vec_stmt) /* transformation not required. */
1502 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1503 if (vect_print_dump_info (REPORT_DETAILS))
1504 fprintf (vect_dump, "=== vectorizable_call ===");
1505 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1511 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "transform operation.");
1515 scalar_dest = gimple_call_lhs (stmt);
1516 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1518 prev_stmt_info = NULL;
1522 for (j = 0; j < ncopies; ++j)
1524 /* Build argument list for the vectorized call. */
1526 vargs = VEC_alloc (tree, heap, nargs);
1528 VEC_truncate (tree, vargs, 0);
1530 for (i = 0; i < nargs; i++)
1532 op = gimple_call_arg (stmt, i);
1535 = vect_get_vec_def_for_operand (op, stmt, NULL);
1538 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1540 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1543 VEC_quick_push (tree, vargs, vec_oprnd0);
1546 new_stmt = gimple_build_call_vec (fndecl, vargs);
1547 new_temp = make_ssa_name (vec_dest, new_stmt);
1548 gimple_call_set_lhs (new_stmt, new_temp);
1550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1551 mark_symbols_for_renaming (new_stmt);
1554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1558 prev_stmt_info = vinfo_for_stmt (new_stmt);
1564 for (j = 0; j < ncopies; ++j)
1566 /* Build argument list for the vectorized call. */
1568 vargs = VEC_alloc (tree, heap, nargs * 2);
1570 VEC_truncate (tree, vargs, 0);
1572 for (i = 0; i < nargs; i++)
1574 op = gimple_call_arg (stmt, i);
1578 = vect_get_vec_def_for_operand (op, stmt, NULL);
1580 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1584 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1586 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1588 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1591 VEC_quick_push (tree, vargs, vec_oprnd0);
1592 VEC_quick_push (tree, vargs, vec_oprnd1);
1595 new_stmt = gimple_build_call_vec (fndecl, vargs);
1596 new_temp = make_ssa_name (vec_dest, new_stmt);
1597 gimple_call_set_lhs (new_stmt, new_temp);
1599 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1600 mark_symbols_for_renaming (new_stmt);
1603 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1605 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1607 prev_stmt_info = vinfo_for_stmt (new_stmt);
1610 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1615 /* No current target implements this case. */
1619 VEC_free (tree, heap, vargs);
1621 /* Update the exception handling table with the vector stmt if necessary. */
1622 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1623 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1625 /* The call in STMT might prevent it from being removed in dce.
1626 We however cannot remove it here, due to the way the ssa name
1627 it defines is mapped to the new definition. So just replace
1628 rhs of the statement with something harmless. */
1630 type = TREE_TYPE (scalar_dest);
1631 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1632 build_zero_cst (type));
1633 set_vinfo_for_stmt (new_stmt, stmt_info);
1634 /* For pattern statements make the related statement to point to
1635 NEW_STMT in order to be able to retrieve the original statement
1636 information later. */
1637 if (is_pattern_stmt_p (stmt_info))
1639 gimple related = STMT_VINFO_RELATED_STMT (stmt_info);
1640 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (related)) = new_stmt;
1642 set_vinfo_for_stmt (stmt, NULL);
1643 STMT_VINFO_STMT (stmt_info) = new_stmt;
1644 gsi_replace (gsi, new_stmt, false);
1645 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1651 /* Function vect_gen_widened_results_half
1653 Create a vector stmt whose code, type, number of arguments, and result
1654 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1655 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1656 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1657 needs to be created (DECL is a function-decl of a target-builtin).
1658 STMT is the original scalar stmt that we are vectorizing. */
1661 vect_gen_widened_results_half (enum tree_code code,
1663 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1664 tree vec_dest, gimple_stmt_iterator *gsi,
1670 /* Generate half of the widened result: */
1671 if (code == CALL_EXPR)
1673 /* Target specific support */
1674 if (op_type == binary_op)
1675 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1677 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1678 new_temp = make_ssa_name (vec_dest, new_stmt);
1679 gimple_call_set_lhs (new_stmt, new_temp);
1683 /* Generic support */
1684 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1685 if (op_type != binary_op)
1687 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1689 new_temp = make_ssa_name (vec_dest, new_stmt);
1690 gimple_assign_set_lhs (new_stmt, new_temp);
1692 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1698 /* Check if STMT performs a conversion operation, that can be vectorized.
1699 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1700 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1701 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1704 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1705 gimple *vec_stmt, slp_tree slp_node)
1710 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1712 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1713 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1714 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1718 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1719 gimple new_stmt = NULL;
1720 stmt_vec_info prev_stmt_info;
1723 tree vectype_out, vectype_in;
1727 enum { NARROW, NONE, WIDEN } modifier;
1729 VEC(tree,heap) *vec_oprnds0 = NULL;
1731 VEC(tree,heap) *dummy = NULL;
1734 /* Is STMT a vectorizable conversion? */
1736 /* FORNOW: unsupported in basic block SLP. */
1737 gcc_assert (loop_vinfo);
1739 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1742 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1745 if (!is_gimple_assign (stmt))
1748 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1751 code = gimple_assign_rhs_code (stmt);
1752 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1755 /* Check types of lhs and rhs. */
1756 scalar_dest = gimple_assign_lhs (stmt);
1757 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1759 op0 = gimple_assign_rhs1 (stmt);
1760 rhs_type = TREE_TYPE (op0);
1761 /* Check the operands of the operation. */
1762 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1763 &def_stmt, &def, &dt[0], &vectype_in))
1765 if (vect_print_dump_info (REPORT_DETAILS))
1766 fprintf (vect_dump, "use not simple.");
1769 /* If op0 is an external or constant defs use a vector type of
1770 the same size as the output vector type. */
1772 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1774 gcc_assert (vectype_in);
1777 if (vect_print_dump_info (REPORT_DETAILS))
1779 fprintf (vect_dump, "no vectype for scalar type ");
1780 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1787 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1788 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1789 if (nunits_in == nunits_out / 2)
1791 else if (nunits_out == nunits_in)
1793 else if (nunits_out == nunits_in / 2)
1798 if (modifier == NARROW)
1799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1801 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1803 /* Multiple types in SLP are handled by creating the appropriate number of
1804 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1809 /* Sanity check: make sure that at least one copy of the vectorized stmt
1810 needs to be generated. */
1811 gcc_assert (ncopies >= 1);
1813 /* Supportable by target? */
1814 if ((modifier == NONE
1815 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1816 || (modifier == WIDEN
1817 && !supportable_widening_operation (code, stmt,
1818 vectype_out, vectype_in,
1821 &dummy_int, &dummy))
1822 || (modifier == NARROW
1823 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1824 &code1, &dummy_int, &dummy)))
1826 if (vect_print_dump_info (REPORT_DETAILS))
1827 fprintf (vect_dump, "conversion not supported by target.");
1831 if (modifier != NONE)
1833 /* FORNOW: SLP not supported. */
1834 if (STMT_SLP_TYPE (stmt_info))
1838 if (!vec_stmt) /* transformation not required. */
1840 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1845 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "transform conversion.");
1849 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1851 if (modifier == NONE && !slp_node)
1852 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1854 prev_stmt_info = NULL;
1858 for (j = 0; j < ncopies; j++)
1861 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1863 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1866 targetm.vectorize.builtin_conversion (code,
1867 vectype_out, vectype_in);
1868 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1870 /* Arguments are ready. create the new vector stmt. */
1871 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1872 new_temp = make_ssa_name (vec_dest, new_stmt);
1873 gimple_call_set_lhs (new_stmt, new_temp);
1874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1876 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1880 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1883 prev_stmt_info = vinfo_for_stmt (new_stmt);
1888 /* In case the vectorization factor (VF) is bigger than the number
1889 of elements that we can fit in a vectype (nunits), we have to
1890 generate more than one vector stmt - i.e - we need to "unroll"
1891 the vector stmt by a factor VF/nunits. */
1892 for (j = 0; j < ncopies; j++)
1895 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1897 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1899 /* Generate first half of the widened result: */
1901 = vect_gen_widened_results_half (code1, decl1,
1902 vec_oprnd0, vec_oprnd1,
1903 unary_op, vec_dest, gsi, stmt);
1905 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1907 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1908 prev_stmt_info = vinfo_for_stmt (new_stmt);
1910 /* Generate second half of the widened result: */
1912 = vect_gen_widened_results_half (code2, decl2,
1913 vec_oprnd0, vec_oprnd1,
1914 unary_op, vec_dest, gsi, stmt);
1915 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1916 prev_stmt_info = vinfo_for_stmt (new_stmt);
1921 /* In case the vectorization factor (VF) is bigger than the number
1922 of elements that we can fit in a vectype (nunits), we have to
1923 generate more than one vector stmt - i.e - we need to "unroll"
1924 the vector stmt by a factor VF/nunits. */
1925 for (j = 0; j < ncopies; j++)
1930 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1931 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1935 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1936 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1939 /* Arguments are ready. Create the new vector stmt. */
1940 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1942 new_temp = make_ssa_name (vec_dest, new_stmt);
1943 gimple_assign_set_lhs (new_stmt, new_temp);
1944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1947 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1949 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1951 prev_stmt_info = vinfo_for_stmt (new_stmt);
1954 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1958 VEC_free (tree, heap, vec_oprnds0);
1964 /* Function vectorizable_assignment.
1966 Check if STMT performs an assignment (copy) that can be vectorized.
1967 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1968 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1969 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1972 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1973 gimple *vec_stmt, slp_tree slp_node)
1978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1984 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1985 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1988 VEC(tree,heap) *vec_oprnds = NULL;
1990 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1991 gimple new_stmt = NULL;
1992 stmt_vec_info prev_stmt_info = NULL;
1993 enum tree_code code;
1996 /* Multiple types in SLP are handled by creating the appropriate number of
1997 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2002 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2004 gcc_assert (ncopies >= 1);
2006 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2009 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2012 /* Is vectorizable assignment? */
2013 if (!is_gimple_assign (stmt))
2016 scalar_dest = gimple_assign_lhs (stmt);
2017 if (TREE_CODE (scalar_dest) != SSA_NAME)
2020 code = gimple_assign_rhs_code (stmt);
2021 if (gimple_assign_single_p (stmt)
2022 || code == PAREN_EXPR
2023 || CONVERT_EXPR_CODE_P (code))
2024 op = gimple_assign_rhs1 (stmt);
2028 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2029 &def_stmt, &def, &dt[0], &vectype_in))
2031 if (vect_print_dump_info (REPORT_DETAILS))
2032 fprintf (vect_dump, "use not simple.");
2036 /* We can handle NOP_EXPR conversions that do not change the number
2037 of elements or the vector size. */
2038 if (CONVERT_EXPR_CODE_P (code)
2040 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2041 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2042 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2045 if (!vec_stmt) /* transformation not required. */
2047 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2048 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "=== vectorizable_assignment ===");
2050 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "transform assignment.");
2059 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2062 for (j = 0; j < ncopies; j++)
2066 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2068 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2070 /* Arguments are ready. create the new vector stmt. */
2071 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2073 if (CONVERT_EXPR_CODE_P (code))
2074 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2075 new_stmt = gimple_build_assign (vec_dest, vop);
2076 new_temp = make_ssa_name (vec_dest, new_stmt);
2077 gimple_assign_set_lhs (new_stmt, new_temp);
2078 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2080 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2091 prev_stmt_info = vinfo_for_stmt (new_stmt);
2094 VEC_free (tree, heap, vec_oprnds);
2099 /* Function vectorizable_shift.
2101 Check if STMT performs a shift operation that can be vectorized.
2102 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2103 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2104 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2107 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2108 gimple *vec_stmt, slp_tree slp_node)
2112 tree op0, op1 = NULL;
2113 tree vec_oprnd1 = NULL_TREE;
2114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2116 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2117 enum tree_code code;
2118 enum machine_mode vec_mode;
2122 enum machine_mode optab_op2_mode;
2125 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2126 gimple new_stmt = NULL;
2127 stmt_vec_info prev_stmt_info;
2133 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2136 bool scalar_shift_arg = true;
2137 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2140 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2143 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2146 /* Is STMT a vectorizable binary/unary operation? */
2147 if (!is_gimple_assign (stmt))
2150 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2153 code = gimple_assign_rhs_code (stmt);
2155 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2156 || code == RROTATE_EXPR))
2159 scalar_dest = gimple_assign_lhs (stmt);
2160 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2162 op0 = gimple_assign_rhs1 (stmt);
2163 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2164 &def_stmt, &def, &dt[0], &vectype))
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump, "use not simple.");
2170 /* If op0 is an external or constant def use a vector type with
2171 the same size as the output vector type. */
2173 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2175 gcc_assert (vectype);
2178 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "no vectype for scalar type ");
2181 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2187 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2188 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2189 if (nunits_out != nunits_in)
2192 op1 = gimple_assign_rhs2 (stmt);
2193 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2195 if (vect_print_dump_info (REPORT_DETAILS))
2196 fprintf (vect_dump, "use not simple.");
2201 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2205 /* Multiple types in SLP are handled by creating the appropriate number of
2206 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2211 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2213 gcc_assert (ncopies >= 1);
2215 /* Determine whether the shift amount is a vector, or scalar. If the
2216 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2218 if (dt[1] == vect_internal_def && !slp_node)
2219 scalar_shift_arg = false;
2220 else if (dt[1] == vect_constant_def
2221 || dt[1] == vect_external_def
2222 || dt[1] == vect_internal_def)
2224 /* In SLP, need to check whether the shift count is the same,
2225 in loops if it is a constant or invariant, it is always
2229 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2232 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2233 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2234 scalar_shift_arg = false;
2239 if (vect_print_dump_info (REPORT_DETAILS))
2240 fprintf (vect_dump, "operand mode requires invariant argument.");
2244 /* Vector shifted by vector. */
2245 if (!scalar_shift_arg)
2247 optab = optab_for_tree_code (code, vectype, optab_vector);
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "vector/vector shift/rotate found.");
2251 /* See if the machine has a vector shifted by scalar insn and if not
2252 then see if it has a vector shifted by vector insn. */
2255 optab = optab_for_tree_code (code, vectype, optab_scalar);
2257 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2259 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2264 optab = optab_for_tree_code (code, vectype, optab_vector);
2266 && (optab_handler (optab, TYPE_MODE (vectype))
2267 != CODE_FOR_nothing))
2269 scalar_shift_arg = false;
2271 if (vect_print_dump_info (REPORT_DETAILS))
2272 fprintf (vect_dump, "vector/vector shift/rotate found.");
2274 /* Unlike the other binary operators, shifts/rotates have
2275 the rhs being int, instead of the same type as the lhs,
2276 so make sure the scalar is the right type if we are
2277 dealing with vectors of short/char. */
2278 if (dt[1] == vect_constant_def)
2279 op1 = fold_convert (TREE_TYPE (vectype), op1);
2284 /* Supportable by target? */
2287 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "no optab.");
2291 vec_mode = TYPE_MODE (vectype);
2292 icode = (int) optab_handler (optab, vec_mode);
2293 if (icode == CODE_FOR_nothing)
2295 if (vect_print_dump_info (REPORT_DETAILS))
2296 fprintf (vect_dump, "op not supported by target.");
2297 /* Check only during analysis. */
2298 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2299 || (vf < vect_min_worthwhile_factor (code)
2302 if (vect_print_dump_info (REPORT_DETAILS))
2303 fprintf (vect_dump, "proceeding using word mode.");
2306 /* Worthwhile without SIMD support? Check only during analysis. */
2307 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2308 && vf < vect_min_worthwhile_factor (code)
2311 if (vect_print_dump_info (REPORT_DETAILS))
2312 fprintf (vect_dump, "not worthwhile without SIMD support.");
2316 if (!vec_stmt) /* transformation not required. */
2318 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2319 if (vect_print_dump_info (REPORT_DETAILS))
2320 fprintf (vect_dump, "=== vectorizable_shift ===");
2321 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "transform binary/unary operation.");
2331 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2333 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2334 created in the previous stages of the recursion, so no allocation is
2335 needed, except for the case of shift with scalar shift argument. In that
2336 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2337 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2338 In case of loop-based vectorization we allocate VECs of size 1. We
2339 allocate VEC_OPRNDS1 only in case of binary operation. */
2342 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2343 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2345 else if (scalar_shift_arg)
2346 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2348 prev_stmt_info = NULL;
2349 for (j = 0; j < ncopies; j++)
2354 if (scalar_shift_arg)
2356 /* Vector shl and shr insn patterns can be defined with scalar
2357 operand 2 (shift operand). In this case, use constant or loop
2358 invariant op1 directly, without extending it to vector mode
2360 optab_op2_mode = insn_data[icode].operand[2].mode;
2361 if (!VECTOR_MODE_P (optab_op2_mode))
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "operand 1 using scalar mode.");
2366 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2369 /* Store vec_oprnd1 for every vector stmt to be created
2370 for SLP_NODE. We check during the analysis that all
2371 the shift arguments are the same.
2372 TODO: Allow different constants for different vector
2373 stmts generated for an SLP instance. */
2374 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2375 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2380 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2381 (a special case for certain kind of vector shifts); otherwise,
2382 operand 1 should be of a vector type (the usual case). */
2384 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2387 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2391 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2393 /* Arguments are ready. Create the new vector stmt. */
2394 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2396 vop1 = VEC_index (tree, vec_oprnds1, i);
2397 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2398 new_temp = make_ssa_name (vec_dest, new_stmt);
2399 gimple_assign_set_lhs (new_stmt, new_temp);
2400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2402 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2409 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2411 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2412 prev_stmt_info = vinfo_for_stmt (new_stmt);
2415 VEC_free (tree, heap, vec_oprnds0);
2416 VEC_free (tree, heap, vec_oprnds1);
2422 /* Function vectorizable_operation.
2424 Check if STMT performs a binary, unary or ternary operation that can
2426 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2427 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2428 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2431 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2432 gimple *vec_stmt, slp_tree slp_node)
2436 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2439 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2440 enum tree_code code;
2441 enum machine_mode vec_mode;
2448 enum vect_def_type dt[3]
2449 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2450 gimple new_stmt = NULL;
2451 stmt_vec_info prev_stmt_info;
2457 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2458 tree vop0, vop1, vop2;
2459 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2462 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2465 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2468 /* Is STMT a vectorizable binary/unary operation? */
2469 if (!is_gimple_assign (stmt))
2472 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2475 code = gimple_assign_rhs_code (stmt);
2477 /* For pointer addition, we should use the normal plus for
2478 the vector addition. */
2479 if (code == POINTER_PLUS_EXPR)
2482 /* Support only unary or binary operations. */
2483 op_type = TREE_CODE_LENGTH (code);
2484 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2486 if (vect_print_dump_info (REPORT_DETAILS))
2487 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2492 scalar_dest = gimple_assign_lhs (stmt);
2493 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2495 op0 = gimple_assign_rhs1 (stmt);
2496 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2497 &def_stmt, &def, &dt[0], &vectype))
2499 if (vect_print_dump_info (REPORT_DETAILS))
2500 fprintf (vect_dump, "use not simple.");
2503 /* If op0 is an external or constant def use a vector type with
2504 the same size as the output vector type. */
2506 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2508 gcc_assert (vectype);
2511 if (vect_print_dump_info (REPORT_DETAILS))
2513 fprintf (vect_dump, "no vectype for scalar type ");
2514 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2520 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2521 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2522 if (nunits_out != nunits_in)
2525 if (op_type == binary_op || op_type == ternary_op)
2527 op1 = gimple_assign_rhs2 (stmt);
2528 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2531 if (vect_print_dump_info (REPORT_DETAILS))
2532 fprintf (vect_dump, "use not simple.");
2536 if (op_type == ternary_op)
2538 op2 = gimple_assign_rhs3 (stmt);
2539 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2542 if (vect_print_dump_info (REPORT_DETAILS))
2543 fprintf (vect_dump, "use not simple.");
2549 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2553 /* Multiple types in SLP are handled by creating the appropriate number of
2554 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2559 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2561 gcc_assert (ncopies >= 1);
2563 /* Shifts are handled in vectorizable_shift (). */
2564 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2565 || code == RROTATE_EXPR)
2568 optab = optab_for_tree_code (code, vectype, optab_default);
2570 /* Supportable by target? */
2573 if (vect_print_dump_info (REPORT_DETAILS))
2574 fprintf (vect_dump, "no optab.");
2577 vec_mode = TYPE_MODE (vectype);
2578 icode = (int) optab_handler (optab, vec_mode);
2579 if (icode == CODE_FOR_nothing)
2581 if (vect_print_dump_info (REPORT_DETAILS))
2582 fprintf (vect_dump, "op not supported by target.");
2583 /* Check only during analysis. */
2584 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2585 || (vf < vect_min_worthwhile_factor (code)
2588 if (vect_print_dump_info (REPORT_DETAILS))
2589 fprintf (vect_dump, "proceeding using word mode.");
2592 /* Worthwhile without SIMD support? Check only during analysis. */
2593 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2594 && vf < vect_min_worthwhile_factor (code)
2597 if (vect_print_dump_info (REPORT_DETAILS))
2598 fprintf (vect_dump, "not worthwhile without SIMD support.");
2602 if (!vec_stmt) /* transformation not required. */
2604 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2605 if (vect_print_dump_info (REPORT_DETAILS))
2606 fprintf (vect_dump, "=== vectorizable_operation ===");
2607 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2613 if (vect_print_dump_info (REPORT_DETAILS))
2614 fprintf (vect_dump, "transform binary/unary operation.");
2617 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2619 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2620 created in the previous stages of the recursion, so no allocation is
2621 needed, except for the case of shift with scalar shift argument. In that
2622 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2623 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2624 In case of loop-based vectorization we allocate VECs of size 1. We
2625 allocate VEC_OPRNDS1 only in case of binary operation. */
2628 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2629 if (op_type == binary_op || op_type == ternary_op)
2630 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2631 if (op_type == ternary_op)
2632 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2635 /* In case the vectorization factor (VF) is bigger than the number
2636 of elements that we can fit in a vectype (nunits), we have to generate
2637 more than one vector stmt - i.e - we need to "unroll" the
2638 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2639 from one copy of the vector stmt to the next, in the field
2640 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2641 stages to find the correct vector defs to be used when vectorizing
2642 stmts that use the defs of the current stmt. The example below
2643 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2644 we need to create 4 vectorized stmts):
2646 before vectorization:
2647 RELATED_STMT VEC_STMT
2651 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2653 RELATED_STMT VEC_STMT
2654 VS1_0: vx0 = memref0 VS1_1 -
2655 VS1_1: vx1 = memref1 VS1_2 -
2656 VS1_2: vx2 = memref2 VS1_3 -
2657 VS1_3: vx3 = memref3 - -
2658 S1: x = load - VS1_0
2661 step2: vectorize stmt S2 (done here):
2662 To vectorize stmt S2 we first need to find the relevant vector
2663 def for the first operand 'x'. This is, as usual, obtained from
2664 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2665 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2666 relevant vector def 'vx0'. Having found 'vx0' we can generate
2667 the vector stmt VS2_0, and as usual, record it in the
2668 STMT_VINFO_VEC_STMT of stmt S2.
2669 When creating the second copy (VS2_1), we obtain the relevant vector
2670 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2671 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2672 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2673 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2674 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2675 chain of stmts and pointers:
2676 RELATED_STMT VEC_STMT
2677 VS1_0: vx0 = memref0 VS1_1 -
2678 VS1_1: vx1 = memref1 VS1_2 -
2679 VS1_2: vx2 = memref2 VS1_3 -
2680 VS1_3: vx3 = memref3 - -
2681 S1: x = load - VS1_0
2682 VS2_0: vz0 = vx0 + v1 VS2_1 -
2683 VS2_1: vz1 = vx1 + v1 VS2_2 -
2684 VS2_2: vz2 = vx2 + v1 VS2_3 -
2685 VS2_3: vz3 = vx3 + v1 - -
2686 S2: z = x + 1 - VS2_0 */
2688 prev_stmt_info = NULL;
2689 for (j = 0; j < ncopies; j++)
2694 if (op_type == binary_op || op_type == ternary_op)
2695 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2698 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2700 if (op_type == ternary_op)
2702 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2703 VEC_quick_push (tree, vec_oprnds2,
2704 vect_get_vec_def_for_operand (op2, stmt, NULL));
2709 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2710 if (op_type == ternary_op)
2712 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2713 VEC_quick_push (tree, vec_oprnds2,
2714 vect_get_vec_def_for_stmt_copy (dt[2],
2719 /* Arguments are ready. Create the new vector stmt. */
2720 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2722 vop1 = ((op_type == binary_op || op_type == ternary_op)
2723 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2724 vop2 = ((op_type == ternary_op)
2725 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2726 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2728 new_temp = make_ssa_name (vec_dest, new_stmt);
2729 gimple_assign_set_lhs (new_stmt, new_temp);
2730 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2732 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2742 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 VEC_free (tree, heap, vec_oprnds0);
2747 VEC_free (tree, heap, vec_oprnds1);
2749 VEC_free (tree, heap, vec_oprnds2);
2755 /* Get vectorized definitions for loop-based vectorization. For the first
2756 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2757 scalar operand), and for the rest we get a copy with
2758 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2759 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2760 The vectors are collected into VEC_OPRNDS. */
2763 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2764 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2768 /* Get first vector operand. */
2769 /* All the vector operands except the very first one (that is scalar oprnd)
2771 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2772 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2774 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2776 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2778 /* Get second vector operand. */
2779 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2780 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2784 /* For conversion in multiple steps, continue to get operands
2787 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2791 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2792 For multi-step conversions store the resulting vectors and call the function
2796 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2797 int multi_step_cvt, gimple stmt,
2798 VEC (tree, heap) *vec_dsts,
2799 gimple_stmt_iterator *gsi,
2800 slp_tree slp_node, enum tree_code code,
2801 stmt_vec_info *prev_stmt_info)
2804 tree vop0, vop1, new_tmp, vec_dest;
2806 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2808 vec_dest = VEC_pop (tree, vec_dsts);
2810 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2812 /* Create demotion operation. */
2813 vop0 = VEC_index (tree, *vec_oprnds, i);
2814 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2815 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2816 new_tmp = make_ssa_name (vec_dest, new_stmt);
2817 gimple_assign_set_lhs (new_stmt, new_tmp);
2818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2821 /* Store the resulting vector for next recursive call. */
2822 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2825 /* This is the last step of the conversion sequence. Store the
2826 vectors in SLP_NODE or in vector info of the scalar statement
2827 (or in STMT_VINFO_RELATED_STMT chain). */
2829 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2832 if (!*prev_stmt_info)
2833 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2835 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2837 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2842 /* For multi-step demotion operations we first generate demotion operations
2843 from the source type to the intermediate types, and then combine the
2844 results (stored in VEC_OPRNDS) in demotion operation to the destination
2848 /* At each level of recursion we have have of the operands we had at the
2850 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2851 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2852 stmt, vec_dsts, gsi, slp_node,
2853 code, prev_stmt_info);
2858 /* Function vectorizable_type_demotion
2860 Check if STMT performs a binary or unary operation that involves
2861 type demotion, and if it can be vectorized.
2862 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2863 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2864 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2867 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2868 gimple *vec_stmt, slp_tree slp_node)
2873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2874 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2875 enum tree_code code, code1 = ERROR_MARK;
2878 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2879 stmt_vec_info prev_stmt_info;
2886 int multi_step_cvt = 0;
2887 VEC (tree, heap) *vec_oprnds0 = NULL;
2888 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2889 tree last_oprnd, intermediate_type;
2891 /* FORNOW: not supported by basic block SLP vectorization. */
2892 gcc_assert (loop_vinfo);
2894 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2897 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2900 /* Is STMT a vectorizable type-demotion operation? */
2901 if (!is_gimple_assign (stmt))
2904 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2907 code = gimple_assign_rhs_code (stmt);
2908 if (!CONVERT_EXPR_CODE_P (code))
2911 scalar_dest = gimple_assign_lhs (stmt);
2912 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2914 /* Check the operands of the operation. */
2915 op0 = gimple_assign_rhs1 (stmt);
2916 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2917 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2918 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2919 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2920 && CONVERT_EXPR_CODE_P (code))))
2922 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
2923 &def_stmt, &def, &dt[0], &vectype_in))
2925 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "use not simple.");
2929 /* If op0 is an external def use a vector type with the
2930 same size as the output vector type if possible. */
2932 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2934 gcc_assert (vectype_in);
2937 if (vect_print_dump_info (REPORT_DETAILS))
2939 fprintf (vect_dump, "no vectype for scalar type ");
2940 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2946 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2947 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2948 if (nunits_in >= nunits_out)
2951 /* Multiple types in SLP are handled by creating the appropriate number of
2952 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2957 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2958 gcc_assert (ncopies >= 1);
2960 /* Supportable by target? */
2961 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
2962 &code1, &multi_step_cvt, &interm_types))
2965 if (!vec_stmt) /* transformation not required. */
2967 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2968 if (vect_print_dump_info (REPORT_DETAILS))
2969 fprintf (vect_dump, "=== vectorizable_demotion ===");
2970 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2975 if (vect_print_dump_info (REPORT_DETAILS))
2976 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2979 /* In case of multi-step demotion, we first generate demotion operations to
2980 the intermediate types, and then from that types to the final one.
2981 We create vector destinations for the intermediate type (TYPES) received
2982 from supportable_narrowing_operation, and store them in the correct order
2983 for future use in vect_create_vectorized_demotion_stmts(). */
2985 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2987 vec_dsts = VEC_alloc (tree, heap, 1);
2989 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2990 VEC_quick_push (tree, vec_dsts, vec_dest);
2994 for (i = VEC_length (tree, interm_types) - 1;
2995 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2997 vec_dest = vect_create_destination_var (scalar_dest,
2999 VEC_quick_push (tree, vec_dsts, vec_dest);
3003 /* In case the vectorization factor (VF) is bigger than the number
3004 of elements that we can fit in a vectype (nunits), we have to generate
3005 more than one vector stmt - i.e - we need to "unroll" the
3006 vector stmt by a factor VF/nunits. */
3008 prev_stmt_info = NULL;
3009 for (j = 0; j < ncopies; j++)
3013 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3016 VEC_free (tree, heap, vec_oprnds0);
3017 vec_oprnds0 = VEC_alloc (tree, heap,
3018 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3019 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3020 vect_pow2 (multi_step_cvt) - 1);
3023 /* Arguments are ready. Create the new vector stmts. */
3024 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3025 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3026 multi_step_cvt, stmt, tmp_vec_dsts,
3027 gsi, slp_node, code1,
3031 VEC_free (tree, heap, vec_oprnds0);
3032 VEC_free (tree, heap, vec_dsts);
3033 VEC_free (tree, heap, tmp_vec_dsts);
3034 VEC_free (tree, heap, interm_types);
3036 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3041 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3042 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3043 the resulting vectors and call the function recursively. */
3046 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3047 VEC (tree, heap) **vec_oprnds1,
3048 int multi_step_cvt, gimple stmt,
3049 VEC (tree, heap) *vec_dsts,
3050 gimple_stmt_iterator *gsi,
3051 slp_tree slp_node, enum tree_code code1,
3052 enum tree_code code2, tree decl1,
3053 tree decl2, int op_type,
3054 stmt_vec_info *prev_stmt_info)
3057 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3058 gimple new_stmt1, new_stmt2;
3059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3060 VEC (tree, heap) *vec_tmp;
3062 vec_dest = VEC_pop (tree, vec_dsts);
3063 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3065 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3067 if (op_type == binary_op)
3068 vop1 = VEC_index (tree, *vec_oprnds1, i);
3072 /* Generate the two halves of promotion operation. */
3073 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3074 op_type, vec_dest, gsi, stmt);
3075 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3076 op_type, vec_dest, gsi, stmt);
3077 if (is_gimple_call (new_stmt1))
3079 new_tmp1 = gimple_call_lhs (new_stmt1);
3080 new_tmp2 = gimple_call_lhs (new_stmt2);
3084 new_tmp1 = gimple_assign_lhs (new_stmt1);
3085 new_tmp2 = gimple_assign_lhs (new_stmt2);
3090 /* Store the results for the recursive call. */
3091 VEC_quick_push (tree, vec_tmp, new_tmp1);
3092 VEC_quick_push (tree, vec_tmp, new_tmp2);
3096 /* Last step of promotion sequience - store the results. */
3099 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3100 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3104 if (!*prev_stmt_info)
3105 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3107 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3109 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3110 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3111 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3118 /* For multi-step promotion operation we first generate we call the
3119 function recurcively for every stage. We start from the input type,
3120 create promotion operations to the intermediate types, and then
3121 create promotions to the output type. */
3122 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3123 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3124 multi_step_cvt - 1, stmt,
3125 vec_dsts, gsi, slp_node, code1,
3126 code2, decl2, decl2, op_type,
3130 VEC_free (tree, heap, vec_tmp);
3134 /* Function vectorizable_type_promotion
3136 Check if STMT performs a binary or unary operation that involves
3137 type promotion, and if it can be vectorized.
3138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3143 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3144 gimple *vec_stmt, slp_tree slp_node)
3148 tree op0, op1 = NULL;
3149 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3151 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3152 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3153 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3157 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3158 stmt_vec_info prev_stmt_info;
3165 tree intermediate_type = NULL_TREE;
3166 int multi_step_cvt = 0;
3167 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3168 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3170 /* FORNOW: not supported by basic block SLP vectorization. */
3171 gcc_assert (loop_vinfo);
3173 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3176 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3179 /* Is STMT a vectorizable type-promotion operation? */
3180 if (!is_gimple_assign (stmt))
3183 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3186 code = gimple_assign_rhs_code (stmt);
3187 if (!CONVERT_EXPR_CODE_P (code)
3188 && code != WIDEN_MULT_EXPR)
3191 scalar_dest = gimple_assign_lhs (stmt);
3192 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3194 /* Check the operands of the operation. */
3195 op0 = gimple_assign_rhs1 (stmt);
3196 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3197 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3198 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3199 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3200 && CONVERT_EXPR_CODE_P (code))))
3202 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3203 &def_stmt, &def, &dt[0], &vectype_in))
3205 if (vect_print_dump_info (REPORT_DETAILS))
3206 fprintf (vect_dump, "use not simple.");
3209 /* If op0 is an external or constant def use a vector type with
3210 the same size as the output vector type. */
3212 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3214 gcc_assert (vectype_in);
3217 if (vect_print_dump_info (REPORT_DETAILS))
3219 fprintf (vect_dump, "no vectype for scalar type ");
3220 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3226 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3227 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3228 if (nunits_in <= nunits_out)
3231 /* Multiple types in SLP are handled by creating the appropriate number of
3232 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3237 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3239 gcc_assert (ncopies >= 1);
3241 op_type = TREE_CODE_LENGTH (code);
3242 if (op_type == binary_op)
3244 op1 = gimple_assign_rhs2 (stmt);
3245 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
3247 if (vect_print_dump_info (REPORT_DETAILS))
3248 fprintf (vect_dump, "use not simple.");
3253 /* Supportable by target? */
3254 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3255 &decl1, &decl2, &code1, &code2,
3256 &multi_step_cvt, &interm_types))
3259 /* Binary widening operation can only be supported directly by the
3261 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3263 if (!vec_stmt) /* transformation not required. */
3265 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (vect_dump, "=== vectorizable_promotion ===");
3268 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3274 if (vect_print_dump_info (REPORT_DETAILS))
3275 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3279 /* In case of multi-step promotion, we first generate promotion operations
3280 to the intermediate types, and then from that types to the final one.
3281 We store vector destination in VEC_DSTS in the correct order for
3282 recursive creation of promotion operations in
3283 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3284 according to TYPES recieved from supportable_widening_operation(). */
3286 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3288 vec_dsts = VEC_alloc (tree, heap, 1);
3290 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3291 VEC_quick_push (tree, vec_dsts, vec_dest);
3295 for (i = VEC_length (tree, interm_types) - 1;
3296 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3298 vec_dest = vect_create_destination_var (scalar_dest,
3300 VEC_quick_push (tree, vec_dsts, vec_dest);
3306 vec_oprnds0 = VEC_alloc (tree, heap,
3307 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3308 if (op_type == binary_op)
3309 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3312 /* In case the vectorization factor (VF) is bigger than the number
3313 of elements that we can fit in a vectype (nunits), we have to generate
3314 more than one vector stmt - i.e - we need to "unroll" the
3315 vector stmt by a factor VF/nunits. */
3317 prev_stmt_info = NULL;
3318 for (j = 0; j < ncopies; j++)
3324 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3328 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3329 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3330 if (op_type == binary_op)
3332 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3333 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3339 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3340 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3341 if (op_type == binary_op)
3343 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3344 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3348 /* Arguments are ready. Create the new vector stmts. */
3349 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3350 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3351 multi_step_cvt, stmt,
3353 gsi, slp_node, code1, code2,
3354 decl1, decl2, op_type,
3358 VEC_free (tree, heap, vec_dsts);
3359 VEC_free (tree, heap, tmp_vec_dsts);
3360 VEC_free (tree, heap, interm_types);
3361 VEC_free (tree, heap, vec_oprnds0);
3362 VEC_free (tree, heap, vec_oprnds1);
3364 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3369 /* Function vectorizable_store.
3371 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3373 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3374 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3375 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3378 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3384 tree vec_oprnd = NULL_TREE;
3385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3386 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3387 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3388 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3389 struct loop *loop = NULL;
3390 enum machine_mode vec_mode;
3392 enum dr_alignment_support alignment_support_scheme;
3395 enum vect_def_type dt;
3396 stmt_vec_info prev_stmt_info = NULL;
3397 tree dataref_ptr = NULL_TREE;
3398 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3401 gimple next_stmt, first_stmt = NULL;
3402 bool strided_store = false;
3403 unsigned int group_size, i;
3404 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3406 VEC(tree,heap) *vec_oprnds = NULL;
3407 bool slp = (slp_node != NULL);
3408 unsigned int vec_num;
3409 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3412 loop = LOOP_VINFO_LOOP (loop_vinfo);
3414 /* Multiple types in SLP are handled by creating the appropriate number of
3415 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3420 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3422 gcc_assert (ncopies >= 1);
3424 /* FORNOW. This restriction should be relaxed. */
3425 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428 fprintf (vect_dump, "multiple types in nested loop.");
3432 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3435 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3438 /* Is vectorizable store? */
3440 if (!is_gimple_assign (stmt))
3443 scalar_dest = gimple_assign_lhs (stmt);
3444 if (TREE_CODE (scalar_dest) != ARRAY_REF
3445 && TREE_CODE (scalar_dest) != INDIRECT_REF
3446 && TREE_CODE (scalar_dest) != COMPONENT_REF
3447 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3448 && TREE_CODE (scalar_dest) != REALPART_EXPR
3449 && TREE_CODE (scalar_dest) != MEM_REF)
3452 gcc_assert (gimple_assign_single_p (stmt));
3453 op = gimple_assign_rhs1 (stmt);
3454 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3456 if (vect_print_dump_info (REPORT_DETAILS))
3457 fprintf (vect_dump, "use not simple.");
3461 /* The scalar rhs type needs to be trivially convertible to the vector
3462 component type. This should always be the case. */
3463 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
3465 if (vect_print_dump_info (REPORT_DETAILS))
3466 fprintf (vect_dump, "??? operands of different types");
3470 vec_mode = TYPE_MODE (vectype);
3471 /* FORNOW. In some cases can vectorize even if data-type not supported
3472 (e.g. - array initialization with 0). */
3473 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3476 if (!STMT_VINFO_DATA_REF (stmt_info))
3479 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3481 if (vect_print_dump_info (REPORT_DETAILS))
3482 fprintf (vect_dump, "negative step for store.");
3486 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3488 strided_store = true;
3489 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3490 if (!vect_strided_store_supported (vectype)
3491 && !PURE_SLP_STMT (stmt_info) && !slp)
3494 if (first_stmt == stmt)
3496 /* STMT is the leader of the group. Check the operands of all the
3497 stmts of the group. */
3498 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3501 gcc_assert (gimple_assign_single_p (next_stmt));
3502 op = gimple_assign_rhs1 (next_stmt);
3503 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3506 if (vect_print_dump_info (REPORT_DETAILS))
3507 fprintf (vect_dump, "use not simple.");
3510 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3515 if (!vec_stmt) /* transformation not required. */
3517 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3518 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3526 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3527 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3529 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3532 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3534 /* We vectorize all the stmts of the interleaving group when we
3535 reach the last stmt in the group. */
3536 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3537 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3546 strided_store = false;
3547 /* VEC_NUM is the number of vect stmts to be created for this
3549 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3550 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3551 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3554 /* VEC_NUM is the number of vect stmts to be created for this
3556 vec_num = group_size;
3562 group_size = vec_num = 1;
3565 if (vect_print_dump_info (REPORT_DETAILS))
3566 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3568 dr_chain = VEC_alloc (tree, heap, group_size);
3569 oprnds = VEC_alloc (tree, heap, group_size);
3571 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3572 gcc_assert (alignment_support_scheme);
3574 /* In case the vectorization factor (VF) is bigger than the number
3575 of elements that we can fit in a vectype (nunits), we have to generate
3576 more than one vector stmt - i.e - we need to "unroll" the
3577 vector stmt by a factor VF/nunits. For more details see documentation in
3578 vect_get_vec_def_for_copy_stmt. */
3580 /* In case of interleaving (non-unit strided access):
3587 We create vectorized stores starting from base address (the access of the
3588 first stmt in the chain (S2 in the above example), when the last store stmt
3589 of the chain (S4) is reached:
3592 VS2: &base + vec_size*1 = vx0
3593 VS3: &base + vec_size*2 = vx1
3594 VS4: &base + vec_size*3 = vx3
3596 Then permutation statements are generated:
3598 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3599 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3602 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3603 (the order of the data-refs in the output of vect_permute_store_chain
3604 corresponds to the order of scalar stmts in the interleaving chain - see
3605 the documentation of vect_permute_store_chain()).
3607 In case of both multiple types and interleaving, above vector stores and
3608 permutation stmts are created for every copy. The result vector stmts are
3609 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3610 STMT_VINFO_RELATED_STMT for the next copies.
3613 prev_stmt_info = NULL;
3614 for (j = 0; j < ncopies; j++)
3623 /* Get vectorized arguments for SLP_NODE. */
3624 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3627 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3631 /* For interleaved stores we collect vectorized defs for all the
3632 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3633 used as an input to vect_permute_store_chain(), and OPRNDS as
3634 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3636 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3637 OPRNDS are of size 1. */
3638 next_stmt = first_stmt;
3639 for (i = 0; i < group_size; i++)
3641 /* Since gaps are not supported for interleaved stores,
3642 GROUP_SIZE is the exact number of stmts in the chain.
3643 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3644 there is no interleaving, GROUP_SIZE is 1, and only one
3645 iteration of the loop will be executed. */
3646 gcc_assert (next_stmt
3647 && gimple_assign_single_p (next_stmt));
3648 op = gimple_assign_rhs1 (next_stmt);
3650 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3652 VEC_quick_push(tree, dr_chain, vec_oprnd);
3653 VEC_quick_push(tree, oprnds, vec_oprnd);
3654 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3658 /* We should have catched mismatched types earlier. */
3659 gcc_assert (useless_type_conversion_p (vectype,
3660 TREE_TYPE (vec_oprnd)));
3661 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3662 &dummy, &ptr_incr, false,
3664 gcc_assert (bb_vinfo || !inv_p);
3668 /* For interleaved stores we created vectorized defs for all the
3669 defs stored in OPRNDS in the previous iteration (previous copy).
3670 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3671 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3673 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3674 OPRNDS are of size 1. */
3675 for (i = 0; i < group_size; i++)
3677 op = VEC_index (tree, oprnds, i);
3678 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3680 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3681 VEC_replace(tree, dr_chain, i, vec_oprnd);
3682 VEC_replace(tree, oprnds, i, vec_oprnd);
3685 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3690 result_chain = VEC_alloc (tree, heap, group_size);
3692 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3697 next_stmt = first_stmt;
3698 for (i = 0; i < vec_num; i++)
3700 struct ptr_info_def *pi;
3703 /* Bump the vector pointer. */
3704 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3708 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3709 else if (strided_store)
3710 /* For strided stores vectorized defs are interleaved in
3711 vect_permute_store_chain(). */
3712 vec_oprnd = VEC_index (tree, result_chain, i);
3714 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3715 build_int_cst (reference_alias_ptr_type
3716 (DR_REF (first_dr)), 0));
3717 pi = get_ptr_info (dataref_ptr);
3718 pi->align = TYPE_ALIGN_UNIT (vectype);
3719 if (aligned_access_p (first_dr))
3721 else if (DR_MISALIGNMENT (first_dr) == -1)
3723 TREE_TYPE (data_ref)
3724 = build_aligned_type (TREE_TYPE (data_ref),
3725 TYPE_ALIGN (TREE_TYPE (vectype)));
3726 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
3731 TREE_TYPE (data_ref)
3732 = build_aligned_type (TREE_TYPE (data_ref),
3733 TYPE_ALIGN (TREE_TYPE (vectype)));
3734 pi->misalign = DR_MISALIGNMENT (first_dr);
3737 /* Arguments are ready. Create the new vector stmt. */
3738 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3740 mark_symbols_for_renaming (new_stmt);
3746 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3748 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3750 prev_stmt_info = vinfo_for_stmt (new_stmt);
3751 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3757 VEC_free (tree, heap, dr_chain);
3758 VEC_free (tree, heap, oprnds);
3760 VEC_free (tree, heap, result_chain);
3762 VEC_free (tree, heap, vec_oprnds);
3767 /* Given a vector type VECTYPE returns a builtin DECL to be used
3768 for vector permutation and stores a mask into *MASK that implements
3769 reversal of the vector elements. If that is impossible to do
3770 returns NULL (and *MASK is unchanged). */
3773 perm_mask_for_reverse (tree vectype, tree *mask)
3776 tree mask_element_type, mask_type;
3777 tree mask_vec = NULL;
3780 if (!targetm.vectorize.builtin_vec_perm)
3783 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3784 &mask_element_type);
3785 if (!builtin_decl || !mask_element_type)
3788 mask_type = get_vectype_for_scalar_type (mask_element_type);
3789 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3791 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
3794 for (i = 0; i < nunits; i++)
3795 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
3796 mask_vec = build_vector (mask_type, mask_vec);
3798 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
3802 return builtin_decl;
3805 /* Given a vector variable X, that was generated for the scalar LHS of
3806 STMT, generate instructions to reverse the vector elements of X,
3807 insert them a *GSI and return the permuted vector variable. */
3810 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
3812 tree vectype = TREE_TYPE (x);
3813 tree mask_vec, builtin_decl;
3814 tree perm_dest, data_ref;
3817 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
3819 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3821 /* Generate the permute statement. */
3822 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
3823 if (!useless_type_conversion_p (vectype,
3824 TREE_TYPE (TREE_TYPE (builtin_decl))))
3826 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
3827 tem = make_ssa_name (tem, perm_stmt);
3828 gimple_call_set_lhs (perm_stmt, tem);
3829 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3830 perm_stmt = gimple_build_assign (NULL_TREE,
3831 build1 (VIEW_CONVERT_EXPR,
3834 data_ref = make_ssa_name (perm_dest, perm_stmt);
3835 gimple_set_lhs (perm_stmt, data_ref);
3836 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3841 /* vectorizable_load.
3843 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3845 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3846 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3847 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3850 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3851 slp_tree slp_node, slp_instance slp_node_instance)
3854 tree vec_dest = NULL;
3855 tree data_ref = NULL;
3856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3857 stmt_vec_info prev_stmt_info;
3858 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3859 struct loop *loop = NULL;
3860 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3861 bool nested_in_vect_loop = false;
3862 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3863 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3865 enum machine_mode mode;
3866 gimple new_stmt = NULL;
3868 enum dr_alignment_support alignment_support_scheme;
3869 tree dataref_ptr = NULL_TREE;
3871 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3873 int i, j, group_size;
3874 tree msq = NULL_TREE, lsq;
3875 tree offset = NULL_TREE;
3876 tree realignment_token = NULL_TREE;
3878 VEC(tree,heap) *dr_chain = NULL;
3879 bool strided_load = false;
3884 bool compute_in_loop = false;
3885 struct loop *at_loop;
3887 bool slp = (slp_node != NULL);
3888 bool slp_perm = false;
3889 enum tree_code code;
3890 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3895 loop = LOOP_VINFO_LOOP (loop_vinfo);
3896 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3897 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3902 /* Multiple types in SLP are handled by creating the appropriate number of
3903 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3908 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3910 gcc_assert (ncopies >= 1);
3912 /* FORNOW. This restriction should be relaxed. */
3913 if (nested_in_vect_loop && ncopies > 1)
3915 if (vect_print_dump_info (REPORT_DETAILS))
3916 fprintf (vect_dump, "multiple types in nested loop.");
3920 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3923 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3926 /* Is vectorizable load? */
3927 if (!is_gimple_assign (stmt))
3930 scalar_dest = gimple_assign_lhs (stmt);
3931 if (TREE_CODE (scalar_dest) != SSA_NAME)
3934 code = gimple_assign_rhs_code (stmt);
3935 if (code != ARRAY_REF
3936 && code != INDIRECT_REF
3937 && code != COMPONENT_REF
3938 && code != IMAGPART_EXPR
3939 && code != REALPART_EXPR
3943 if (!STMT_VINFO_DATA_REF (stmt_info))
3946 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
3947 if (negative && ncopies > 1)
3949 if (vect_print_dump_info (REPORT_DETAILS))
3950 fprintf (vect_dump, "multiple types with negative step.");
3954 scalar_type = TREE_TYPE (DR_REF (dr));
3955 mode = TYPE_MODE (vectype);
3957 /* FORNOW. In some cases can vectorize even if data-type not supported
3958 (e.g. - data copies). */
3959 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
3961 if (vect_print_dump_info (REPORT_DETAILS))
3962 fprintf (vect_dump, "Aligned load, but unsupported type.");
3966 /* The vector component type needs to be trivially convertible to the
3967 scalar lhs. This should always be the case. */
3968 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3970 if (vect_print_dump_info (REPORT_DETAILS))
3971 fprintf (vect_dump, "??? operands of different types");
3975 /* Check if the load is a part of an interleaving chain. */
3976 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3978 strided_load = true;
3980 gcc_assert (! nested_in_vect_loop);
3982 /* Check if interleaving is supported. */
3983 if (!vect_strided_load_supported (vectype)
3984 && !PURE_SLP_STMT (stmt_info) && !slp)
3990 gcc_assert (!strided_load);
3991 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
3992 if (alignment_support_scheme != dr_aligned
3993 && alignment_support_scheme != dr_unaligned_supported)
3995 if (vect_print_dump_info (REPORT_DETAILS))
3996 fprintf (vect_dump, "negative step but alignment required.");
3999 if (!perm_mask_for_reverse (vectype, NULL))
4001 if (vect_print_dump_info (REPORT_DETAILS))
4002 fprintf (vect_dump, "negative step and reversing not supported.");
4007 if (!vec_stmt) /* transformation not required. */
4009 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4010 vect_model_load_cost (stmt_info, ncopies, NULL);
4014 if (vect_print_dump_info (REPORT_DETAILS))
4015 fprintf (vect_dump, "transform load.");
4021 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4022 /* Check if the chain of loads is already vectorized. */
4023 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4025 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4028 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4029 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4031 /* VEC_NUM is the number of vect stmts to be created for this group. */
4034 strided_load = false;
4035 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4036 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4040 vec_num = group_size;
4042 dr_chain = VEC_alloc (tree, heap, vec_num);
4048 group_size = vec_num = 1;
4051 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4052 gcc_assert (alignment_support_scheme);
4054 /* In case the vectorization factor (VF) is bigger than the number
4055 of elements that we can fit in a vectype (nunits), we have to generate
4056 more than one vector stmt - i.e - we need to "unroll" the
4057 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4058 from one copy of the vector stmt to the next, in the field
4059 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4060 stages to find the correct vector defs to be used when vectorizing
4061 stmts that use the defs of the current stmt. The example below
4062 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4063 need to create 4 vectorized stmts):
4065 before vectorization:
4066 RELATED_STMT VEC_STMT
4070 step 1: vectorize stmt S1:
4071 We first create the vector stmt VS1_0, and, as usual, record a
4072 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4073 Next, we create the vector stmt VS1_1, and record a pointer to
4074 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4075 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4077 RELATED_STMT VEC_STMT
4078 VS1_0: vx0 = memref0 VS1_1 -
4079 VS1_1: vx1 = memref1 VS1_2 -
4080 VS1_2: vx2 = memref2 VS1_3 -
4081 VS1_3: vx3 = memref3 - -
4082 S1: x = load - VS1_0
4085 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4086 information we recorded in RELATED_STMT field is used to vectorize
4089 /* In case of interleaving (non-unit strided access):
4096 Vectorized loads are created in the order of memory accesses
4097 starting from the access of the first stmt of the chain:
4100 VS2: vx1 = &base + vec_size*1
4101 VS3: vx3 = &base + vec_size*2
4102 VS4: vx4 = &base + vec_size*3
4104 Then permutation statements are generated:
4106 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4107 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4110 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4111 (the order of the data-refs in the output of vect_permute_load_chain
4112 corresponds to the order of scalar stmts in the interleaving chain - see
4113 the documentation of vect_permute_load_chain()).
4114 The generation of permutation stmts and recording them in
4115 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4117 In case of both multiple types and interleaving, the vector loads and
4118 permutation stmts above are created for every copy. The result vector
4119 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4120 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4122 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4123 on a target that supports unaligned accesses (dr_unaligned_supported)
4124 we generate the following code:
4128 p = p + indx * vectype_size;
4133 Otherwise, the data reference is potentially unaligned on a target that
4134 does not support unaligned accesses (dr_explicit_realign_optimized) -
4135 then generate the following code, in which the data in each iteration is
4136 obtained by two vector loads, one from the previous iteration, and one
4137 from the current iteration:
4139 msq_init = *(floor(p1))
4140 p2 = initial_addr + VS - 1;
4141 realignment_token = call target_builtin;
4144 p2 = p2 + indx * vectype_size
4146 vec_dest = realign_load (msq, lsq, realignment_token)
4151 /* If the misalignment remains the same throughout the execution of the
4152 loop, we can create the init_addr and permutation mask at the loop
4153 preheader. Otherwise, it needs to be created inside the loop.
4154 This can only occur when vectorizing memory accesses in the inner-loop
4155 nested within an outer-loop that is being vectorized. */
4157 if (loop && nested_in_vect_loop_p (loop, stmt)
4158 && (TREE_INT_CST_LOW (DR_STEP (dr))
4159 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4161 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4162 compute_in_loop = true;
4165 if ((alignment_support_scheme == dr_explicit_realign_optimized
4166 || alignment_support_scheme == dr_explicit_realign)
4167 && !compute_in_loop)
4169 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4170 alignment_support_scheme, NULL_TREE,
4172 if (alignment_support_scheme == dr_explicit_realign_optimized)
4174 phi = SSA_NAME_DEF_STMT (msq);
4175 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4182 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4184 prev_stmt_info = NULL;
4185 for (j = 0; j < ncopies; j++)
4187 /* 1. Create the vector pointer update chain. */
4189 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
4191 &dummy, &ptr_incr, false,
4195 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
4197 for (i = 0; i < vec_num; i++)
4200 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4203 /* 2. Create the vector-load in the loop. */
4204 switch (alignment_support_scheme)
4207 case dr_unaligned_supported:
4209 struct ptr_info_def *pi;
4211 = build2 (MEM_REF, vectype, dataref_ptr,
4212 build_int_cst (reference_alias_ptr_type
4213 (DR_REF (first_dr)), 0));
4214 pi = get_ptr_info (dataref_ptr);
4215 pi->align = TYPE_ALIGN_UNIT (vectype);
4216 if (alignment_support_scheme == dr_aligned)
4218 gcc_assert (aligned_access_p (first_dr));
4221 else if (DR_MISALIGNMENT (first_dr) == -1)
4223 TREE_TYPE (data_ref)
4224 = build_aligned_type (TREE_TYPE (data_ref),
4225 TYPE_ALIGN (TREE_TYPE (vectype)));
4226 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
4231 TREE_TYPE (data_ref)
4232 = build_aligned_type (TREE_TYPE (data_ref),
4233 TYPE_ALIGN (TREE_TYPE (vectype)));
4234 pi->misalign = DR_MISALIGNMENT (first_dr);
4238 case dr_explicit_realign:
4241 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4243 if (compute_in_loop)
4244 msq = vect_setup_realignment (first_stmt, gsi,
4246 dr_explicit_realign,
4249 new_stmt = gimple_build_assign_with_ops
4250 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4252 (TREE_TYPE (dataref_ptr),
4253 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4254 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4255 gimple_assign_set_lhs (new_stmt, ptr);
4256 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4258 = build2 (MEM_REF, vectype, ptr,
4259 build_int_cst (reference_alias_ptr_type
4260 (DR_REF (first_dr)), 0));
4261 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4262 new_stmt = gimple_build_assign (vec_dest, data_ref);
4263 new_temp = make_ssa_name (vec_dest, new_stmt);
4264 gimple_assign_set_lhs (new_stmt, new_temp);
4265 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4266 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4270 bump = size_binop (MULT_EXPR, vs_minus_1,
4271 TYPE_SIZE_UNIT (scalar_type));
4272 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4273 new_stmt = gimple_build_assign_with_ops
4274 (BIT_AND_EXPR, NULL_TREE, ptr,
4277 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4278 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4279 gimple_assign_set_lhs (new_stmt, ptr);
4280 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4282 = build2 (MEM_REF, vectype, ptr,
4283 build_int_cst (reference_alias_ptr_type
4284 (DR_REF (first_dr)), 0));
4287 case dr_explicit_realign_optimized:
4288 new_stmt = gimple_build_assign_with_ops
4289 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4291 (TREE_TYPE (dataref_ptr),
4292 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4293 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4294 gimple_assign_set_lhs (new_stmt, new_temp);
4295 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4297 = build2 (MEM_REF, vectype, new_temp,
4298 build_int_cst (reference_alias_ptr_type
4299 (DR_REF (first_dr)), 0));
4304 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4305 new_stmt = gimple_build_assign (vec_dest, data_ref);
4306 new_temp = make_ssa_name (vec_dest, new_stmt);
4307 gimple_assign_set_lhs (new_stmt, new_temp);
4308 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4309 mark_symbols_for_renaming (new_stmt);
4311 /* 3. Handle explicit realignment if necessary/supported. Create in
4312 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
4313 if (alignment_support_scheme == dr_explicit_realign_optimized
4314 || alignment_support_scheme == dr_explicit_realign)
4318 lsq = gimple_assign_lhs (new_stmt);
4319 if (!realignment_token)
4320 realignment_token = dataref_ptr;
4321 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4322 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
4324 new_stmt = gimple_build_assign (vec_dest, tmp);
4325 new_temp = make_ssa_name (vec_dest, new_stmt);
4326 gimple_assign_set_lhs (new_stmt, new_temp);
4327 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4329 if (alignment_support_scheme == dr_explicit_realign_optimized)
4332 if (i == vec_num - 1 && j == ncopies - 1)
4333 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
4339 /* 4. Handle invariant-load. */
4340 if (inv_p && !bb_vinfo)
4342 gcc_assert (!strided_load);
4343 gcc_assert (nested_in_vect_loop_p (loop, stmt));
4348 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
4350 /* CHECKME: bitpos depends on endianess? */
4351 bitpos = bitsize_zero_node;
4352 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4355 vect_create_destination_var (scalar_dest, NULL_TREE);
4356 new_stmt = gimple_build_assign (vec_dest, vec_inv);
4357 new_temp = make_ssa_name (vec_dest, new_stmt);
4358 gimple_assign_set_lhs (new_stmt, new_temp);
4359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4361 for (k = nunits - 1; k >= 0; --k)
4362 t = tree_cons (NULL_TREE, new_temp, t);
4363 /* FIXME: use build_constructor directly. */
4364 vec_inv = build_constructor_from_list (vectype, t);
4365 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4366 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4369 gcc_unreachable (); /* FORNOW. */
4374 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4375 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4378 /* Collect vector loads and later create their permutation in
4379 vect_transform_strided_load (). */
4380 if (strided_load || slp_perm)
4381 VEC_quick_push (tree, dr_chain, new_temp);
4383 /* Store vector loads in the corresponding SLP_NODE. */
4384 if (slp && !slp_perm)
4385 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4388 if (slp && !slp_perm)
4393 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4394 slp_node_instance, false))
4396 VEC_free (tree, heap, dr_chain);
4404 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
4407 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4408 VEC_free (tree, heap, dr_chain);
4409 dr_chain = VEC_alloc (tree, heap, group_size);
4414 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4417 prev_stmt_info = vinfo_for_stmt (new_stmt);
4423 VEC_free (tree, heap, dr_chain);
4428 /* Function vect_is_simple_cond.
4431 LOOP - the loop that is being vectorized.
4432 COND - Condition that is checked for simple use.
4434 Returns whether a COND can be vectorized. Checks whether
4435 condition operands are supportable using vec_is_simple_use. */
4438 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4442 enum vect_def_type dt;
4444 if (!COMPARISON_CLASS_P (cond))
4447 lhs = TREE_OPERAND (cond, 0);
4448 rhs = TREE_OPERAND (cond, 1);
4450 if (TREE_CODE (lhs) == SSA_NAME)
4452 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4453 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4457 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4458 && TREE_CODE (lhs) != FIXED_CST)
4461 if (TREE_CODE (rhs) == SSA_NAME)
4463 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4464 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4468 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4469 && TREE_CODE (rhs) != FIXED_CST)
4475 /* vectorizable_condition.
4477 Check if STMT is conditional modify expression that can be vectorized.
4478 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4479 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4482 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4483 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4484 else caluse if it is 2).
4486 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4489 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4490 gimple *vec_stmt, tree reduc_def, int reduc_index)
4492 tree scalar_dest = NULL_TREE;
4493 tree vec_dest = NULL_TREE;
4494 tree op = NULL_TREE;
4495 tree cond_expr, then_clause, else_clause;
4496 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4497 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4498 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4499 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4500 tree vec_compare, vec_cond_expr;
4502 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4503 enum machine_mode vec_mode;
4505 enum vect_def_type dt, dts[4];
4506 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4507 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4508 enum tree_code code;
4509 stmt_vec_info prev_stmt_info = NULL;
4512 /* FORNOW: unsupported in basic block SLP. */
4513 gcc_assert (loop_vinfo);
4515 gcc_assert (ncopies >= 1);
4516 if (reduc_index && ncopies > 1)
4517 return false; /* FORNOW */
4519 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4522 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4523 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4527 /* FORNOW: SLP not supported. */
4528 if (STMT_SLP_TYPE (stmt_info))
4531 /* FORNOW: not yet supported. */
4532 if (STMT_VINFO_LIVE_P (stmt_info))
4534 if (vect_print_dump_info (REPORT_DETAILS))
4535 fprintf (vect_dump, "value used after loop.");
4539 /* Is vectorizable conditional operation? */
4540 if (!is_gimple_assign (stmt))
4543 code = gimple_assign_rhs_code (stmt);
4545 if (code != COND_EXPR)
4548 gcc_assert (gimple_assign_single_p (stmt));
4549 op = gimple_assign_rhs1 (stmt);
4550 cond_expr = TREE_OPERAND (op, 0);
4551 then_clause = TREE_OPERAND (op, 1);
4552 else_clause = TREE_OPERAND (op, 2);
4554 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4557 /* We do not handle two different vector types for the condition
4559 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4560 TREE_TYPE (vectype)))
4563 if (TREE_CODE (then_clause) == SSA_NAME)
4565 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4566 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4567 &then_def_stmt, &def, &dt))
4570 else if (TREE_CODE (then_clause) != INTEGER_CST
4571 && TREE_CODE (then_clause) != REAL_CST
4572 && TREE_CODE (then_clause) != FIXED_CST)
4575 if (TREE_CODE (else_clause) == SSA_NAME)
4577 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4578 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4579 &else_def_stmt, &def, &dt))
4582 else if (TREE_CODE (else_clause) != INTEGER_CST
4583 && TREE_CODE (else_clause) != REAL_CST
4584 && TREE_CODE (else_clause) != FIXED_CST)
4588 vec_mode = TYPE_MODE (vectype);
4592 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4593 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
4599 scalar_dest = gimple_assign_lhs (stmt);
4600 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4602 /* Handle cond expr. */
4603 for (j = 0; j < ncopies; j++)
4610 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4612 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4613 NULL, >emp, &def, &dts[0]);
4615 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4617 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4618 NULL, >emp, &def, &dts[1]);
4619 if (reduc_index == 1)
4620 vec_then_clause = reduc_def;
4623 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4625 vect_is_simple_use (then_clause, loop_vinfo,
4626 NULL, >emp, &def, &dts[2]);
4628 if (reduc_index == 2)
4629 vec_else_clause = reduc_def;
4632 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4634 vect_is_simple_use (else_clause, loop_vinfo,
4635 NULL, >emp, &def, &dts[3]);
4640 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4641 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4642 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4644 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4648 /* Arguments are ready. Create the new vector stmt. */
4649 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4650 vec_cond_lhs, vec_cond_rhs);
4651 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4652 vec_compare, vec_then_clause, vec_else_clause);
4654 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4655 new_temp = make_ssa_name (vec_dest, new_stmt);
4656 gimple_assign_set_lhs (new_stmt, new_temp);
4657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4659 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4661 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4663 prev_stmt_info = vinfo_for_stmt (new_stmt);
4670 /* Make sure the statement is vectorizable. */
4673 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4675 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4676 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4677 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4679 tree scalar_type, vectype;
4681 if (vect_print_dump_info (REPORT_DETAILS))
4683 fprintf (vect_dump, "==> examining statement: ");
4684 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4687 if (gimple_has_volatile_ops (stmt))
4689 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4690 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4695 /* Skip stmts that do not need to be vectorized. In loops this is expected
4697 - the COND_EXPR which is the loop exit condition
4698 - any LABEL_EXPRs in the loop
4699 - computations that are used only for array indexing or loop control.
4700 In basic blocks we only analyze statements that are a part of some SLP
4701 instance, therefore, all the statements are relevant. */
4703 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4704 && !STMT_VINFO_LIVE_P (stmt_info))
4706 if (vect_print_dump_info (REPORT_DETAILS))
4707 fprintf (vect_dump, "irrelevant.");
4712 switch (STMT_VINFO_DEF_TYPE (stmt_info))
4714 case vect_internal_def:
4717 case vect_reduction_def:
4718 case vect_nested_cycle:
4719 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4720 || relevance == vect_used_in_outer_by_reduction
4721 || relevance == vect_unused_in_scope));
4724 case vect_induction_def:
4725 case vect_constant_def:
4726 case vect_external_def:
4727 case vect_unknown_def_type:
4734 gcc_assert (PURE_SLP_STMT (stmt_info));
4736 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
4737 if (vect_print_dump_info (REPORT_DETAILS))
4739 fprintf (vect_dump, "get vectype for scalar type: ");
4740 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4743 vectype = get_vectype_for_scalar_type (scalar_type);
4746 if (vect_print_dump_info (REPORT_DETAILS))
4748 fprintf (vect_dump, "not SLPed: unsupported data-type ");
4749 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4754 if (vect_print_dump_info (REPORT_DETAILS))
4756 fprintf (vect_dump, "vectype: ");
4757 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4760 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4763 if (STMT_VINFO_RELEVANT_P (stmt_info))
4765 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4766 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4767 *need_to_vectorize = true;
4772 && (STMT_VINFO_RELEVANT_P (stmt_info)
4773 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4774 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4775 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4776 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4777 || vectorizable_shift (stmt, NULL, NULL, NULL)
4778 || vectorizable_operation (stmt, NULL, NULL, NULL)
4779 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4780 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4781 || vectorizable_call (stmt, NULL, NULL)
4782 || vectorizable_store (stmt, NULL, NULL, NULL)
4783 || vectorizable_reduction (stmt, NULL, NULL, NULL)
4784 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4788 ok = (vectorizable_shift (stmt, NULL, NULL, node)
4789 || vectorizable_operation (stmt, NULL, NULL, node)
4790 || vectorizable_assignment (stmt, NULL, NULL, node)
4791 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4792 || vectorizable_store (stmt, NULL, NULL, node));
4797 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4799 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4800 fprintf (vect_dump, "supported: ");
4801 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4810 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4811 need extra handling, except for vectorizable reductions. */
4812 if (STMT_VINFO_LIVE_P (stmt_info)
4813 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4814 ok = vectorizable_live_operation (stmt, NULL, NULL);
4818 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4820 fprintf (vect_dump, "not vectorized: live stmt not ");
4821 fprintf (vect_dump, "supported: ");
4822 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4828 if (!PURE_SLP_STMT (stmt_info))
4830 /* Groups of strided accesses whose size is not a power of 2 are not
4831 vectorizable yet using loop-vectorization. Therefore, if this stmt
4832 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4833 loop-based vectorized), the loop cannot be vectorized. */
4834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4835 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4836 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4838 if (vect_print_dump_info (REPORT_DETAILS))
4840 fprintf (vect_dump, "not vectorized: the size of group "
4841 "of strided accesses is not a power of 2");
4842 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4853 /* Function vect_transform_stmt.
4855 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4858 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4859 bool *strided_store, slp_tree slp_node,
4860 slp_instance slp_node_instance)
4862 bool is_store = false;
4863 gimple vec_stmt = NULL;
4864 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4865 gimple orig_stmt_in_pattern;
4868 switch (STMT_VINFO_TYPE (stmt_info))
4870 case type_demotion_vec_info_type:
4871 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4875 case type_promotion_vec_info_type:
4876 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4880 case type_conversion_vec_info_type:
4881 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4885 case induc_vec_info_type:
4886 gcc_assert (!slp_node);
4887 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4891 case shift_vec_info_type:
4892 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
4896 case op_vec_info_type:
4897 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4901 case assignment_vec_info_type:
4902 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4906 case load_vec_info_type:
4907 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4912 case store_vec_info_type:
4913 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4915 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4917 /* In case of interleaving, the whole chain is vectorized when the
4918 last store in the chain is reached. Store stmts before the last
4919 one are skipped, and there vec_stmt_info shouldn't be freed
4921 *strided_store = true;
4922 if (STMT_VINFO_VEC_STMT (stmt_info))
4929 case condition_vec_info_type:
4930 gcc_assert (!slp_node);
4931 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4935 case call_vec_info_type:
4936 gcc_assert (!slp_node);
4937 done = vectorizable_call (stmt, gsi, &vec_stmt);
4938 stmt = gsi_stmt (*gsi);
4941 case reduc_vec_info_type:
4942 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
4947 if (!STMT_VINFO_LIVE_P (stmt_info))
4949 if (vect_print_dump_info (REPORT_DETAILS))
4950 fprintf (vect_dump, "stmt not supported.");
4955 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4956 is being vectorized, but outside the immediately enclosing loop. */
4958 && STMT_VINFO_LOOP_VINFO (stmt_info)
4959 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4960 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4961 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4962 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4963 || STMT_VINFO_RELEVANT (stmt_info) ==
4964 vect_used_in_outer_by_reduction))
4966 struct loop *innerloop = LOOP_VINFO_LOOP (
4967 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4968 imm_use_iterator imm_iter;
4969 use_operand_p use_p;
4973 if (vect_print_dump_info (REPORT_DETAILS))
4974 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4976 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
4977 (to be used when vectorizing outer-loop stmts that use the DEF of
4979 if (gimple_code (stmt) == GIMPLE_PHI)
4980 scalar_dest = PHI_RESULT (stmt);
4982 scalar_dest = gimple_assign_lhs (stmt);
4984 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4986 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4988 exit_phi = USE_STMT (use_p);
4989 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4994 /* Handle stmts whose DEF is used outside the loop-nest that is
4995 being vectorized. */
4996 if (STMT_VINFO_LIVE_P (stmt_info)
4997 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4999 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5005 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5006 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5007 if (orig_stmt_in_pattern)
5009 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5010 /* STMT was inserted by the vectorizer to replace a computation idiom.
5011 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5012 computed this idiom. We need to record a pointer to VEC_STMT in
5013 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5014 documentation of vect_pattern_recog. */
5015 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5016 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5024 /* Remove a group of stores (for SLP or interleaving), free their
5028 vect_remove_stores (gimple first_stmt)
5030 gimple next = first_stmt;
5032 gimple_stmt_iterator next_si;
5036 /* Free the attached stmt_vec_info and remove the stmt. */
5037 next_si = gsi_for_stmt (next);
5038 gsi_remove (&next_si, true);
5039 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
5040 free_stmt_vec_info (next);
5046 /* Function new_stmt_vec_info.
5048 Create and initialize a new stmt_vec_info struct for STMT. */
5051 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5052 bb_vec_info bb_vinfo)
5055 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5057 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5058 STMT_VINFO_STMT (res) = stmt;
5059 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5060 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5061 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5062 STMT_VINFO_LIVE_P (res) = false;
5063 STMT_VINFO_VECTYPE (res) = NULL;
5064 STMT_VINFO_VEC_STMT (res) = NULL;
5065 STMT_VINFO_VECTORIZABLE (res) = true;
5066 STMT_VINFO_IN_PATTERN_P (res) = false;
5067 STMT_VINFO_RELATED_STMT (res) = NULL;
5068 STMT_VINFO_DATA_REF (res) = NULL;
5070 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5071 STMT_VINFO_DR_OFFSET (res) = NULL;
5072 STMT_VINFO_DR_INIT (res) = NULL;
5073 STMT_VINFO_DR_STEP (res) = NULL;
5074 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5076 if (gimple_code (stmt) == GIMPLE_PHI
5077 && is_loop_header_bb_p (gimple_bb (stmt)))
5078 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5080 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5082 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5083 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5084 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5085 STMT_SLP_TYPE (res) = loop_vect;
5086 DR_GROUP_FIRST_DR (res) = NULL;
5087 DR_GROUP_NEXT_DR (res) = NULL;
5088 DR_GROUP_SIZE (res) = 0;
5089 DR_GROUP_STORE_COUNT (res) = 0;
5090 DR_GROUP_GAP (res) = 0;
5091 DR_GROUP_SAME_DR_STMT (res) = NULL;
5092 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
5098 /* Create a hash table for stmt_vec_info. */
5101 init_stmt_vec_info_vec (void)
5103 gcc_assert (!stmt_vec_info_vec);
5104 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5108 /* Free hash table for stmt_vec_info. */
5111 free_stmt_vec_info_vec (void)
5113 gcc_assert (stmt_vec_info_vec);
5114 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5118 /* Free stmt vectorization related info. */
5121 free_stmt_vec_info (gimple stmt)
5123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5128 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5129 set_vinfo_for_stmt (stmt, NULL);
5134 /* Function get_vectype_for_scalar_type_and_size.
5136 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5140 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5142 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5143 enum machine_mode simd_mode;
5144 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5151 /* We can't build a vector type of elements with alignment bigger than
5153 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5156 /* If we'd build a vector type of elements whose mode precision doesn't
5157 match their types precision we'll get mismatched types on vector
5158 extracts via BIT_FIELD_REFs. This effectively means we disable
5159 vectorization of bool and/or enum types in some languages. */
5160 if (INTEGRAL_TYPE_P (scalar_type)
5161 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5164 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5165 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5168 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5169 When the component mode passes the above test simply use a type
5170 corresponding to that mode. The theory is that any use that
5171 would cause problems with this will disable vectorization anyway. */
5172 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5173 && !INTEGRAL_TYPE_P (scalar_type)
5174 && !POINTER_TYPE_P (scalar_type))
5175 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5177 /* If no size was supplied use the mode the target prefers. Otherwise
5178 lookup a vector mode of the specified size. */
5180 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5182 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5183 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5187 vectype = build_vector_type (scalar_type, nunits);
5188 if (vect_print_dump_info (REPORT_DETAILS))
5190 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5191 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5197 if (vect_print_dump_info (REPORT_DETAILS))
5199 fprintf (vect_dump, "vectype: ");
5200 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5203 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5204 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5206 if (vect_print_dump_info (REPORT_DETAILS))
5207 fprintf (vect_dump, "mode not supported by target.");
5214 unsigned int current_vector_size;
5216 /* Function get_vectype_for_scalar_type.
5218 Returns the vector type corresponding to SCALAR_TYPE as supported
5222 get_vectype_for_scalar_type (tree scalar_type)
5225 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5226 current_vector_size);
5228 && current_vector_size == 0)
5229 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5233 /* Function get_same_sized_vectype
5235 Returns a vector type corresponding to SCALAR_TYPE of size
5236 VECTOR_TYPE if supported by the target. */
5239 get_same_sized_vectype (tree scalar_type, tree vector_type)
5241 return get_vectype_for_scalar_type_and_size
5242 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5245 /* Function vect_is_simple_use.
5248 LOOP_VINFO - the vect info of the loop that is being vectorized.
5249 BB_VINFO - the vect info of the basic block that is being vectorized.
5250 OPERAND - operand of a stmt in the loop or bb.
5251 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5253 Returns whether a stmt with OPERAND can be vectorized.
5254 For loops, supportable operands are constants, loop invariants, and operands
5255 that are defined by the current iteration of the loop. Unsupportable
5256 operands are those that are defined by a previous iteration of the loop (as
5257 is the case in reduction/induction computations).
5258 For basic blocks, supportable operands are constants and bb invariants.
5259 For now, operands defined outside the basic block are not supported. */
5262 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5263 bb_vec_info bb_vinfo, gimple *def_stmt,
5264 tree *def, enum vect_def_type *dt)
5267 stmt_vec_info stmt_vinfo;
5268 struct loop *loop = NULL;
5271 loop = LOOP_VINFO_LOOP (loop_vinfo);
5276 if (vect_print_dump_info (REPORT_DETAILS))
5278 fprintf (vect_dump, "vect_is_simple_use: operand ");
5279 print_generic_expr (vect_dump, operand, TDF_SLIM);
5282 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5284 *dt = vect_constant_def;
5288 if (is_gimple_min_invariant (operand))
5291 *dt = vect_external_def;
5295 if (TREE_CODE (operand) == PAREN_EXPR)
5297 if (vect_print_dump_info (REPORT_DETAILS))
5298 fprintf (vect_dump, "non-associatable copy.");
5299 operand = TREE_OPERAND (operand, 0);
5302 if (TREE_CODE (operand) != SSA_NAME)
5304 if (vect_print_dump_info (REPORT_DETAILS))
5305 fprintf (vect_dump, "not ssa-name.");
5309 *def_stmt = SSA_NAME_DEF_STMT (operand);
5310 if (*def_stmt == NULL)
5312 if (vect_print_dump_info (REPORT_DETAILS))
5313 fprintf (vect_dump, "no def_stmt.");
5317 if (vect_print_dump_info (REPORT_DETAILS))
5319 fprintf (vect_dump, "def_stmt: ");
5320 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5323 /* Empty stmt is expected only in case of a function argument.
5324 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5325 if (gimple_nop_p (*def_stmt))
5328 *dt = vect_external_def;
5332 bb = gimple_bb (*def_stmt);
5334 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5335 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5336 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5337 *dt = vect_external_def;
5340 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5341 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5344 if (*dt == vect_unknown_def_type)
5346 if (vect_print_dump_info (REPORT_DETAILS))
5347 fprintf (vect_dump, "Unsupported pattern.");
5351 if (vect_print_dump_info (REPORT_DETAILS))
5352 fprintf (vect_dump, "type of def: %d.",*dt);
5354 switch (gimple_code (*def_stmt))
5357 *def = gimple_phi_result (*def_stmt);
5361 *def = gimple_assign_lhs (*def_stmt);
5365 *def = gimple_call_lhs (*def_stmt);
5370 if (vect_print_dump_info (REPORT_DETAILS))
5371 fprintf (vect_dump, "unsupported defining stmt: ");
5378 /* Function vect_is_simple_use_1.
5380 Same as vect_is_simple_use_1 but also determines the vector operand
5381 type of OPERAND and stores it to *VECTYPE. If the definition of
5382 OPERAND is vect_uninitialized_def, vect_constant_def or
5383 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5384 is responsible to compute the best suited vector type for the
5388 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5389 bb_vec_info bb_vinfo, gimple *def_stmt,
5390 tree *def, enum vect_def_type *dt, tree *vectype)
5392 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5395 /* Now get a vector type if the def is internal, otherwise supply
5396 NULL_TREE and leave it up to the caller to figure out a proper
5397 type for the use stmt. */
5398 if (*dt == vect_internal_def
5399 || *dt == vect_induction_def
5400 || *dt == vect_reduction_def
5401 || *dt == vect_double_reduction_def
5402 || *dt == vect_nested_cycle)
5404 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5405 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5406 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5407 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5408 gcc_assert (*vectype != NULL_TREE);
5410 else if (*dt == vect_uninitialized_def
5411 || *dt == vect_constant_def
5412 || *dt == vect_external_def)
5413 *vectype = NULL_TREE;
5421 /* Function supportable_widening_operation
5423 Check whether an operation represented by the code CODE is a
5424 widening operation that is supported by the target platform in
5425 vector form (i.e., when operating on arguments of type VECTYPE_IN
5426 producing a result of type VECTYPE_OUT).
5428 Widening operations we currently support are NOP (CONVERT), FLOAT
5429 and WIDEN_MULT. This function checks if these operations are supported
5430 by the target platform either directly (via vector tree-codes), or via
5434 - CODE1 and CODE2 are codes of vector operations to be used when
5435 vectorizing the operation, if available.
5436 - DECL1 and DECL2 are decls of target builtin functions to be used
5437 when vectorizing the operation, if available. In this case,
5438 CODE1 and CODE2 are CALL_EXPR.
5439 - MULTI_STEP_CVT determines the number of required intermediate steps in
5440 case of multi-step conversion (like char->short->int - in that case
5441 MULTI_STEP_CVT will be 1).
5442 - INTERM_TYPES contains the intermediate type required to perform the
5443 widening operation (short in the above example). */
5446 supportable_widening_operation (enum tree_code code, gimple stmt,
5447 tree vectype_out, tree vectype_in,
5448 tree *decl1, tree *decl2,
5449 enum tree_code *code1, enum tree_code *code2,
5450 int *multi_step_cvt,
5451 VEC (tree, heap) **interm_types)
5453 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5454 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5455 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
5457 enum machine_mode vec_mode;
5458 enum insn_code icode1, icode2;
5459 optab optab1, optab2;
5460 tree vectype = vectype_in;
5461 tree wide_vectype = vectype_out;
5462 enum tree_code c1, c2;
5464 /* The result of a vectorized widening operation usually requires two vectors
5465 (because the widened results do not fit int one vector). The generated
5466 vector results would normally be expected to be generated in the same
5467 order as in the original scalar computation, i.e. if 8 results are
5468 generated in each vector iteration, they are to be organized as follows:
5469 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5471 However, in the special case that the result of the widening operation is
5472 used in a reduction computation only, the order doesn't matter (because
5473 when vectorizing a reduction we change the order of the computation).
5474 Some targets can take advantage of this and generate more efficient code.
5475 For example, targets like Altivec, that support widen_mult using a sequence
5476 of {mult_even,mult_odd} generate the following vectors:
5477 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5479 When vectorizing outer-loops, we execute the inner-loop sequentially
5480 (each vectorized inner-loop iteration contributes to VF outer-loop
5481 iterations in parallel). We therefore don't allow to change the order
5482 of the computation in the inner-loop during outer-loop vectorization. */
5484 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5485 && !nested_in_vect_loop_p (vect_loop, stmt))
5491 && code == WIDEN_MULT_EXPR
5492 && targetm.vectorize.builtin_mul_widen_even
5493 && targetm.vectorize.builtin_mul_widen_even (vectype)
5494 && targetm.vectorize.builtin_mul_widen_odd
5495 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5497 if (vect_print_dump_info (REPORT_DETAILS))
5498 fprintf (vect_dump, "Unordered widening operation detected.");
5500 *code1 = *code2 = CALL_EXPR;
5501 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5502 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5508 case WIDEN_MULT_EXPR:
5509 if (BYTES_BIG_ENDIAN)
5511 c1 = VEC_WIDEN_MULT_HI_EXPR;
5512 c2 = VEC_WIDEN_MULT_LO_EXPR;
5516 c2 = VEC_WIDEN_MULT_HI_EXPR;
5517 c1 = VEC_WIDEN_MULT_LO_EXPR;
5522 if (BYTES_BIG_ENDIAN)
5524 c1 = VEC_UNPACK_HI_EXPR;
5525 c2 = VEC_UNPACK_LO_EXPR;
5529 c2 = VEC_UNPACK_HI_EXPR;
5530 c1 = VEC_UNPACK_LO_EXPR;
5535 if (BYTES_BIG_ENDIAN)
5537 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5538 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5542 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5543 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5547 case FIX_TRUNC_EXPR:
5548 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5549 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5550 computing the operation. */
5557 if (code == FIX_TRUNC_EXPR)
5559 /* The signedness is determined from output operand. */
5560 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5561 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5565 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5566 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5569 if (!optab1 || !optab2)
5572 vec_mode = TYPE_MODE (vectype);
5573 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5574 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5577 /* Check if it's a multi-step conversion that can be done using intermediate
5579 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5580 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5583 tree prev_type = vectype, intermediate_type;
5584 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5585 optab optab3, optab4;
5587 if (!CONVERT_EXPR_CODE_P (code))
5593 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5594 intermediate steps in promotion sequence. We try
5595 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5597 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5598 for (i = 0; i < 3; i++)
5600 intermediate_mode = insn_data[icode1].operand[0].mode;
5601 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5602 TYPE_UNSIGNED (prev_type));
5603 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5604 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5606 if (!optab3 || !optab4
5607 || ((icode1 = optab_handler (optab1, prev_mode))
5608 == CODE_FOR_nothing)
5609 || insn_data[icode1].operand[0].mode != intermediate_mode
5610 || ((icode2 = optab_handler (optab2, prev_mode))
5611 == CODE_FOR_nothing)
5612 || insn_data[icode2].operand[0].mode != intermediate_mode
5613 || ((icode1 = optab_handler (optab3, intermediate_mode))
5614 == CODE_FOR_nothing)
5615 || ((icode2 = optab_handler (optab4, intermediate_mode))
5616 == CODE_FOR_nothing))
5619 VEC_quick_push (tree, *interm_types, intermediate_type);
5620 (*multi_step_cvt)++;
5622 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5623 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5626 prev_type = intermediate_type;
5627 prev_mode = intermediate_mode;
5639 /* Function supportable_narrowing_operation
5641 Check whether an operation represented by the code CODE is a
5642 narrowing operation that is supported by the target platform in
5643 vector form (i.e., when operating on arguments of type VECTYPE_IN
5644 and producing a result of type VECTYPE_OUT).
5646 Narrowing operations we currently support are NOP (CONVERT) and
5647 FIX_TRUNC. This function checks if these operations are supported by
5648 the target platform directly via vector tree-codes.
5651 - CODE1 is the code of a vector operation to be used when
5652 vectorizing the operation, if available.
5653 - MULTI_STEP_CVT determines the number of required intermediate steps in
5654 case of multi-step conversion (like int->short->char - in that case
5655 MULTI_STEP_CVT will be 1).
5656 - INTERM_TYPES contains the intermediate type required to perform the
5657 narrowing operation (short in the above example). */
5660 supportable_narrowing_operation (enum tree_code code,
5661 tree vectype_out, tree vectype_in,
5662 enum tree_code *code1, int *multi_step_cvt,
5663 VEC (tree, heap) **interm_types)
5665 enum machine_mode vec_mode;
5666 enum insn_code icode1;
5667 optab optab1, interm_optab;
5668 tree vectype = vectype_in;
5669 tree narrow_vectype = vectype_out;
5671 tree intermediate_type, prev_type;
5677 c1 = VEC_PACK_TRUNC_EXPR;
5680 case FIX_TRUNC_EXPR:
5681 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5685 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5686 tree code and optabs used for computing the operation. */
5693 if (code == FIX_TRUNC_EXPR)
5694 /* The signedness is determined from output operand. */
5695 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5697 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5702 vec_mode = TYPE_MODE (vectype);
5703 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5706 /* Check if it's a multi-step conversion that can be done using intermediate
5708 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5710 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5713 prev_type = vectype;
5714 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5715 intermediate steps in promotion sequence. We try
5716 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5718 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5719 for (i = 0; i < 3; i++)
5721 intermediate_mode = insn_data[icode1].operand[0].mode;
5722 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5723 TYPE_UNSIGNED (prev_type));
5724 interm_optab = optab_for_tree_code (c1, intermediate_type,
5727 || ((icode1 = optab_handler (optab1, prev_mode))
5728 == CODE_FOR_nothing)
5729 || insn_data[icode1].operand[0].mode != intermediate_mode
5730 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
5731 == CODE_FOR_nothing))
5734 VEC_quick_push (tree, *interm_types, intermediate_type);
5735 (*multi_step_cvt)++;
5737 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5740 prev_type = intermediate_type;
5741 prev_mode = intermediate_mode;