1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
/* NOTE(review): this listing carries original line numbers inline and has
   elided lines (the return-type line and braces are missing); code below is
   kept byte-identical.  Creates a temporary of array-of-vector type.  */
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
/* NOTE(review): listing has elided lines (e.g. the declaration of NEW_STMT
   and the final return); kept byte-identical.  */
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
63 tree vect_type, vect, vect_name, array_ref;
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
/* The vector type is the element type of the array type.  */
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
/* Build the reference ARRAY[N].  */
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
/* Emit VECT_NAME = ARRAY[N] and register the new stmt with the
   vectorizer's stmt-generation machinery.  */
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
/* NOTE(review): elided lines in this listing (declarations, braces);
   kept byte-identical.  Mirror of read_vector_array for the store side.  */
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
/* Build ARRAY[N] and emit ARRAY[N] = VECT.  */
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
/* NOTE(review): comment and body are truncated in this listing (the return
   of MEM_REF is elided); kept byte-identical.  */
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
/* Use the alias set of the data reference being replaced so alias
   analysis sees the same conflicts.  */
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): this listing has elided lines (declarations of LHS,
   PATTERN_STMT, USE_STMT and several closing braces); code below is kept
   byte-identical.  RELEVANT/LIVE_P are merged monotonically into the
   stmt's vinfo; the stmt is pushed on WORKLIST only if either changed.  */
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Remember the previous values so we can tell below whether anything
   actually changed.  */
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern, in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
147 if (!used_in_pattern)
149 imm_use_iterator imm_iter;
/* Find the LHS whose immediate uses we must inspect.  */
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
157 lhs = gimple_call_lhs (stmt);
159 /* This use is out of pattern use, if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
162 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
/* Debug uses never make a stmt relevant.  */
164 if (is_gimple_debug (USE_STMT (use_p)))
166 use_stmt = USE_STMT (use_p);
168 if (vinfo_for_stmt (use_stmt)
169 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
179 /* This is the last stmt in a sequence that was detected as a
180 pattern that can potentially be vectorized. Don't mark the stmt
181 as relevant/live because it's not going to be vectorized.
182 Instead mark the pattern-stmt that replaces it. */
184 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "last stmt in pattern. don't mark"
/* From here on operate on the pattern stmt's vinfo instead.  */
189 stmt_info = vinfo_for_stmt (pattern_stmt);
190 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
191 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
192 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Merge the new liveness/relevance into the stored values; RELEVANT
   only increases (enum ordering).  */
197 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
198 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
199 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* Nothing changed: no need to revisit this stmt.  */
201 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
202 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "already marked relevant/live.");
209 VEC_safe_push (gimple, heap, *worklist, stmt);
213 /* Function vect_stmt_relevant_p.
215 Return true if STMT in loop that is represented by LOOP_VINFO is
216 "relevant for vectorization".
218 A stmt is considered "relevant for vectorization" if:
219 - it has uses outside the loop.
220 - it has vdefs (it alters memory).
221 - control stmts in the loop (except for the exit condition).
223 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): listing has elided lines (declarations of DEF_P, USE_P,
   OP_ITER; the *live_p initialization; closing braces); kept byte-identical.  */
226 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
227 enum vect_relevant *relevant, bool *live_p)
229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 imm_use_iterator imm_iter;
235 *relevant = vect_unused_in_scope;
238 /* cond stmt other than loop exit cond. */
239 if (is_ctrl_stmt (stmt)
240 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
241 != loop_exit_ctrl_vec_info_type)
242 *relevant = vect_used_in_scope;
244 /* changing memory. */
/* PHIs never have vdefs, so only check real stmts.  */
245 if (gimple_code (stmt) != GIMPLE_PHI)
246 if (gimple_vdef (stmt))
248 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
250 *relevant = vect_used_in_scope;
253 /* uses outside the loop. */
254 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 basic_block bb = gimple_bb (USE_STMT (use_p));
259 if (!flow_bb_inside_loop_p (loop, bb))
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
/* Debug uses do not count as real out-of-loop uses.  */
264 if (is_gimple_debug (USE_STMT (use_p)))
267 /* We expect all such uses to be in the loop exit phis
268 (because of loop closed form) */
269 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
270 gcc_assert (bb == single_exit (loop)->dest);
277 return (*live_p || *relevant);
281 /* Function exist_non_indexing_operands_for_use_p
283 USE is one of the uses attached to STMT. Check if USE is
284 used in STMT for anything other than indexing an array. */
/* NOTE(review): listing has elided lines (the early returns and the final
   comparison of OPERAND against USE are missing); kept byte-identical.  */
287 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292 /* USE corresponds to some operand in STMT. If there is no data
293 reference in STMT, then any operand that corresponds to USE
294 is not indexing an array. */
295 if (!STMT_VINFO_DATA_REF (stmt_info))
298 /* STMT has a data_ref. FORNOW this means that its of one of
302 (This should have been verified in analyze_data_refs).
304 'var' in the second case corresponds to a def, not a use,
305 so USE cannot correspond to any operands that are not used
308 Therefore, all we need to check is if STMT falls into the
309 first case, and whether var corresponds to USE. */
311 if (!gimple_assign_copy_p (stmt))
313 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
/* Only a plain copy with SSA_NAME rhs can be the "var = ARRAY[i]"
   load case checked here.  */
315 operand = gimple_assign_rhs1 (stmt);
316 if (TREE_CODE (operand) != SSA_NAME)
327 Function process_use.
330 - a USE in STMT in a loop represented by LOOP_VINFO
331 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
332 that defined USE. This is done by calling mark_relevant and passing it
333 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
336 Generally, LIVE_P and RELEVANT are used to define the liveness and
337 relevance info of the DEF_STMT of this USE:
338 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
339 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
341 - case 1: If USE is used only for address computations (e.g. array indexing),
342 which does not need to be directly vectorized, then the liveness/relevance
343 of the respective DEF_STMT is left unchanged.
344 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
345 skip DEF_STMT cause it had already been processed.
346 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
347 be modified accordingly.
349 Return true if everything is as expected. Return false otherwise. */
/* NOTE(review): listing has elided lines (DEF/DEF_STMT declarations, several
   return statements, switch headers, gcc_unreachable, closing braces);
   code below kept byte-identical.  */
352 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
353 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
357 stmt_vec_info dstmt_vinfo;
358 basic_block bb, def_bb;
361 enum vect_def_type dt;
363 /* case 1: we are only interested in uses that need to be vectorized. Uses
364 that are used for address computation are not considered relevant. */
365 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* A use the vectorizer cannot analyze makes the loop unvectorizable.  */
368 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
370 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
371 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* No defining stmt (e.g. function parameter): nothing to mark.  */
375 if (!def_stmt || gimple_nop_p (def_stmt))
378 def_bb = gimple_bb (def_stmt);
379 if (!flow_bb_inside_loop_p (loop, def_bb))
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump, "def_stmt is out of loop.");
386 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
387 DEF_STMT must have already been processed, because this should be the
388 only way that STMT, which is a reduction-phi, was put in the worklist,
389 as there should be no other uses for DEF_STMT in the loop. So we just
390 check that everything is as expected, and we are done. */
391 dstmt_vinfo = vinfo_for_stmt (def_stmt);
392 bb = gimple_bb (stmt);
393 if (gimple_code (stmt) == GIMPLE_PHI
394 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
395 && gimple_code (def_stmt) != GIMPLE_PHI
396 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
397 && bb->loop_father == def_bb->loop_father)
399 if (vect_print_dump_info (REPORT_DETAILS))
400 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If DEF_STMT was replaced by a pattern stmt, check the pattern stmt's
   vinfo instead.  */
401 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
402 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
403 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
404 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
405 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
409 /* case 3a: outer-loop stmt defining an inner-loop stmt:
410 outer-loop-header-bb:
/* Outer-loop def feeding an inner-loop use: translate RELEVANT into the
   inner-loop's terms (switch header elided in this listing).  */
416 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
418 if (vect_print_dump_info (REPORT_DETAILS))
419 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
423 case vect_unused_in_scope:
424 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
425 vect_used_in_scope : vect_unused_in_scope;
428 case vect_used_in_outer_by_reduction:
429 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
430 relevant = vect_used_by_reduction;
433 case vect_used_in_outer:
434 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
435 relevant = vect_used_in_scope;
438 case vect_used_in_scope:
446 /* case 3b: inner-loop stmt defining an outer-loop stmt:
447 outer-loop-header-bb:
451 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
/* Inner-loop def feeding an outer-loop use: the dual translation.  */
453 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
455 if (vect_print_dump_info (REPORT_DETAILS))
456 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
460 case vect_unused_in_scope:
461 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
462 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
463 vect_used_in_outer_by_reduction : vect_unused_in_scope;
466 case vect_used_by_reduction:
467 relevant = vect_used_in_outer_by_reduction;
470 case vect_used_in_scope:
471 relevant = vect_used_in_outer;
/* Finally propagate liveness/relevance to the defining stmt.  */
479 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
480 is_pattern_stmt_p (stmt_vinfo));
485 /* Function vect_mark_stmts_to_be_vectorized.
487 Not all stmts in the loop need to be vectorized. For example:
496 Stmt 1 and 3 do not need to be vectorized, because loop control and
497 addressing of vectorized data-refs are handled differently.
499 This pass detects such stmts. */
/* NOTE(review): listing has elided lines (declarations of BB/PHI/STMT/I,
   switch headers, return statements, closing braces); kept byte-identical.
   Worklist algorithm: seed with directly-relevant stmts, then propagate
   relevance backwards through each stmt's uses via process_use.  */
502 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
504 VEC(gimple,heap) *worklist;
505 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
506 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
507 unsigned int nbbs = loop->num_nodes;
508 gimple_stmt_iterator si;
511 stmt_vec_info stmt_vinfo;
515 enum vect_relevant relevant, tmp_relevant;
516 enum vect_def_type def_type;
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
521 worklist = VEC_alloc (gimple, heap, 64);
523 /* 1. Init worklist. */
524 for (i = 0; i < nbbs; i++)
527 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
530 if (vect_print_dump_info (REPORT_DETAILS))
532 fprintf (vect_dump, "init: phi relevant? ");
533 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
536 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
537 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
539 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
541 stmt = gsi_stmt (si);
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "init: stmt relevant? ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
548 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
549 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
553 /* 2. Process_worklist */
554 while (VEC_length (gimple, worklist) > 0)
559 stmt = VEC_pop (gimple, worklist);
560 if (vect_print_dump_info (REPORT_DETAILS))
562 fprintf (vect_dump, "worklist: examine stmt: ");
563 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
566 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
567 (DEF_STMT) as relevant/irrelevant and live/dead according to the
568 liveness and relevance properties of STMT. */
569 stmt_vinfo = vinfo_for_stmt (stmt);
570 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
571 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
573 /* Generally, the liveness and relevance properties of STMT are
574 propagated as is to the DEF_STMTs of its USEs:
575 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
576 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
578 One exception is when STMT has been identified as defining a reduction
579 variable; in this case we set the liveness/relevance as follows:
581 relevant = vect_used_by_reduction
582 This is because we distinguish between two kinds of relevant stmts -
583 those that are used by a reduction computation, and those that are
584 (also) used by a regular computation. This allows us later on to
585 identify stmts that are used solely by a reduction, and therefore the
586 order of the results that they produce does not have to be kept. */
588 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
589 tmp_relevant = relevant;
/* Adjust RELEVANT for reduction/cycle def types; unsupported
   combinations abort the whole analysis (return false paths are
   elided in this listing).  */
592 case vect_reduction_def:
593 switch (tmp_relevant)
595 case vect_unused_in_scope:
596 relevant = vect_used_by_reduction;
599 case vect_used_by_reduction:
600 if (gimple_code (stmt) == GIMPLE_PHI)
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "unsupported use of reduction.");
608 VEC_free (gimple, heap, worklist);
615 case vect_nested_cycle:
616 if (tmp_relevant != vect_unused_in_scope
617 && tmp_relevant != vect_used_in_outer_by_reduction
618 && tmp_relevant != vect_used_in_outer)
620 if (vect_print_dump_info (REPORT_DETAILS))
621 fprintf (vect_dump, "unsupported use of nested cycle.");
623 VEC_free (gimple, heap, worklist);
630 case vect_double_reduction_def:
631 if (tmp_relevant != vect_unused_in_scope
632 && tmp_relevant != vect_used_by_reduction)
634 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "unsupported use of double reduction.");
637 VEC_free (gimple, heap, worklist);
648 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
650 /* Pattern statements are not inserted into the code, so
651 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
652 have to scan the RHS or function arguments instead. */
653 if (is_gimple_assign (stmt))
655 for (i = 1; i < gimple_num_ops (stmt); i++)
657 tree op = gimple_op (stmt, i);
658 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
661 VEC_free (gimple, heap, worklist);
666 else if (is_gimple_call (stmt))
668 for (i = 0; i < gimple_call_num_args (stmt); i++)
670 tree arg = gimple_call_arg (stmt, i);
671 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
674 VEC_free (gimple, heap, worklist);
/* Non-pattern stmt: walk the regular SSA use operands.  */
681 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
683 tree op = USE_FROM_PTR (use_p);
684 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
687 VEC_free (gimple, heap, worklist);
691 } /* while worklist */
693 VEC_free (gimple, heap, worklist);
698 /* Get cost by calling cost target builtin. */
/* NOTE(review): trailing arguments of the call are elided in this listing
   (presumably a dummy vectype and misalignment — TODO confirm against the
   full source); kept byte-identical.  */
701 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
703 tree dummy_type = NULL;
706 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
711 /* Get cost for STMT. */
/* NOTE(review): the switch header, default case and closing braces are
   elided in this listing; kept byte-identical.  Maps the stmt's
   vectorization kind to the corresponding scalar cost.  */
714 cost_for_stmt (gimple stmt)
716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
718 switch (STMT_VINFO_TYPE (stmt_info))
720 case load_vec_info_type:
721 return vect_get_stmt_cost (scalar_load);
722 case store_vec_info_type:
723 return vect_get_stmt_cost (scalar_store);
/* All remaining vectorizable kinds cost as a generic scalar stmt.  */
724 case op_vec_info_type:
725 case condition_vec_info_type:
726 case assignment_vec_info_type:
727 case reduc_vec_info_type:
728 case induc_vec_info_type:
729 case type_promotion_vec_info_type:
730 case type_demotion_vec_info_type:
731 case type_conversion_vec_info_type:
732 case call_vec_info_type:
733 return vect_get_stmt_cost (scalar_stmt);
734 case undef_vec_info_type:
740 /* Function vect_model_simple_cost.
742 Models cost for simple operations, i.e. those that only emit ncopies of a
743 single op. Right now, this does not account for multiple insns that could
744 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): declaration of I, the early return for PURE_SLP_STMT and
   closing braces are elided in this listing; kept byte-identical.  */
747 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
748 enum vect_def_type *dt, slp_tree slp_node)
751 int inside_cost = 0, outside_cost = 0;
753 /* The SLP costs were already calculated during SLP tree build. */
754 if (PURE_SLP_STMT (stmt_info))
757 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
759 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constant/invariant operands need a one-time vector setup outside
   the loop.  */
760 for (i = 0; i < 2; i++)
762 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
763 outside_cost += vect_get_stmt_cost (vector_stmt);
766 if (vect_print_dump_info (REPORT_COST))
767 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
768 "outside_cost = %d .", inside_cost, outside_cost);
770 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
771 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
772 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
776 /* Function vect_cost_strided_group_size
778 For strided load or store, return the group_size only if it is the first
779 load or store of a group, else return 1. This ensures that group size is
780 only returned once per group. */
/* NOTE(review): the "return 1" fallback line is elided in this listing;
   kept byte-identical.  */
783 vect_cost_strided_group_size (stmt_vec_info stmt_info)
785 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
787 if (first_stmt == STMT_VINFO_STMT (stmt_info))
788 return GROUP_SIZE (stmt_info);
794 /* Function vect_model_store_cost
796 Models cost for stores. In the case of strided accesses, one access
797 has the overhead of the strided access attributed to it. */
/* NOTE(review): listing elides the SLP_NODE parameter line, declarations of
   FIRST_STMT/GROUP_SIZE, the early return, and closing braces; kept
   byte-identical.  */
800 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
801 bool store_lanes_p, enum vect_def_type dt,
805 unsigned int inside_cost = 0, outside_cost = 0;
806 struct data_reference *first_dr;
809 /* The SLP costs were already calculated during SLP tree build. */
810 if (PURE_SLP_STMT (stmt_info))
/* A constant/invariant stored value needs one scalar-to-vector setup
   outside the loop.  */
813 if (dt == vect_constant_def || dt == vect_external_def)
814 outside_cost = vect_get_stmt_cost (scalar_to_vec);
816 /* Strided access? */
817 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
821 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
826 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
827 group_size = vect_cost_strided_group_size (stmt_info);
830 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
832 /* Not a strided access. */
836 first_dr = STMT_VINFO_DATA_REF (stmt_info);
839 /* We assume that the cost of a single store-lanes instruction is
840 equivalent to the cost of GROUP_SIZE separate stores. If a strided
841 access is instead being provided by a permute-and-store operation,
842 include the cost of the permutes. */
843 if (!store_lanes_p && group_size > 1)
845 /* Uses a high and low interleave operation for each needed permute. */
846 inside_cost = ncopies * exact_log2(group_size) * group_size
847 * vect_get_stmt_cost (vector_stmt);
849 if (vect_print_dump_info (REPORT_COST))
850 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
855 /* Costs of the stores. */
856 vect_get_store_cost (first_dr, ncopies, &inside_cost);
858 if (vect_print_dump_info (REPORT_COST))
859 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
860 "outside_cost = %d .", inside_cost, outside_cost);
862 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
863 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
864 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
868 /* Calculate cost of DR's memory access. */
/* NOTE(review): the switch header's `case dr_aligned:` label, break
   statements, default case and closing braces are elided in this listing;
   kept byte-identical.  Adds the per-copy store cost to *INSIDE_COST
   according to the alignment support scheme.  */
870 vect_get_store_cost (struct data_reference *dr, int ncopies,
871 unsigned int *inside_cost)
873 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
875 switch (alignment_support_scheme)
/* Aligned store: plain vector_store cost per copy.  */
879 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
881 if (vect_print_dump_info (REPORT_COST))
882 fprintf (vect_dump, "vect_model_store_cost: aligned.");
887 case dr_unaligned_supported:
889 gimple stmt = DR_STMT (dr);
890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
891 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
893 /* Here, we assign an additional cost for the unaligned store. */
894 *inside_cost += ncopies
895 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
896 vectype, DR_MISALIGNMENT (dr));
898 if (vect_print_dump_info (REPORT_COST))
899 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
911 /* Function vect_model_load_cost
913 Models cost for loads. In the case of strided accesses, the last access
914 has the overhead of the strided access attributed to it. Since unaligned
915 accesses are supported for loads, we also account for the costs of the
916 access scheme chosen. */
/* NOTE(review): listing elides the SLP_NODE parameter line, declarations of
   FIRST_STMT/GROUP_SIZE, the non-strided else branch, and closing braces;
   kept byte-identical.  */
919 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
924 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
925 unsigned int inside_cost = 0, outside_cost = 0;
927 /* The SLP costs were already calculated during SLP tree build. */
928 if (PURE_SLP_STMT (stmt_info))
931 /* Strided accesses? */
932 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
933 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
935 group_size = vect_cost_strided_group_size (stmt_info);
936 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
938 /* Not a strided access. */
945 /* We assume that the cost of a single load-lanes instruction is
946 equivalent to the cost of GROUP_SIZE separate loads. If a strided
947 access is instead being provided by a load-and-permute operation,
948 include the cost of the permutes. */
949 if (!load_lanes_p && group_size > 1)
951 /* Uses an even and odd extract operations for each needed permute. */
952 inside_cost = ncopies * exact_log2(group_size) * group_size
953 * vect_get_stmt_cost (vector_stmt);
955 if (vect_print_dump_info (REPORT_COST))
956 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
960 /* The loads themselves. */
/* The boolean argument decides whether realignment setup cost should be
   attributed to this access (see vect_get_load_cost).  */
961 vect_get_load_cost (first_dr, ncopies,
962 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
964 &inside_cost, &outside_cost);
966 if (vect_print_dump_info (REPORT_COST))
967 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
968 "outside_cost = %d .", inside_cost, outside_cost);
970 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
971 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
972 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
976 /* Calculate cost of DR's memory access. */
/* NOTE(review): the `case dr_aligned:` label, break statements, default
   case and closing braces are elided in this listing; kept byte-identical.
   Adds per-copy load cost to *INSIDE_COST, and for optimized realignment
   optionally one-time setup cost to *OUTSIDE_COST.  */
978 vect_get_load_cost (struct data_reference *dr, int ncopies,
979 bool add_realign_cost, unsigned int *inside_cost,
980 unsigned int *outside_cost)
982 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
984 switch (alignment_support_scheme)
/* Aligned load: plain vector_load cost per copy.  */
988 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
990 if (vect_print_dump_info (REPORT_COST))
991 fprintf (vect_dump, "vect_model_load_cost: aligned.");
995 case dr_unaligned_supported:
997 gimple stmt = DR_STMT (dr);
998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1001 /* Here, we assign an additional cost for the unaligned load. */
1002 *inside_cost += ncopies
1003 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1004 vectype, DR_MISALIGNMENT (dr));
1005 if (vect_print_dump_info (REPORT_COST))
1006 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1011 case dr_explicit_realign:
/* Two loads plus a realign op per copy.  */
1013 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1014 + vect_get_stmt_cost (vector_stmt));
1016 /* FIXME: If the misalignment remains fixed across the iterations of
1017 the containing loop, the following cost should be added to the
1019 if (targetm.vectorize.builtin_mask_for_load)
1020 *inside_cost += vect_get_stmt_cost (vector_stmt);
1024 case dr_explicit_realign_optimized:
1026 if (vect_print_dump_info (REPORT_COST))
1027 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1030 /* Unaligned software pipeline has a load of an address, an initial
1031 load, and possibly a mask operation to "prime" the loop. However,
1032 if this is an access in a group of loads, which provide strided
1033 access, then the above cost should only be considered for one
1034 access in the group. Inside the loop, there is a load op
1035 and a realignment op. */
1037 if (add_realign_cost)
1039 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1040 if (targetm.vectorize.builtin_mask_for_load)
1041 *outside_cost += vect_get_stmt_cost (vector_stmt);
1044 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1045 + vect_get_stmt_cost (vector_stmt));
1055 /* Function vect_init_vector.
1057 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1058 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1059 is not NULL. Otherwise, place the initialization at the loop preheader.
1060 Return the DEF of INIT_STMT.
1061 It will be used in the vectorized stmt for STMT. */
/* NOTE(review): listing elides declarations (NEW_VAR, INIT_STMT, NEW_TEMP,
   VEC_OPRND), the if/else structure around insertion, the nested-loop
   handling body, and the final return; kept byte-identical.  */
1064 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1065 gimple_stmt_iterator *gsi)
1067 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
/* Build NEW_TEMP = VECTOR_VAR with a fresh SSA name.  */
1075 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1076 add_referenced_var (new_var);
1077 init_stmt = gimple_build_assign (new_var, vector_var);
1078 new_temp = make_ssa_name (new_var, init_stmt);
1079 gimple_assign_set_lhs (init_stmt, new_temp);
/* With an iterator, insert at GSI; otherwise insert on the loop
   preheader edge (loop case) or after the BB's labels (basic-block
   SLP case).  */
1082 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1085 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1089 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1091 if (nested_in_vect_loop_p (loop, stmt))
1094 pe = loop_preheader_edge (loop);
1095 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1096 gcc_assert (!new_bb);
1100 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1102 gimple_stmt_iterator gsi_bb_start;
1104 gcc_assert (bb_vinfo);
1105 bb = BB_VINFO_BB (bb_vinfo);
1106 gsi_bb_start = gsi_after_labels (bb);
1107 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1111 if (vect_print_dump_info (REPORT_DETAILS))
1113 fprintf (vect_dump, "created new init_stmt: ");
1114 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1117 vec_oprnd = gimple_assign_lhs (init_stmt);
1122 /* Function vect_get_vec_def_for_operand.
1124 OP is an operand in STMT. This function returns a (vector) def that will be
1125 used in the vectorized stmt for STMT.
1127 In the case that OP is an SSA_NAME which is defined in the loop, then
1128 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1130 In case OP is an invariant or constant, a new stmt that creates a vector def
1131 needs to be introduced. */
/* NOTE(review): listing elides many lines (declarations, the switch header
   on DT, several returns, gcc_unreachable, closing braces); kept
   byte-identical.  Dispatches on the operand's vect_def_type.  */
1134 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1139 stmt_vec_info def_stmt_info = NULL;
1140 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1141 unsigned int nunits;
1142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1148 enum vect_def_type dt;
1152 if (vect_print_dump_info (REPORT_DETAILS))
1154 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1155 print_generic_expr (vect_dump, op, TDF_SLIM);
1158 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1160 gcc_assert (is_simple_use);
1161 if (vect_print_dump_info (REPORT_DETAILS))
1165 fprintf (vect_dump, "def = ");
1166 print_generic_expr (vect_dump, def, TDF_SLIM);
1170 fprintf (vect_dump, " def_stmt = ");
1171 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1177 /* Case 1: operand is a constant. */
1178 case vect_constant_def:
1180 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1181 gcc_assert (vector_type);
1182 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1187 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1188 if (vect_print_dump_info (REPORT_DETAILS))
1189 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1191 vec_cst = build_vector_from_val (vector_type, op);
1192 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1195 /* Case 2: operand is defined outside the loop - loop invariant. */
1196 case vect_external_def:
1198 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1199 gcc_assert (vector_type);
1200 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1205 /* Create 'vec_inv = {inv,inv,..,inv}' */
1206 if (vect_print_dump_info (REPORT_DETAILS))
1207 fprintf (vect_dump, "Create vector_inv.");
/* Build a CONSTRUCTOR that splats DEF into every element.  */
1209 for (i = nunits - 1; i >= 0; --i)
1211 t = tree_cons (NULL_TREE, def, t);
1214 /* FIXME: use build_constructor directly. */
1215 vec_inv = build_constructor_from_list (vector_type, t);
1216 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1219 /* Case 3: operand is defined inside the loop. */
1220 case vect_internal_def:
1223 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1225 /* Get the def from the vectorized stmt. */
1226 def_stmt_info = vinfo_for_stmt (def_stmt);
1228 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1229 /* Get vectorized pattern statement. */
1231 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1232 && !STMT_VINFO_RELEVANT (def_stmt_info))
1233 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1234 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1235 gcc_assert (vec_stmt);
/* Extract the vector def from whatever stmt kind produced it.  */
1236 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1237 vec_oprnd = PHI_RESULT (vec_stmt);
1238 else if (is_gimple_call (vec_stmt))
1239 vec_oprnd = gimple_call_lhs (vec_stmt);
1241 vec_oprnd = gimple_assign_lhs (vec_stmt);
1245 /* Case 4: operand is defined by a loop header phi - reduction */
1246 case vect_reduction_def:
1247 case vect_double_reduction_def:
1248 case vect_nested_cycle:
1252 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1253 loop = (gimple_bb (def_stmt))->loop_father;
1255 /* Get the def before the loop */
1256 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1257 return get_initial_def_for_reduction (stmt, op, scalar_def);
1260 /* Case 5: operand is defined by loop-header phi - induction. */
1261 case vect_induction_def:
1263 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1265 /* Get the def from the vectorized stmt. */
1266 def_stmt_info = vinfo_for_stmt (def_stmt);
1267 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1268 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1269 vec_oprnd = PHI_RESULT (vec_stmt);
1271 vec_oprnd = gimple_get_lhs (vec_stmt);
1281 /* Function vect_get_vec_def_for_stmt_copy
1283 Return a vector-def for an operand. This function is used when the
1284 vectorized stmt to be created (by the caller to this function) is a "copy"
1285 created in case the vectorized result cannot fit in one vector, and several
1286 copies of the vector-stmt are required. In this case the vector-def is
1287 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1288 of the stmt that defines VEC_OPRND.
1289 DT is the type of the vector def VEC_OPRND.
1292 In case the vectorization factor (VF) is bigger than the number
1293 of elements that can fit in a vectype (nunits), we have to generate
1294 more than one vector stmt to vectorize the scalar stmt. This situation
1295 arises when there are multiple data-types operated upon in the loop; the
1296 smallest data-type determines the VF, and as a result, when vectorizing
1297 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1298 vector stmt (each computing a vector of 'nunits' results, and together
1299 computing 'VF' results in each iteration). This function is called when
1300 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1301 which VF=16 and nunits=4, so the number of copies required is 4):
1303 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1305 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1306 VS1.1: vx.1 = memref1 VS1.2
1307 VS1.2: vx.2 = memref2 VS1.3
1308 VS1.3: vx.3 = memref3
1310 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1311 VSnew.1: vz1 = vx.1 + ... VSnew.2
1312 VSnew.2: vz2 = vx.2 + ... VSnew.3
1313 VSnew.3: vz3 = vx.3 + ...
1315 The vectorization of S1 is explained in vectorizable_load.
1316 The vectorization of S2:
1317 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1318 the function 'vect_get_vec_def_for_operand' is called to
1319 get the relevant vector-def for each operand of S2. For operand x it
1320 returns the vector-def 'vx.0'.
1322 To create the remaining copies of the vector-stmt (VSnew.j), this
1323 function is called to get the relevant vector-def for each operand. It is
1324 obtained from the respective VS1.j stmt, which is recorded in the
1325 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1327 For example, to obtain the vector-def 'vx.1' in order to create the
1328 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1329 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1330 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1331 and return its def ('vx.1').
1332 Overall, to create the above sequence this function will be called 3 times:
1333 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1334 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1335 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* Given VEC_OPRND, a vector def used by a previous copy of a vectorized
   stmt, return the def to use for the next copy: the lhs of the stmt
   recorded in STMT_VINFO_RELATED_STMT of VEC_OPRND's defining stmt.
   DT is VEC_OPRND's def type; external/constant defs are reusable
   unchanged.  (See the function comment above for the full example.)  */
1338 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1340 gimple vec_stmt_for_operand;
1341 stmt_vec_info def_stmt_info;
1343 /* Do nothing; can reuse same def. */
1344 if (dt == vect_external_def || dt == vect_constant_def )
/* Walk from VEC_OPRND's defining stmt to the next copy in the
   RELATED_STMT chain.  */
1347 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1348 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1349 gcc_assert (def_stmt_info);
1350 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1351 gcc_assert (vec_stmt_for_operand);
1352 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1353 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1354 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1356 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1361 /* Get vectorized definitions for the operands to create a copy of an original
1362 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* Advance the vector defs in *VEC_OPRNDS0 (and optionally *VEC_OPRNDS1)
   to the next stmt copy in place: pop the current def, map it through
   vect_get_vec_def_for_stmt_copy using DT[0]/DT[1], and push the result
   back.  See vect_get_vec_def_for_stmt_copy () for details.  */
1365 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1366 VEC(tree,heap) **vec_oprnds0,
1367 VEC(tree,heap) **vec_oprnds1)
1369 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1371 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1372 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* The second operand vector is optional (unary ops pass NULL or an
   empty VEC).  */
1374 if (vec_oprnds1 && *vec_oprnds1)
1376 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1377 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1378 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1383 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
/* Get the initial vector defs for operands OP0 and (optionally) OP1 of
   STMT, storing them in freshly allocated *VEC_OPRNDS0/*VEC_OPRNDS1.
   For an SLP node the defs are obtained via vect_get_slp_defs instead.
   NOTE(review): the if/else structure around the SLP call is elided in
   this dump; lines are kept verbatim.  */
1387 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1388 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1392 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1397 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1398 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1399 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1403 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1404 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1405 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1411 /* Function vect_finish_stmt_generation.
1413 Insert a new stmt. */
/* Insert the new vector stmt VEC_STMT before GSI, create and attach its
   stmt_vec_info (inheriting STMT's loop/bb vinfo), and copy STMT's
   source location onto it.  Labels must never reach here.  */
1416 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1417 gimple_stmt_iterator *gsi)
1419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1421 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1423 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1425 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1427 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1430 if (vect_print_dump_info (REPORT_DETAILS))
1432 fprintf (vect_dump, "add new stmt: ");
1433 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
/* Keep debug info sensible: the vector stmt stands in for STMT.  */
1436 gimple_set_location (vec_stmt, gimple_location (stmt));
1439 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1440 a function declaration if the target has a vectorized version
1441 of the function, or NULL_TREE if the function cannot be vectorized. */
/* Return the target's vectorized built-in for CALL with result type
   VECTYPE_OUT and argument type VECTYPE_IN, or NULL_TREE if none.
   Only const/novops built-ins are eligible, since the vectorized call
   must not read or clobber memory.  */
1444 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1446 tree fndecl = gimple_call_fndecl (call);
1448 /* We only handle functions that do not read or clobber memory -- i.e.
1449 const or novops ones. */
1450 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1454 || TREE_CODE (fndecl) != FUNCTION_DECL
1455 || !DECL_BUILT_IN (fndecl))
/* Defer the actual lookup to the target hook.  */
1458 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1462 /* Function vectorizable_call.
1464 Check if STMT performs a function call that can be vectorized.
1465 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1466 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether the call STMT can be vectorized via a target built-in;
   if VEC_STMT is non-NULL, also perform the transformation, emitting
   the vectorized call(s) at GSI and recording the result in *VEC_STMT.
   Returns false when STMT is not a vectorizable call.
   NOTE(review): this dump elides many lines (braces, else-arms, some
   conditions); the surviving statements are kept byte-identical.  */
1470 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1475 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1477 tree vectype_out, vectype_in;
1480 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1481 tree fndecl, new_temp, def, rhs_type;
1483 enum vect_def_type dt[3]
1484 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1485 gimple new_stmt = NULL;
1487 VEC(tree, heap) *vargs = NULL;
/* MODIFIER records whether in/out vector widths differ: NARROW means
   two input vectors per output, WIDEN the converse, NONE same width.  */
1488 enum { NARROW, NONE, WIDEN } modifier;
1492 /* FORNOW: unsupported in basic block SLP. */
1493 gcc_assert (loop_vinfo);
1495 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1498 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1501 /* FORNOW: SLP not supported. */
1502 if (STMT_SLP_TYPE (stmt_info))
1505 /* Is STMT a vectorizable call? */
1506 if (!is_gimple_call (stmt))
1509 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1512 if (stmt_can_throw_internal (stmt))
1515 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1517 /* Process function arguments. */
1518 rhs_type = NULL_TREE;
1519 vectype_in = NULL_TREE;
1520 nargs = gimple_call_num_args (stmt);
1522 /* Bail out if the function has more than three arguments, we do not have
1523 interesting builtin functions to vectorize with more than two arguments
1524 except for fma. No arguments is also not good. */
1525 if (nargs == 0 || nargs > 3)
1528 for (i = 0; i < nargs; i++)
1532 op = gimple_call_arg (stmt, i);
1534 /* We can only handle calls with arguments of the same type. */
1536 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1538 if (vect_print_dump_info (REPORT_DETAILS))
1539 fprintf (vect_dump, "argument types differ.");
1543 rhs_type = TREE_TYPE (op);
1545 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1546 &def_stmt, &def, &dt[i], &opvectype))
1548 if (vect_print_dump_info (REPORT_DETAILS))
1549 fprintf (vect_dump, "use not simple.");
1554 vectype_in = opvectype;
1556 && opvectype != vectype_in)
1558 if (vect_print_dump_info (REPORT_DETAILS))
1559 fprintf (vect_dump, "argument vector types differ.");
1563 /* If all arguments are external or constant defs use a vector type with
1564 the same size as the output vector type. */
1566 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1568 gcc_assert (vectype_in);
1571 if (vect_print_dump_info (REPORT_DETAILS))
1573 fprintf (vect_dump, "no vectype for scalar type ");
1574 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* Classify the width relation between input and output vectors.  */
1581 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1582 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1583 if (nunits_in == nunits_out / 2)
1585 else if (nunits_out == nunits_in)
1587 else if (nunits_out == nunits_in / 2)
1592 /* For now, we only vectorize functions if a target specific builtin
1593 is available. TODO -- in some cases, it might be profitable to
1594 insert the calls for pieces of the vector, in order to be able
1595 to vectorize other operations in the loop. */
1596 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1597 if (fndecl == NULL_TREE)
1599 if (vect_print_dump_info (REPORT_DETAILS))
1600 fprintf (vect_dump, "function is not vectorizable.");
1605 gcc_assert (!gimple_vuse (stmt));
1607 if (modifier == NARROW)
1608 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1610 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1612 /* Sanity check: make sure that at least one copy of the vectorized stmt
1613 needs to be generated. */
1614 gcc_assert (ncopies >= 1);
1616 if (!vec_stmt) /* transformation not required. */
1618 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1619 if (vect_print_dump_info (REPORT_DETAILS))
1620 fprintf (vect_dump, "=== vectorizable_call ===");
1621 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1627 if (vect_print_dump_info (REPORT_DETAILS))
1628 fprintf (vect_dump, "transform call.");
1631 scalar_dest = gimple_call_lhs (stmt);
1632 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1634 prev_stmt_info = NULL;
/* NONE case: one vectorized call per copy, one vector arg per scalar
   arg.  */
1638 for (j = 0; j < ncopies; ++j)
1640 /* Build argument list for the vectorized call. */
1642 vargs = VEC_alloc (tree, heap, nargs);
1644 VEC_truncate (tree, vargs, 0);
1646 for (i = 0; i < nargs; i++)
1648 op = gimple_call_arg (stmt, i);
1651 = vect_get_vec_def_for_operand (op, stmt, NULL);
1654 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1656 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1659 VEC_quick_push (tree, vargs, vec_oprnd0);
1662 new_stmt = gimple_build_call_vec (fndecl, vargs);
1663 new_temp = make_ssa_name (vec_dest, new_stmt);
1664 gimple_call_set_lhs (new_stmt, new_temp);
1666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1667 mark_symbols_for_renaming (new_stmt);
/* Chain the copies via RELATED_STMT; the first copy is recorded as
   the stmt's vectorized stmt.  */
1670 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1674 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* NARROW case: each vectorized call consumes two input vectors per
   scalar argument.  */
1680 for (j = 0; j < ncopies; ++j)
1682 /* Build argument list for the vectorized call. */
1684 vargs = VEC_alloc (tree, heap, nargs * 2);
1686 VEC_truncate (tree, vargs, 0);
1688 for (i = 0; i < nargs; i++)
1690 op = gimple_call_arg (stmt, i);
1694 = vect_get_vec_def_for_operand (op, stmt, NULL);
1696 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1700 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1702 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1704 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1707 VEC_quick_push (tree, vargs, vec_oprnd0);
1708 VEC_quick_push (tree, vargs, vec_oprnd1);
1711 new_stmt = gimple_build_call_vec (fndecl, vargs);
1712 new_temp = make_ssa_name (vec_dest, new_stmt);
1713 gimple_call_set_lhs (new_stmt, new_temp);
1715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1716 mark_symbols_for_renaming (new_stmt);
1719 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1721 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1723 prev_stmt_info = vinfo_for_stmt (new_stmt);
1726 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1731 /* No current target implements this case. */
1735 VEC_free (tree, heap, vargs);
1737 /* Update the exception handling table with the vector stmt if necessary. */
1738 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1739 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1741 /* The call in STMT might prevent it from being removed in dce.
1742 We however cannot remove it here, due to the way the ssa name
1743 it defines is mapped to the new definition. So just replace
1744 rhs of the statement with something harmless. */
1746 type = TREE_TYPE (scalar_dest);
1747 if (is_pattern_stmt_p (stmt_info))
1748 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1750 lhs = gimple_call_lhs (stmt);
1751 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1752 set_vinfo_for_stmt (new_stmt, stmt_info);
1753 set_vinfo_for_stmt (stmt, NULL);
1754 STMT_VINFO_STMT (stmt_info) = new_stmt;
1755 gsi_replace (gsi, new_stmt, false);
1756 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1762 /* Function vect_gen_widened_results_half
1764 Create a vector stmt whose code, type, number of arguments, and result
1765 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1766 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1767 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1768 needs to be created (DECL is a function-decl of a target-builtin).
1769 STMT is the original scalar stmt that we are vectorizing. */
/* Emit one half of a widened result: build either a call to the
   target built-in DECL (when CODE is CALL_EXPR) or a generic assign
   with CODE, using operands VEC_OPRND0/VEC_OPRND1 (OP_TYPE says unary
   or binary), storing into a fresh SSA name based on VEC_DEST, and
   insert it at GSI on behalf of the scalar stmt STMT.  */
1772 vect_gen_widened_results_half (enum tree_code code,
1774 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1775 tree vec_dest, gimple_stmt_iterator *gsi,
1781 /* Generate half of the widened result: */
1782 if (code == CALL_EXPR)
1784 /* Target specific support */
1785 if (op_type == binary_op)
1786 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1788 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1789 new_temp = make_ssa_name (vec_dest, new_stmt);
1790 gimple_call_set_lhs (new_stmt, new_temp);
1794 /* Generic support */
1795 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1796 if (op_type != binary_op)
1798 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1800 new_temp = make_ssa_name (vec_dest, new_stmt);
1801 gimple_assign_set_lhs (new_stmt, new_temp);
1803 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1809 /* Check if STMT performs a conversion operation, that can be vectorized.
1810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether the conversion STMT (FIX_TRUNC_EXPR or FLOAT_EXPR) can
   be vectorized; if VEC_STMT is non-NULL, also transform it, emitting
   the vectorized stmts at GSI.  Handles same-width conversions via a
   target built-in, plus widening and narrowing conversions.
   NOTE(review): this dump elides braces, else-arms and some conditions;
   the surviving statements are kept byte-identical.  */
1815 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1816 gimple *vec_stmt, slp_tree slp_node)
1821 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1822 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1823 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1824 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1825 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1829 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1830 gimple new_stmt = NULL;
1831 stmt_vec_info prev_stmt_info;
1834 tree vectype_out, vectype_in;
1838 enum { NARROW, NONE, WIDEN } modifier;
1840 VEC(tree,heap) *vec_oprnds0 = NULL;
1842 VEC(tree,heap) *dummy = NULL;
1845 /* Is STMT a vectorizable conversion? */
1847 /* FORNOW: unsupported in basic block SLP. */
1848 gcc_assert (loop_vinfo);
1850 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1853 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1856 if (!is_gimple_assign (stmt))
1859 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1862 code = gimple_assign_rhs_code (stmt);
1863 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1866 /* Check types of lhs and rhs. */
1867 scalar_dest = gimple_assign_lhs (stmt);
1868 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1870 op0 = gimple_assign_rhs1 (stmt);
1871 rhs_type = TREE_TYPE (op0);
1872 /* Check the operands of the operation. */
1873 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1874 &def_stmt, &def, &dt[0], &vectype_in))
1876 if (vect_print_dump_info (REPORT_DETAILS))
1877 fprintf (vect_dump, "use not simple.");
1880 /* If op0 is an external or constant defs use a vector type of
1881 the same size as the output vector type. */
1883 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1885 gcc_assert (vectype_in);
1888 if (vect_print_dump_info (REPORT_DETAILS))
1890 fprintf (vect_dump, "no vectype for scalar type ");
1891 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* Classify width relation between input and output vectors.  */
1898 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1899 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1900 if (nunits_in == nunits_out / 2)
1902 else if (nunits_out == nunits_in)
1904 else if (nunits_out == nunits_in / 2)
1909 if (modifier == NARROW)
1910 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1912 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1914 /* Multiple types in SLP are handled by creating the appropriate number of
1915 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1917 if (slp_node || PURE_SLP_STMT (stmt_info))
1920 /* Sanity check: make sure that at least one copy of the vectorized stmt
1921 needs to be generated. */
1922 gcc_assert (ncopies >= 1);
1924 /* Supportable by target? */
1925 if ((modifier == NONE
1926 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1927 || (modifier == WIDEN
1928 && !supportable_widening_operation (code, stmt,
1929 vectype_out, vectype_in,
1932 &dummy_int, &dummy))
1933 || (modifier == NARROW
1934 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1935 &code1, &dummy_int, &dummy)))
1937 if (vect_print_dump_info (REPORT_DETAILS))
1938 fprintf (vect_dump, "conversion not supported by target.");
1942 if (modifier != NONE)
1944 /* FORNOW: SLP not supported. */
1945 if (STMT_SLP_TYPE (stmt_info))
1949 if (!vec_stmt) /* transformation not required. */
1951 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1956 if (vect_print_dump_info (REPORT_DETAILS))
1957 fprintf (vect_dump, "transform conversion.");
1960 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1962 if (modifier == NONE && !slp_node)
1963 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1965 prev_stmt_info = NULL;
/* NONE case: one call to the target conversion built-in per input
   vector.  */
1969 for (j = 0; j < ncopies; j++)
1972 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1974 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1977 targetm.vectorize.builtin_conversion (code,
1978 vectype_out, vectype_in);
1979 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1981 /* Arguments are ready. create the new vector stmt. */
1982 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1983 new_temp = make_ssa_name (vec_dest, new_stmt);
1984 gimple_call_set_lhs (new_stmt, new_temp);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1987 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1991 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1993 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1994 prev_stmt_info = vinfo_for_stmt (new_stmt);
1999 /* In case the vectorization factor (VF) is bigger than the number
2000 of elements that we can fit in a vectype (nunits), we have to
2001 generate more than one vector stmt - i.e - we need to "unroll"
2002 the vector stmt by a factor VF/nunits. */
2003 for (j = 0; j < ncopies; j++)
2006 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2008 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2010 /* Generate first half of the widened result: */
2012 = vect_gen_widened_results_half (code1, decl1,
2013 vec_oprnd0, vec_oprnd1,
2014 unary_op, vec_dest, gsi, stmt);
2016 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2018 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2019 prev_stmt_info = vinfo_for_stmt (new_stmt);
2021 /* Generate second half of the widened result: */
2023 = vect_gen_widened_results_half (code2, decl2,
2024 vec_oprnd0, vec_oprnd1,
2025 unary_op, vec_dest, gsi, stmt);
2026 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2027 prev_stmt_info = vinfo_for_stmt (new_stmt);
2032 /* In case the vectorization factor (VF) is bigger than the number
2033 of elements that we can fit in a vectype (nunits), we have to
2034 generate more than one vector stmt - i.e - we need to "unroll"
2035 the vector stmt by a factor VF/nunits. */
2036 for (j = 0; j < ncopies; j++)
2041 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2042 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2046 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2047 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2050 /* Arguments are ready. Create the new vector stmt. */
2051 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2053 new_temp = make_ssa_name (vec_dest, new_stmt);
2054 gimple_assign_set_lhs (new_stmt, new_temp);
2055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2058 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2060 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2062 prev_stmt_info = vinfo_for_stmt (new_stmt);
2065 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2069 VEC_free (tree, heap, vec_oprnds0);
2075 /* Function vectorizable_assignment.
2077 Check if STMT performs an assignment (copy) that can be vectorized.
2078 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2079 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2080 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether the copy/cast STMT can be vectorized (plain copies,
   PAREN_EXPR, and conversions that change neither element count nor
   vector size); if VEC_STMT is non-NULL, also transform it at GSI.
   NOTE(review): this dump elides braces, else-arms and parts of some
   conditions; the surviving statements are kept byte-identical.  */
2083 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2084 gimple *vec_stmt, slp_tree slp_node)
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2091 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2095 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2096 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2099 VEC(tree,heap) *vec_oprnds = NULL;
2101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2102 gimple new_stmt = NULL;
2103 stmt_vec_info prev_stmt_info = NULL;
2104 enum tree_code code;
2107 /* Multiple types in SLP are handled by creating the appropriate number of
2108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2110 if (slp_node || PURE_SLP_STMT (stmt_info))
2113 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2115 gcc_assert (ncopies >= 1);
2117 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2120 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2123 /* Is vectorizable assignment? */
2124 if (!is_gimple_assign (stmt))
2127 scalar_dest = gimple_assign_lhs (stmt);
2128 if (TREE_CODE (scalar_dest) != SSA_NAME)
2131 code = gimple_assign_rhs_code (stmt);
2132 if (gimple_assign_single_p (stmt)
2133 || code == PAREN_EXPR
2134 || CONVERT_EXPR_CODE_P (code))
2135 op = gimple_assign_rhs1 (stmt);
2139 if (code == VIEW_CONVERT_EXPR)
2140 op = TREE_OPERAND (op, 0);
2142 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2143 &def_stmt, &def, &dt[0], &vectype_in))
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "use not simple.");
2150 /* We can handle NOP_EXPR conversions that do not change the number
2151 of elements or the vector size. */
2152 if ((CONVERT_EXPR_CODE_P (code)
2153 || code == VIEW_CONVERT_EXPR)
2155 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2156 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2157 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2160 if (!vec_stmt) /* transformation not required. */
2162 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "=== vectorizable_assignment ===");
2165 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2170 if (vect_print_dump_info (REPORT_DETAILS))
2171 fprintf (vect_dump, "transform assignment.");
2174 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2177 for (j = 0; j < ncopies; j++)
2181 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2183 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2185 /* Arguments are ready. create the new vector stmt. */
2186 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
/* Conversions between same-sized vector types are expressed as a
   VIEW_CONVERT_EXPR on the operand.  */
2188 if (CONVERT_EXPR_CODE_P (code)
2189 || code == VIEW_CONVERT_EXPR)
2190 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2191 new_stmt = gimple_build_assign (vec_dest, vop);
2192 new_temp = make_ssa_name (vec_dest, new_stmt);
2193 gimple_assign_set_lhs (new_stmt, new_temp);
2194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2196 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2203 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2205 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2207 prev_stmt_info = vinfo_for_stmt (new_stmt);
2210 VEC_free (tree, heap, vec_oprnds);
2215 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2216 either as shift by a scalar or by a vector. */
/* Return whether shift/rotate CODE on SCALAR_TYPE is supported by the
   target, either via the vector-shifted-by-scalar optab or, failing
   that, the vector-shifted-by-vector optab.  */
2219 vect_supportable_shift (enum tree_code code, tree scalar_type)
2222 enum machine_mode vec_mode;
2227 vectype = get_vectype_for_scalar_type (scalar_type);
/* Prefer the scalar-shift-amount optab; fall back to the
   vector-shift-amount one.  */
2231 optab = optab_for_tree_code (code, vectype, optab_scalar);
2233 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2235 optab = optab_for_tree_code (code, vectype, optab_vector);
2237 || (optab_handler (optab, TYPE_MODE (vectype))
2238 == CODE_FOR_nothing))
2242 vec_mode = TYPE_MODE (vectype);
2243 icode = (int) optab_handler (optab, vec_mode);
2244 if (icode == CODE_FOR_nothing)
2251 /* Function vectorizable_shift.
2253 Check if STMT performs a shift operation that can be vectorized.
2254 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2255 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2259 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2260 gimple *vec_stmt, slp_tree slp_node)
2264 tree op0, op1 = NULL;
2265 tree vec_oprnd1 = NULL_TREE;
2266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2268 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2269 enum tree_code code;
2270 enum machine_mode vec_mode;
2274 enum machine_mode optab_op2_mode;
2277 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2278 gimple new_stmt = NULL;
2279 stmt_vec_info prev_stmt_info;
2285 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2288 bool scalar_shift_arg = true;
2289 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2292 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2295 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2298 /* Is STMT a vectorizable binary/unary operation? */
2299 if (!is_gimple_assign (stmt))
2302 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2305 code = gimple_assign_rhs_code (stmt);
2307 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2308 || code == RROTATE_EXPR))
2311 scalar_dest = gimple_assign_lhs (stmt);
2312 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2314 op0 = gimple_assign_rhs1 (stmt);
2315 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2316 &def_stmt, &def, &dt[0], &vectype))
2318 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump, "use not simple.");
2322 /* If op0 is an external or constant def use a vector type with
2323 the same size as the output vector type. */
2325 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2327 gcc_assert (vectype);
2330 if (vect_print_dump_info (REPORT_DETAILS))
2332 fprintf (vect_dump, "no vectype for scalar type ");
2333 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2339 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2340 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2341 if (nunits_out != nunits_in)
2344 op1 = gimple_assign_rhs2 (stmt);
2345 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2347 if (vect_print_dump_info (REPORT_DETAILS))
2348 fprintf (vect_dump, "use not simple.");
2353 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2357 /* Multiple types in SLP are handled by creating the appropriate number of
2358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2360 if (slp_node || PURE_SLP_STMT (stmt_info))
2363 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2365 gcc_assert (ncopies >= 1);
2367 /* Determine whether the shift amount is a vector, or scalar. If the
2368 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2370 if (dt[1] == vect_internal_def && !slp_node)
2371 scalar_shift_arg = false;
2372 else if (dt[1] == vect_constant_def
2373 || dt[1] == vect_external_def
2374 || dt[1] == vect_internal_def)
2376 /* In SLP, need to check whether the shift count is the same,
2377 in loops if it is a constant or invariant, it is always
2381 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2384 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2385 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2386 scalar_shift_arg = false;
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "operand mode requires invariant argument.");
2396 /* Vector shifted by vector. */
2397 if (!scalar_shift_arg)
2399 optab = optab_for_tree_code (code, vectype, optab_vector);
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "vector/vector shift/rotate found.");
2403 /* See if the machine has a vector shifted by scalar insn and if not
2404 then see if it has a vector shifted by vector insn. */
2407 optab = optab_for_tree_code (code, vectype, optab_scalar);
2409 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2411 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2416 optab = optab_for_tree_code (code, vectype, optab_vector);
2418 && (optab_handler (optab, TYPE_MODE (vectype))
2419 != CODE_FOR_nothing))
2421 scalar_shift_arg = false;
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "vector/vector shift/rotate found.");
2426 /* Unlike the other binary operators, shifts/rotates have
2427 the rhs being int, instead of the same type as the lhs,
2428 so make sure the scalar is the right type if we are
2429 dealing with vectors of short/char. */
2430 if (dt[1] == vect_constant_def)
2431 op1 = fold_convert (TREE_TYPE (vectype), op1);
2436 /* Supportable by target? */
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "no optab.");
2443 vec_mode = TYPE_MODE (vectype);
2444 icode = (int) optab_handler (optab, vec_mode);
2445 if (icode == CODE_FOR_nothing)
2447 if (vect_print_dump_info (REPORT_DETAILS))
2448 fprintf (vect_dump, "op not supported by target.");
2449 /* Check only during analysis. */
2450 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2451 || (vf < vect_min_worthwhile_factor (code)
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "proceeding using word mode.");
2458 /* Worthwhile without SIMD support? Check only during analysis. */
2459 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2460 && vf < vect_min_worthwhile_factor (code)
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "not worthwhile without SIMD support.");
2468 if (!vec_stmt) /* transformation not required. */
2470 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2471 if (vect_print_dump_info (REPORT_DETAILS))
2472 fprintf (vect_dump, "=== vectorizable_shift ===");
2473 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2479 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "transform binary/unary operation.");
2483 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2485 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2486 created in the previous stages of the recursion, so no allocation is
2487 needed, except for the case of shift with scalar shift argument. In that
2488 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2489 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2490 In case of loop-based vectorization we allocate VECs of size 1. We
2491 allocate VEC_OPRNDS1 only in case of binary operation. */
2494 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2495 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2497 else if (scalar_shift_arg)
2498 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2500 prev_stmt_info = NULL;
2501 for (j = 0; j < ncopies; j++)
2506 if (scalar_shift_arg)
2508 /* Vector shl and shr insn patterns can be defined with scalar
2509 operand 2 (shift operand). In this case, use constant or loop
2510 invariant op1 directly, without extending it to vector mode
2512 optab_op2_mode = insn_data[icode].operand[2].mode;
2513 if (!VECTOR_MODE_P (optab_op2_mode))
2515 if (vect_print_dump_info (REPORT_DETAILS))
2516 fprintf (vect_dump, "operand 1 using scalar mode.");
2518 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2521 /* Store vec_oprnd1 for every vector stmt to be created
2522 for SLP_NODE. We check during the analysis that all
2523 the shift arguments are the same.
2524 TODO: Allow different constants for different vector
2525 stmts generated for an SLP instance. */
2526 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2527 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2532 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2533 (a special case for certain kind of vector shifts); otherwise,
2534 operand 1 should be of a vector type (the usual case). */
2536 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2539 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2543 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2545 /* Arguments are ready. Create the new vector stmt. */
2546 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2548 vop1 = VEC_index (tree, vec_oprnds1, i);
2549 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2550 new_temp = make_ssa_name (vec_dest, new_stmt);
2551 gimple_assign_set_lhs (new_stmt, new_temp);
2552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2554 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2561 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2563 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2564 prev_stmt_info = vinfo_for_stmt (new_stmt);
2567 VEC_free (tree, heap, vec_oprnds0);
2568 VEC_free (tree, heap, vec_oprnds1);
2574 /* Function vectorizable_operation.
2576 Check if STMT performs a binary, unary or ternary operation that can
2578 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2579 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2580 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2583 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2584 gimple *vec_stmt, slp_tree slp_node)
2588 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2589 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2592 enum tree_code code;
2593 enum machine_mode vec_mode;
/* Definition kind of each of the (up to three) operands; starts as
   "unknown" and is filled in by the vect_is_simple_use* calls below.  */
2600 enum vect_def_type dt[3]
2601 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2602 gimple new_stmt = NULL;
2603 stmt_vec_info prev_stmt_info;
2609 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2610 tree vop0, vop1, vop2;
2611 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Only handle statements relevant to the current loop (or, for SLP,
   basic-block) vectorization, and only internal defs.  */
2614 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2617 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2620 /* Is STMT a vectorizable binary/unary operation? */
2621 if (!is_gimple_assign (stmt))
2624 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2627 code = gimple_assign_rhs_code (stmt);
2629 /* For pointer addition, we should use the normal plus for
2630 the vector addition. */
2631 if (code == POINTER_PLUS_EXPR)
2634 /* Support only unary, binary and ternary operations. */
2635 op_type = TREE_CODE_LENGTH (code);
2636 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2638 if (vect_print_dump_info (REPORT_DETAILS))
2639 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
/* Validate operand 0; vect_is_simple_use_1 also derives the input
   vector type VECTYPE from the operand's def.  */
2644 scalar_dest = gimple_assign_lhs (stmt);
2645 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2647 op0 = gimple_assign_rhs1 (stmt);
2648 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2649 &def_stmt, &def, &dt[0], &vectype))
2651 if (vect_print_dump_info (REPORT_DETAILS))
2652 fprintf (vect_dump, "use not simple.");
2655 /* If op0 is an external or constant def use a vector type with
2656 the same size as the output vector type. */
2658 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2660 gcc_assert (vectype);
2663 if (vect_print_dump_info (REPORT_DETAILS))
2665 fprintf (vect_dump, "no vectype for scalar type ");
2666 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* Input and output vector types must have the same number of units;
   widening/narrowing ops are handled elsewhere.  */
2672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2673 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2674 if (nunits_out != nunits_in)
/* Validate operands 1 and 2 when the operation has them.  */
2677 if (op_type == binary_op || op_type == ternary_op)
2679 op1 = gimple_assign_rhs2 (stmt);
2680 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "use not simple.");
2688 if (op_type == ternary_op)
2690 op2 = gimple_assign_rhs3 (stmt);
2691 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2694 if (vect_print_dump_info (REPORT_DETAILS))
2695 fprintf (vect_dump, "use not simple.");
2701 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2705 /* Multiple types in SLP are handled by creating the appropriate number of
2706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2707 the case of SLP. */
2708 if (slp_node || PURE_SLP_STMT (stmt_info))
2711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2713 gcc_assert (ncopies >= 1);
2715 /* Shifts are handled in vectorizable_shift (). */
2716 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2717 || code == RROTATE_EXPR)
2720 optab = optab_for_tree_code (code, vectype, optab_default);
2722 /* Supportable by target? */
2725 if (vect_print_dump_info (REPORT_DETAILS))
2726 fprintf (vect_dump, "no optab.");
2729 vec_mode = TYPE_MODE (vectype);
2730 icode = (int) optab_handler (optab, vec_mode);
2731 if (icode == CODE_FOR_nothing)
2733 if (vect_print_dump_info (REPORT_DETAILS))
2734 fprintf (vect_dump, "op not supported by target.");
2735 /* Check only during analysis. */
2736 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2737 || (vf < vect_min_worthwhile_factor (code)
2740 if (vect_print_dump_info (REPORT_DETAILS))
2741 fprintf (vect_dump, "proceeding using word mode.");
2744 /* Worthwhile without SIMD support? Check only during analysis. */
2745 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2746 && vf < vect_min_worthwhile_factor (code)
2749 if (vect_print_dump_info (REPORT_DETAILS))
2750 fprintf (vect_dump, "not worthwhile without SIMD support.");
/* Analysis phase only: record the stmt kind and its cost model entry,
   then return without generating code.  */
2754 if (!vec_stmt) /* transformation not required. */
2756 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2757 if (vect_print_dump_info (REPORT_DETAILS))
2758 fprintf (vect_dump, "=== vectorizable_operation ===");
2759 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transform phase starts here.  */
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "transform binary/unary operation.");
2769 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2771 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2772 created in the previous stages of the recursion, so no allocation is
2773 needed, except for the case of shift with scalar shift argument. In that
2774 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2775 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2776 In case of loop-based vectorization we allocate VECs of size 1. We
2777 allocate VEC_OPRNDS1 only in case of binary operation. */
2780 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2781 if (op_type == binary_op || op_type == ternary_op)
2782 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2783 if (op_type == ternary_op)
2784 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2787 /* In case the vectorization factor (VF) is bigger than the number
2788 of elements that we can fit in a vectype (nunits), we have to generate
2789 more than one vector stmt - i.e - we need to "unroll" the
2790 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2791 from one copy of the vector stmt to the next, in the field
2792 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2793 stages to find the correct vector defs to be used when vectorizing
2794 stmts that use the defs of the current stmt. The example below
2795 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2796 we need to create 4 vectorized stmts):
2798 before vectorization:
2799 RELATED_STMT VEC_STMT
2803 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2805 RELATED_STMT VEC_STMT
2806 VS1_0: vx0 = memref0 VS1_1 -
2807 VS1_1: vx1 = memref1 VS1_2 -
2808 VS1_2: vx2 = memref2 VS1_3 -
2809 VS1_3: vx3 = memref3 - -
2810 S1: x = load - VS1_0
2813 step2: vectorize stmt S2 (done here):
2814 To vectorize stmt S2 we first need to find the relevant vector
2815 def for the first operand 'x'. This is, as usual, obtained from
2816 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2817 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2818 relevant vector def 'vx0'. Having found 'vx0' we can generate
2819 the vector stmt VS2_0, and as usual, record it in the
2820 STMT_VINFO_VEC_STMT of stmt S2.
2821 When creating the second copy (VS2_1), we obtain the relevant vector
2822 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2823 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2824 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2825 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2826 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2827 chain of stmts and pointers:
2828 RELATED_STMT VEC_STMT
2829 VS1_0: vx0 = memref0 VS1_1 -
2830 VS1_1: vx1 = memref1 VS1_2 -
2831 VS1_2: vx2 = memref2 VS1_3 -
2832 VS1_3: vx3 = memref3 - -
2833 S1: x = load - VS1_0
2834 VS2_0: vz0 = vx0 + v1 VS2_1 -
2835 VS2_1: vz1 = vx1 + v1 VS2_2 -
2836 VS2_2: vz2 = vx2 + v1 VS2_3 -
2837 VS2_3: vz3 = vx3 + v1 - -
2838 S2: z = x + 1 - VS2_0 */
2840 prev_stmt_info = NULL;
2841 for (j = 0; j < ncopies; j++)
/* First iteration: fetch the initial vector defs for each operand;
   later iterations fetch the defs of the next stmt copy.  */
2846 if (op_type == binary_op || op_type == ternary_op)
2847 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2850 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2852 if (op_type == ternary_op)
2854 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2855 VEC_quick_push (tree, vec_oprnds2,
2856 vect_get_vec_def_for_operand (op2, stmt, NULL));
2861 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2862 if (op_type == ternary_op)
2864 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2865 VEC_quick_push (tree, vec_oprnds2,
2866 vect_get_vec_def_for_stmt_copy (dt[2],
2871 /* Arguments are ready. Create the new vector stmt. */
2872 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2874 vop1 = ((op_type == binary_op || op_type == ternary_op)
2875 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2876 vop2 = ((op_type == ternary_op)
2877 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2878 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2880 new_temp = make_ssa_name (vec_dest, new_stmt);
2881 gimple_assign_set_lhs (new_stmt, new_temp);
2882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2884 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Link the copies through STMT_VINFO_RELATED_STMT (see the big
   comment above); the first copy is recorded as the VEC_STMT.  */
2891 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2893 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2894 prev_stmt_info = vinfo_for_stmt (new_stmt);
2897 VEC_free (tree, heap, vec_oprnds0);
2899 VEC_free (tree, heap, vec_oprnds1);
2901 VEC_free (tree, heap, vec_oprnds2);
2907 /* Get vectorized definitions for loop-based vectorization. For the first
2908 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2909 scalar operand), and for the rest we get a copy with
2910 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2911 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2912 The vectors are collected into VEC_OPRNDS. */
2915 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2916 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2920 /* Get first vector operand. */
2921 /* All the vector operands except the very first one (that is scalar oprnd)
2922 are stmt copies. */
2923 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2924 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
/* *OPRND already holds a vector def, so request the def produced by the
   next copy of the defining stmt.  */
2926 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2928 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2930 /* Get second vector operand. */
2931 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2932 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2936 /* For conversion in multiple steps, continue to get operands
2937 recursively, decreasing MULTI_STEP_CVT at each level. */
2939 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2943 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2944 For multi-step conversions store the resulting vectors and call the function
2945 recursively. */
2948 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2949 int multi_step_cvt, gimple stmt,
2950 VEC (tree, heap) *vec_dsts,
2951 gimple_stmt_iterator *gsi,
2952 slp_tree slp_node, enum tree_code code,
2953 stmt_vec_info *prev_stmt_info)
2956 tree vop0, vop1, new_tmp, vec_dest;
2958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* VEC_DSTS holds one destination var per conversion level, in reverse
   order; pop the one for this level.  */
2960 vec_dest = VEC_pop (tree, vec_dsts);
/* Each demotion combines two input vectors into one narrower vector, so
   consume the operands in pairs.  */
2962 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2964 /* Create demotion operation. */
2965 vop0 = VEC_index (tree, *vec_oprnds, i);
2966 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2967 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2968 new_tmp = make_ssa_name (vec_dest, new_stmt);
2969 gimple_assign_set_lhs (new_stmt, new_tmp);
2970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2973 /* Store the resulting vector for next recursive call. */
2974 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2977 /* This is the last step of the conversion sequence. Store the
2978 vectors in SLP_NODE or in vector info of the scalar statement
2979 (or in STMT_VINFO_RELATED_STMT chain). */
2981 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2984 if (!*prev_stmt_info)
2985 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2987 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2989 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2994 /* For multi-step demotion operations we first generate demotion operations
2995 from the source type to the intermediate types, and then combine the
2996 results (stored in VEC_OPRNDS) in demotion operation to the destination
2997 type. */
3000 /* At each level of recursion we have half of the operands we had at the
3001 previous level. */
3002 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3003 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3004 stmt, vec_dsts, gsi, slp_node,
3005 code, prev_stmt_info);
3010 /* Function vectorizable_type_demotion
3012 Check if STMT performs a binary or unary operation that involves
3013 type demotion, and if it can be vectorized.
3014 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3015 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3016 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3019 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3020 gimple *vec_stmt, slp_tree slp_node)
3025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3026 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3027 enum tree_code code, code1 = ERROR_MARK;
3030 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3031 stmt_vec_info prev_stmt_info;
/* Number of intermediate conversion steps (0 = single-step demotion);
   set by supportable_narrowing_operation below.  */
3038 int multi_step_cvt = 0;
3039 VEC (tree, heap) *vec_oprnds0 = NULL;
3040 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3041 tree last_oprnd, intermediate_type;
3042 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3044 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3050 /* Is STMT a vectorizable type-demotion operation? */
3051 if (!is_gimple_assign (stmt))
3054 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3057 code = gimple_assign_rhs_code (stmt);
3058 if (!CONVERT_EXPR_CODE_P (code))
3061 scalar_dest = gimple_assign_lhs (stmt);
3062 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3064 /* Check the operands of the operation. */
/* Only integer->integer and float->float conversions are handled.  */
3065 op0 = gimple_assign_rhs1 (stmt);
3066 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3067 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3068 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3069 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3070 && CONVERT_EXPR_CODE_P (code))))
3072 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3073 &def_stmt, &def, &dt[0], &vectype_in))
3075 if (vect_print_dump_info (REPORT_DETAILS))
3076 fprintf (vect_dump, "use not simple.");
3079 /* If op0 is an external def use a vector type with the
3080 same size as the output vector type if possible. */
3082 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3084 gcc_assert (vectype_in);
3087 if (vect_print_dump_info (REPORT_DETAILS))
3089 fprintf (vect_dump, "no vectype for scalar type ");
3090 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* For a demotion the input vector must have fewer units than the
   output vector.  */
3096 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3097 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3098 if (nunits_in >= nunits_out)
3101 /* Multiple types in SLP are handled by creating the appropriate number of
3102 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3103 the case of SLP. */
3104 if (slp_node || PURE_SLP_STMT (stmt_info))
3107 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3108 gcc_assert (ncopies >= 1);
3110 /* Supportable by target? */
3111 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3112 &code1, &multi_step_cvt, &interm_types))
/* Analysis phase only: record stmt kind and cost, no code generated.  */
3115 if (!vec_stmt) /* transformation not required. */
3117 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3118 if (vect_print_dump_info (REPORT_DETAILS))
3119 fprintf (vect_dump, "=== vectorizable_demotion ===");
3120 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transform phase starts here.  */
3125 if (vect_print_dump_info (REPORT_DETAILS))
3126 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3129 /* In case of multi-step demotion, we first generate demotion operations to
3130 the intermediate types, and then from those types to the final one.
3131 We create vector destinations for the intermediate type (TYPES) received
3132 from supportable_narrowing_operation, and store them in the correct order
3133 for future use in vect_create_vectorized_demotion_stmts(). */
3135 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3137 vec_dsts = VEC_alloc (tree, heap, 1);
/* Push the final destination first; intermediate destinations follow in
   reverse order so the recursion can pop them level by level.  */
3139 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3140 VEC_quick_push (tree, vec_dsts, vec_dest);
3144 for (i = VEC_length (tree, interm_types) - 1;
3145 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3147 vec_dest = vect_create_destination_var (scalar_dest,
3149 VEC_quick_push (tree, vec_dsts, vec_dest);
3153 /* In case the vectorization factor (VF) is bigger than the number
3154 of elements that we can fit in a vectype (nunits), we have to generate
3155 more than one vector stmt - i.e - we need to "unroll" the
3156 vector stmt by a factor VF/nunits. */
3158 prev_stmt_info = NULL;
3159 for (j = 0; j < ncopies; j++)
/* SLP: defs come from the SLP node; loop-based: gather 2^multi_step_cvt
   pairs of defs via vect_get_loop_based_defs.  */
3163 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3166 VEC_free (tree, heap, vec_oprnds0);
3167 vec_oprnds0 = VEC_alloc (tree, heap,
3168 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3169 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3170 vect_pow2 (multi_step_cvt) - 1);
3173 /* Arguments are ready. Create the new vector stmts. */
/* The recursion pops from the dst VEC, so hand it a fresh copy.  */
3174 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3175 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3176 multi_step_cvt, stmt, tmp_vec_dsts,
3177 gsi, slp_node, code1,
3181 VEC_free (tree, heap, vec_oprnds0);
3182 VEC_free (tree, heap, vec_dsts);
3183 VEC_free (tree, heap, tmp_vec_dsts);
3184 VEC_free (tree, heap, interm_types);
3186 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3191 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3192 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3193 the resulting vectors and call the function recursively.
     Each input vector yields two output vectors (the low and high halves of
     the widened result), produced with CODE1/DECL1 and CODE2/DECL2
     respectively.  VEC_DSTS holds one destination var per conversion level,
     popped as the recursion descends.  */
3196 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3197 VEC (tree, heap) **vec_oprnds1,
3198 int multi_step_cvt, gimple stmt,
3199 VEC (tree, heap) *vec_dsts,
3200 gimple_stmt_iterator *gsi,
3201 slp_tree slp_node, enum tree_code code1,
3202 enum tree_code code2, tree decl1,
3203 tree decl2, int op_type,
3204 stmt_vec_info *prev_stmt_info)
3207 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3208 gimple new_stmt1, new_stmt2;
3209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3210 VEC (tree, heap) *vec_tmp;
3212 vec_dest = VEC_pop (tree, vec_dsts);
3213 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3215 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3217 if (op_type == binary_op)
3218 vop1 = VEC_index (tree, *vec_oprnds1, i);
3222 /* Generate the two halves of promotion operation. */
3223 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3224 op_type, vec_dest, gsi, stmt);
3225 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3226 op_type, vec_dest, gsi, stmt);
/* The generated half may be either a call (target builtin) or an
   assignment; fetch the result from the matching lhs accessor.  */
3227 if (is_gimple_call (new_stmt1))
3229 new_tmp1 = gimple_call_lhs (new_stmt1);
3230 new_tmp2 = gimple_call_lhs (new_stmt2);
3234 new_tmp1 = gimple_assign_lhs (new_stmt1);
3235 new_tmp2 = gimple_assign_lhs (new_stmt2);
3240 /* Store the results for the recursive call. */
3241 VEC_quick_push (tree, vec_tmp, new_tmp1);
3242 VEC_quick_push (tree, vec_tmp, new_tmp2);
3246 /* Last step of promotion sequence - store the results. */
3249 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3250 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3254 if (!*prev_stmt_info)
3255 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3257 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3259 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3260 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3261 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3268 /* For multi-step promotion operations we call the
3269 function recursively for every stage. We start from the input type,
3270 create promotion operations to the intermediate types, and then
3271 create promotions to the output type. */
3272 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
/* Pass DECL1/DECL2 through unchanged; the previous code passed DECL2 for
   the DECL1 parameter (latent, since multi-step promotions only occur for
   conversions where both decls are NULL_TREE).  */
3273 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3274 multi_step_cvt - 1, stmt,
3275 vec_dsts, gsi, slp_node, code1,
3276 code2, decl1, decl2, op_type,
3280 VEC_free (tree, heap, vec_tmp);
3284 /* Function vectorizable_type_promotion
3286 Check if STMT performs a binary or unary operation that involves
3287 type promotion, and if it can be vectorized.
3288 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3289 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3290 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3293 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3294 gimple *vec_stmt, slp_tree slp_node)
3298 tree op0, op1 = NULL;
3299 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3300 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3302 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
/* Target builtin decls for the two widening halves, when the widening is
   done via builtins; otherwise left NULL_TREE.  */
3303 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3307 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3308 stmt_vec_info prev_stmt_info;
3315 tree intermediate_type = NULL_TREE;
3316 int multi_step_cvt = 0;
3317 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3318 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3319 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3327 /* Is STMT a vectorizable type-promotion operation? */
3328 if (!is_gimple_assign (stmt))
3331 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3334 code = gimple_assign_rhs_code (stmt);
3335 if (!CONVERT_EXPR_CODE_P (code)
3336 && code != WIDEN_MULT_EXPR)
3339 scalar_dest = gimple_assign_lhs (stmt);
3340 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3342 /* Check the operands of the operation. */
/* Only integer->integer and float->float conversions are handled.  */
3343 op0 = gimple_assign_rhs1 (stmt);
3344 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3345 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3346 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3347 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3348 && CONVERT_EXPR_CODE_P (code))))
3350 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3351 &def_stmt, &def, &dt[0], &vectype_in))
3353 if (vect_print_dump_info (REPORT_DETAILS))
3354 fprintf (vect_dump, "use not simple.");
3358 op_type = TREE_CODE_LENGTH (code);
3359 if (op_type == binary_op)
3363 op1 = gimple_assign_rhs2 (stmt);
3364 if (code == WIDEN_MULT_EXPR)
3366 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3367 the other operand. */
3368 if (CONSTANT_CLASS_P (op0))
3369 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3370 &def_stmt, &def, &dt[1], &vectype_in);
3372 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3377 if (vect_print_dump_info (REPORT_DETAILS))
3378 fprintf (vect_dump, "use not simple.");
3384 /* If op0 is an external or constant def use a vector type with
3385 the same size as the output vector type. */
3387 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3389 gcc_assert (vectype_in);
3392 if (vect_print_dump_info (REPORT_DETAILS))
3394 fprintf (vect_dump, "no vectype for scalar type ");
3395 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* For a promotion the input vector must have more units than the
   output vector.  */
3401 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3402 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3403 if (nunits_in <= nunits_out)
3406 /* Multiple types in SLP are handled by creating the appropriate number of
3407 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3408 the case of SLP. */
3409 if (slp_node || PURE_SLP_STMT (stmt_info))
3412 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3414 gcc_assert (ncopies >= 1);
3416 /* Supportable by target? */
3417 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3418 &decl1, &decl2, &code1, &code2,
3419 &multi_step_cvt, &interm_types))
3422 /* Binary widening operation can only be supported directly by the
3423 architecture. */
3424 gcc_assert (!(multi_step_cvt && op_type == binary_op));
/* Analysis phase only: record stmt kind and cost (two stmts are created
   per copy, hence 2*ncopies), no code generated.  */
3426 if (!vec_stmt) /* transformation not required. */
3428 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3429 if (vect_print_dump_info (REPORT_DETAILS))
3430 fprintf (vect_dump, "=== vectorizable_promotion ===");
3431 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
/* Transform phase starts here.  */
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
/* For WIDEN_MULT_EXPR fold a constant operand to the non-constant
   operand's type so both inputs agree.  */
3441 if (code == WIDEN_MULT_EXPR)
3443 if (CONSTANT_CLASS_P (op0))
3444 op0 = fold_convert (TREE_TYPE (op1), op0);
3445 else if (CONSTANT_CLASS_P (op1))
3446 op1 = fold_convert (TREE_TYPE (op0), op1);
3450 /* In case of multi-step promotion, we first generate promotion operations
3451 to the intermediate types, and then from those types to the final one.
3452 We store vector destination in VEC_DSTS in the correct order for
3453 recursive creation of promotion operations in
3454 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3455 according to TYPES received from supportable_widening_operation(). */
3457 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3459 vec_dsts = VEC_alloc (tree, heap, 1);
/* Push the final destination first; intermediate destinations follow in
   reverse order so the recursion can pop them level by level.  */
3461 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3462 VEC_quick_push (tree, vec_dsts, vec_dest);
3466 for (i = VEC_length (tree, interm_types) - 1;
3467 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3469 vec_dest = vect_create_destination_var (scalar_dest,
3471 VEC_quick_push (tree, vec_dsts, vec_dest);
3477 vec_oprnds0 = VEC_alloc (tree, heap,
3478 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3479 if (op_type == binary_op)
3480 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3483 /* In case the vectorization factor (VF) is bigger than the number
3484 of elements that we can fit in a vectype (nunits), we have to generate
3485 more than one vector stmt - i.e - we need to "unroll" the
3486 vector stmt by a factor VF/nunits. */
3488 prev_stmt_info = NULL;
3489 for (j = 0; j < ncopies; j++)
/* First iteration: fetch initial vector defs (from the SLP node, or from
   the scalar operands); later iterations fetch defs of the next copy.  */
3495 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3499 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3500 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3501 if (op_type == binary_op)
3503 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3504 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3510 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3511 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3512 if (op_type == binary_op)
3514 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3515 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3519 /* Arguments are ready. Create the new vector stmts. */
/* The recursion pops from the dst VEC, so hand it a fresh copy.  */
3520 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3521 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3522 multi_step_cvt, stmt,
3524 gsi, slp_node, code1, code2,
3525 decl1, decl2, op_type,
3529 VEC_free (tree, heap, vec_dsts);
3530 VEC_free (tree, heap, tmp_vec_dsts);
3531 VEC_free (tree, heap, interm_types);
3532 VEC_free (tree, heap, vec_oprnds0);
3533 VEC_free (tree, heap, vec_oprnds1);
3535 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3540 /* Function vectorizable_store.
3542 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3544 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3545 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3546 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* vectorizable_store: analysis and transformation of a scalar store STMT.
   Returns true iff STMT is a vectorizable store.  During the analysis
   phase (VEC_STMT == NULL) it only records the cost and the stmt kind;
   during transformation it emits the vector store statements at *GSI and
   chains them via STMT_VINFO_RELATED_STMT.  Handles plain stores,
   interleaved (strided) groups, SLP stores, and target store-lanes
   instructions.  */
3549 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3555 tree vec_oprnd = NULL_TREE;
3556 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3557 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3558 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3561 struct loop *loop = NULL;
3562 enum machine_mode vec_mode;
3564 enum dr_alignment_support alignment_support_scheme;
3567 enum vect_def_type dt;
3568 stmt_vec_info prev_stmt_info = NULL;
3569 tree dataref_ptr = NULL_TREE;
3570 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3573 gimple next_stmt, first_stmt = NULL;
3574 bool strided_store = false;
3575 bool store_lanes_p = false;
3576 unsigned int group_size, i;
3577 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3579 VEC(tree,heap) *vec_oprnds = NULL;
3580 bool slp = (slp_node != NULL);
3581 unsigned int vec_num;
3582 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3586 loop = LOOP_VINFO_LOOP (loop_vinfo);
3588 /* Multiple types in SLP are handled by creating the appropriate number of
3589 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3591 if (slp || PURE_SLP_STMT (stmt_info))
3594 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3596 gcc_assert (ncopies >= 1);
3598 /* FORNOW. This restriction should be relaxed. */
3599 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3601 if (vect_print_dump_info (REPORT_DETAILS))
3602 fprintf (vect_dump, "multiple types in nested loop.");
/* Skip stmts that are irrelevant for vectorization or whose defs are
   not internal to the loop/bb being vectorized.  */
3606 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3609 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3612 /* Is vectorizable store? */
3614 if (!is_gimple_assign (stmt))
3617 scalar_dest = gimple_assign_lhs (stmt);
3618 if (TREE_CODE (scalar_dest) != ARRAY_REF
3619 && TREE_CODE (scalar_dest) != INDIRECT_REF
3620 && TREE_CODE (scalar_dest) != COMPONENT_REF
3621 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3622 && TREE_CODE (scalar_dest) != REALPART_EXPR
3623 && TREE_CODE (scalar_dest) != MEM_REF)
3626 gcc_assert (gimple_assign_single_p (stmt));
3627 op = gimple_assign_rhs1 (stmt);
3628 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3630 if (vect_print_dump_info (REPORT_DETAILS))
3631 fprintf (vect_dump, "use not simple.");
3635 /* The scalar rhs type needs to be trivially convertible to the vector
3636 component type. This should always be the case. */
3637 elem_type = TREE_TYPE (vectype);
3638 if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
3640 if (vect_print_dump_info (REPORT_DETAILS))
3641 fprintf (vect_dump, "??? operands of different types");
3645 vec_mode = TYPE_MODE (vectype);
3646 /* FORNOW. In some cases can vectorize even if data-type not supported
3647 (e.g. - array initialization with 0). */
3648 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3651 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Negative-step (reversed) stores are not handled here.  */
3654 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3656 if (vect_print_dump_info (REPORT_DETAILS))
3657 fprintf (vect_dump, "negative step for store.");
/* Interleaved (strided) store group: verify the whole group can be
   handled, either via store-lanes or permutation.  */
3661 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3663 strided_store = true;
3664 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3665 if (!slp && !PURE_SLP_STMT (stmt_info))
3667 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3668 if (vect_store_lanes_supported (vectype, group_size))
3669 store_lanes_p = true;
3670 else if (!vect_strided_store_supported (vectype, group_size))
3674 if (first_stmt == stmt)
3676 /* STMT is the leader of the group. Check the operands of all the
3677 stmts of the group. */
3678 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3681 gcc_assert (gimple_assign_single_p (next_stmt));
3682 op = gimple_assign_rhs1 (next_stmt);
3683 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3686 if (vect_print_dump_info (REPORT_DETAILS))
3687 fprintf (vect_dump, "use not simple.");
3690 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3695 if (!vec_stmt) /* transformation not required. */
3697 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3698 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
/* ---- Transformation phase begins here. ---- */
3706 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3707 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3709 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3712 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3714 /* We vectorize all the stmts of the interleaving group when we
3715 reach the last stmt in the group. */
3716 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3717 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3726 strided_store = false;
3727 /* VEC_NUM is the number of vect stmts to be created for this
3729 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3730 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3731 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3734 /* VEC_NUM is the number of vect stmts to be created for this
3736 vec_num = group_size;
3742 group_size = vec_num = 1;
3745 if (vect_print_dump_info (REPORT_DETAILS))
3746 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3748 dr_chain = VEC_alloc (tree, heap, group_size);
3749 oprnds = VEC_alloc (tree, heap, group_size);
3751 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3752 gcc_assert (alignment_support_scheme);
3753 /* Targets with store-lane instructions must not require explicit
3755 gcc_assert (!store_lanes_p
3756 || alignment_support_scheme == dr_aligned
3757 || alignment_support_scheme == dr_unaligned_supported);
/* For store-lanes the access is one array of VEC_NUM * NUNITS elements;
   otherwise each vector statement accesses one vectype.  */
3760 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3762 aggr_type = vectype;
3764 /* In case the vectorization factor (VF) is bigger than the number
3765 of elements that we can fit in a vectype (nunits), we have to generate
3766 more than one vector stmt - i.e - we need to "unroll" the
3767 vector stmt by a factor VF/nunits. For more details see documentation in
3768 vect_get_vec_def_for_copy_stmt. */
3770 /* In case of interleaving (non-unit strided access):
3777 We create vectorized stores starting from base address (the access of the
3778 first stmt in the chain (S2 in the above example), when the last store stmt
3779 of the chain (S4) is reached:
3782 VS2: &base + vec_size*1 = vx0
3783 VS3: &base + vec_size*2 = vx1
3784 VS4: &base + vec_size*3 = vx3
3786 Then permutation statements are generated:
3788 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3789 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3792 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3793 (the order of the data-refs in the output of vect_permute_store_chain
3794 corresponds to the order of scalar stmts in the interleaving chain - see
3795 the documentation of vect_permute_store_chain()).
3797 In case of both multiple types and interleaving, above vector stores and
3798 permutation stmts are created for every copy. The result vector stmts are
3799 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3800 STMT_VINFO_RELATED_STMT for the next copies.
/* Outer loop: one iteration per vector copy (NCOPIES total).  */
3803 prev_stmt_info = NULL;
3804 for (j = 0; j < ncopies; j++)
3813 /* Get vectorized arguments for SLP_NODE. */
3814 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3817 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3821 /* For interleaved stores we collect vectorized defs for all the
3822 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3823 used as an input to vect_permute_store_chain(), and OPRNDS as
3824 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3826 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3827 OPRNDS are of size 1. */
3828 next_stmt = first_stmt;
3829 for (i = 0; i < group_size; i++)
3831 /* Since gaps are not supported for interleaved stores,
3832 GROUP_SIZE is the exact number of stmts in the chain.
3833 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3834 there is no interleaving, GROUP_SIZE is 1, and only one
3835 iteration of the loop will be executed. */
3836 gcc_assert (next_stmt
3837 && gimple_assign_single_p (next_stmt));
3838 op = gimple_assign_rhs1 (next_stmt);
3840 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3842 VEC_quick_push(tree, dr_chain, vec_oprnd);
3843 VEC_quick_push(tree, oprnds, vec_oprnd);
3844 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3848 /* We should have caught mismatched types earlier. */
3849 gcc_assert (useless_type_conversion_p (vectype,
3850 TREE_TYPE (vec_oprnd)));
3851 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3852 NULL_TREE, &dummy, gsi,
3853 &ptr_incr, false, &inv_p);
3854 gcc_assert (bb_vinfo || !inv_p);
3858 /* For interleaved stores we created vectorized defs for all the
3859 defs stored in OPRNDS in the previous iteration (previous copy).
3860 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3861 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3863 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3864 OPRNDS are of size 1. */
3865 for (i = 0; i < group_size; i++)
3867 op = VEC_index (tree, oprnds, i);
3868 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3870 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3871 VEC_replace(tree, dr_chain, i, vec_oprnd);
3872 VEC_replace(tree, oprnds, i, vec_oprnd);
/* Advance the data-ref pointer by one aggregate per copy.  */
3874 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3875 TYPE_SIZE_UNIT (aggr_type));
/* Store-lanes path: gather all vectors into an array and emit a single
   internal STORE_LANES call covering the whole group.  */
3882 /* Combine all the vectors into an array. */
3883 vec_array = create_vector_array (vectype, vec_num);
3884 for (i = 0; i < vec_num; i++)
3886 vec_oprnd = VEC_index (tree, dr_chain, i);
3887 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3891 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3892 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3893 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3894 gimple_call_set_lhs (new_stmt, data_ref);
3895 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3896 mark_symbols_for_renaming (new_stmt);
/* Permutation path: interleave the group's vectors, then emit one
   vector store per resulting vector.  */
3903 result_chain = VEC_alloc (tree, heap, group_size);
3905 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3909 next_stmt = first_stmt;
3910 for (i = 0; i < vec_num; i++)
3912 struct ptr_info_def *pi;
3915 /* Bump the vector pointer. */
3916 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3920 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3921 else if (strided_store)
3922 /* For strided stores vectorized defs are interleaved in
3923 vect_permute_store_chain(). */
3924 vec_oprnd = VEC_index (tree, result_chain, i);
3926 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3927 build_int_cst (reference_alias_ptr_type
3928 (DR_REF (first_dr)), 0));
/* Record alignment info on the pointer; for misaligned accesses the
   reference type is weakened to element alignment.  */
3929 pi = get_ptr_info (dataref_ptr);
3930 pi->align = TYPE_ALIGN_UNIT (vectype);
3931 if (aligned_access_p (first_dr))
3933 else if (DR_MISALIGNMENT (first_dr) == -1)
3935 TREE_TYPE (data_ref)
3936 = build_aligned_type (TREE_TYPE (data_ref),
3937 TYPE_ALIGN (elem_type));
3938 pi->align = TYPE_ALIGN_UNIT (elem_type);
3943 TREE_TYPE (data_ref)
3944 = build_aligned_type (TREE_TYPE (data_ref),
3945 TYPE_ALIGN (elem_type));
3946 pi->misalign = DR_MISALIGNMENT (first_dr);
3949 /* Arguments are ready. Create the new vector stmt. */
3950 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3952 mark_symbols_for_renaming (new_stmt);
3957 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* Chain the copies: first via STMT_VINFO_VEC_STMT, the rest via
   STMT_VINFO_RELATED_STMT.  */
3965 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3967 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3968 prev_stmt_info = vinfo_for_stmt (new_stmt);
3972 VEC_free (tree, heap, dr_chain);
3973 VEC_free (tree, heap, oprnds);
3975 VEC_free (tree, heap, result_chain);
3977 VEC_free (tree, heap, vec_oprnds);
3982 /* Given a vector type VECTYPE returns a builtin DECL to be used
3983 for vector permutation and stores a mask into *MASK that implements
3984 reversal of the vector elements. If that is impossible to do
3985 returns NULL (and *MASK is unchanged). */
/* perm_mask_for_reverse: query the target for a builtin that can permute
   vectors of type VECTYPE and build a mask that reverses the element
   order.  Returns the builtin decl on success (storing the mask through
   *MASK -- NOTE(review): the visible code builds MASK_VEC locally; the
   store through MASK presumably happens on an elided line), or NULL if
   the target cannot do it.  */
3988 perm_mask_for_reverse (tree vectype, tree *mask)
3991 tree mask_element_type, mask_type;
3992 tree mask_vec = NULL;
/* Bail out if the target provides no vector-permute builtin at all.  */
3995 if (!targetm.vectorize.builtin_vec_perm)
3998 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3999 &mask_element_type);
4000 if (!builtin_decl || !mask_element_type)
/* The mask vector must have the same number of elements as VECTYPE.  */
4003 mask_type = get_vectype_for_scalar_type (mask_element_type);
4004 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4006 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
/* Build indices 0..NUNITS-1; since tree_cons prepends, the resulting
   list reads NUNITS-1..0, i.e. the element-reversal permutation.  */
4009 for (i = 0; i < nunits; i++)
4010 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4011 mask_vec = build_vector (mask_type, mask_vec);
/* Final target check: is this particular mask actually supported?  */
4013 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
4017 return builtin_decl;
4020 /* Given a vector variable X, that was generated for the scalar LHS of
4021 STMT, generate instructions to reverse the vector elements of X,
4022 insert them at *GSI and return the permuted vector variable. */
/* reverse_vec_elements: emit statements at *GSI that reverse the element
   order of vector X (generated for the scalar LHS of STMT) using the
   target's permute builtin, and return the new SSA name holding the
   reversed vector.  */
4025 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4027 tree vectype = TREE_TYPE (x);
4028 tree mask_vec, builtin_decl;
4029 tree perm_dest, data_ref;
/* Fetch the permute builtin and the reversal mask for this vector type.
   NOTE(review): no NULL check here -- callers presumably verified
   support via perm_mask_for_reverse beforehand.  */
4032 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
4034 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4036 /* Generate the permute statement.  The builtin takes two input
   vectors plus the mask; both inputs are X here.  */
4037 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
/* If the builtin returns a different (but bit-compatible) vector type,
   go through a temporary and a VIEW_CONVERT_EXPR back to VECTYPE.  */
4038 if (!useless_type_conversion_p (vectype,
4039 TREE_TYPE (TREE_TYPE (builtin_decl))))
4041 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
4042 tem = make_ssa_name (tem, perm_stmt);
4043 gimple_call_set_lhs (perm_stmt, tem);
4044 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4045 perm_stmt = gimple_build_assign (NULL_TREE,
4046 build1 (VIEW_CONVERT_EXPR,
/* Give the final statement its SSA destination and insert it.  */
4049 data_ref = make_ssa_name (perm_dest, perm_stmt);
4050 gimple_set_lhs (perm_stmt, data_ref);
4051 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4056 /* vectorizable_load.
4058 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4060 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4061 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4062 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* vectorizable_load: analysis and transformation of a scalar load STMT.
   Returns true iff STMT is a vectorizable load.  During analysis
   (VEC_STMT == NULL) only the cost and stmt kind are recorded; during
   transformation the vector load statements are emitted at *GSI.
   Handles plain loads, interleaved (strided) groups, SLP (with optional
   load permutation), target load-lanes instructions, explicit
   realignment schemes, invariant loads, and negative-step (reversed)
   loads.  */
4065 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4066 slp_tree slp_node, slp_instance slp_node_instance)
4069 tree vec_dest = NULL;
4070 tree data_ref = NULL;
4071 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4072 stmt_vec_info prev_stmt_info;
4073 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4074 struct loop *loop = NULL;
4075 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4076 bool nested_in_vect_loop = false;
4077 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4078 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4081 enum machine_mode mode;
4082 gimple new_stmt = NULL;
4084 enum dr_alignment_support alignment_support_scheme;
4085 tree dataref_ptr = NULL_TREE;
4087 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4089 int i, j, group_size;
4090 tree msq = NULL_TREE, lsq;
4091 tree offset = NULL_TREE;
4092 tree realignment_token = NULL_TREE;
4094 VEC(tree,heap) *dr_chain = NULL;
4095 bool strided_load = false;
4096 bool load_lanes_p = false;
4101 bool compute_in_loop = false;
4102 struct loop *at_loop;
4104 bool slp = (slp_node != NULL);
4105 bool slp_perm = false;
4106 enum tree_code code;
4107 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4113 loop = LOOP_VINFO_LOOP (loop_vinfo);
4114 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4115 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4120 /* Multiple types in SLP are handled by creating the appropriate number of
4121 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4123 if (slp || PURE_SLP_STMT (stmt_info))
4126 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4128 gcc_assert (ncopies >= 1);
4130 /* FORNOW. This restriction should be relaxed. */
4131 if (nested_in_vect_loop && ncopies > 1)
4133 if (vect_print_dump_info (REPORT_DETAILS))
4134 fprintf (vect_dump, "multiple types in nested loop.");
/* Skip stmts irrelevant for vectorization or not internally defined.  */
4138 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4141 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4144 /* Is vectorizable load? */
4145 if (!is_gimple_assign (stmt))
4148 scalar_dest = gimple_assign_lhs (stmt);
4149 if (TREE_CODE (scalar_dest) != SSA_NAME)
4152 code = gimple_assign_rhs_code (stmt);
4153 if (code != ARRAY_REF
4154 && code != INDIRECT_REF
4155 && code != COMPONENT_REF
4156 && code != IMAGPART_EXPR
4157 && code != REALPART_EXPR
4159 && TREE_CODE_CLASS (code) != tcc_declaration)
4162 if (!STMT_VINFO_DATA_REF (stmt_info))
/* A negative DR step means the access runs backwards through memory;
   the vectors will need reversing after the load.  */
4165 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4166 if (negative && ncopies > 1)
4168 if (vect_print_dump_info (REPORT_DETAILS))
4169 fprintf (vect_dump, "multiple types with negative step.");
4173 scalar_type = TREE_TYPE (DR_REF (dr));
4174 mode = TYPE_MODE (vectype);
4176 /* FORNOW. In some cases can vectorize even if data-type not supported
4177 (e.g. - data copies). */
4178 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4180 if (vect_print_dump_info (REPORT_DETAILS))
4181 fprintf (vect_dump, "Aligned load, but unsupported type.");
4185 /* The vector component type needs to be trivially convertible to the
4186 scalar lhs. This should always be the case. */
4187 elem_type = TREE_TYPE (vectype);
4188 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
4190 if (vect_print_dump_info (REPORT_DETAILS))
4191 fprintf (vect_dump, "??? operands of different types");
4195 /* Check if the load is a part of an interleaving chain. */
4196 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4198 strided_load = true;
4200 gcc_assert (! nested_in_vect_loop);
4202 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4203 if (!slp && !PURE_SLP_STMT (stmt_info))
4205 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4206 if (vect_load_lanes_supported (vectype, group_size))
4207 load_lanes_p = true;
4208 else if (!vect_strided_load_supported (vectype, group_size))
/* Negative-step loads: require a supported alignment scheme and a
   target permute that can reverse the loaded vector.  */
4215 gcc_assert (!strided_load);
4216 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4217 if (alignment_support_scheme != dr_aligned
4218 && alignment_support_scheme != dr_unaligned_supported)
4220 if (vect_print_dump_info (REPORT_DETAILS))
4221 fprintf (vect_dump, "negative step but alignment required.");
4224 if (!perm_mask_for_reverse (vectype, NULL))
4226 if (vect_print_dump_info (REPORT_DETAILS))
4227 fprintf (vect_dump, "negative step and reversing not supported.");
4232 if (!vec_stmt) /* transformation not required. */
4234 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4235 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
/* ---- Transformation phase begins here. ---- */
4239 if (vect_print_dump_info (REPORT_DETAILS))
4240 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4246 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4247 /* Check if the chain of loads is already vectorized. */
4248 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4250 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4253 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4254 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4256 /* VEC_NUM is the number of vect stmts to be created for this group. */
4259 strided_load = false;
4260 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4261 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4265 vec_num = group_size;
4271 group_size = vec_num = 1;
4274 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4275 gcc_assert (alignment_support_scheme);
4276 /* Targets with load-lane instructions must not require explicit
4278 gcc_assert (!load_lanes_p
4279 || alignment_support_scheme == dr_aligned
4280 || alignment_support_scheme == dr_unaligned_supported);
4282 /* In case the vectorization factor (VF) is bigger than the number
4283 of elements that we can fit in a vectype (nunits), we have to generate
4284 more than one vector stmt - i.e - we need to "unroll" the
4285 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4286 from one copy of the vector stmt to the next, in the field
4287 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4288 stages to find the correct vector defs to be used when vectorizing
4289 stmts that use the defs of the current stmt. The example below
4290 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4291 need to create 4 vectorized stmts):
4293 before vectorization:
4294 RELATED_STMT VEC_STMT
4298 step 1: vectorize stmt S1:
4299 We first create the vector stmt VS1_0, and, as usual, record a
4300 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4301 Next, we create the vector stmt VS1_1, and record a pointer to
4302 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4303 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4305 RELATED_STMT VEC_STMT
4306 VS1_0: vx0 = memref0 VS1_1 -
4307 VS1_1: vx1 = memref1 VS1_2 -
4308 VS1_2: vx2 = memref2 VS1_3 -
4309 VS1_3: vx3 = memref3 - -
4310 S1: x = load - VS1_0
4313 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4314 information we recorded in RELATED_STMT field is used to vectorize
4317 /* In case of interleaving (non-unit strided access):
4324 Vectorized loads are created in the order of memory accesses
4325 starting from the access of the first stmt of the chain:
4328 VS2: vx1 = &base + vec_size*1
4329 VS3: vx3 = &base + vec_size*2
4330 VS4: vx4 = &base + vec_size*3
4332 Then permutation statements are generated:
4334 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4335 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4338 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4339 (the order of the data-refs in the output of vect_permute_load_chain
4340 corresponds to the order of scalar stmts in the interleaving chain - see
4341 the documentation of vect_permute_load_chain()).
4342 The generation of permutation stmts and recording them in
4343 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4345 In case of both multiple types and interleaving, the vector loads and
4346 permutation stmts above are created for every copy. The result vector
4347 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4348 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4350 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4351 on a target that supports unaligned accesses (dr_unaligned_supported)
4352 we generate the following code:
4356 p = p + indx * vectype_size;
4361 Otherwise, the data reference is potentially unaligned on a target that
4362 does not support unaligned accesses (dr_explicit_realign_optimized) -
4363 then generate the following code, in which the data in each iteration is
4364 obtained by two vector loads, one from the previous iteration, and one
4365 from the current iteration:
4367 msq_init = *(floor(p1))
4368 p2 = initial_addr + VS - 1;
4369 realignment_token = call target_builtin;
4372 p2 = p2 + indx * vectype_size
4374 vec_dest = realign_load (msq, lsq, realignment_token)
4379 /* If the misalignment remains the same throughout the execution of the
4380 loop, we can create the init_addr and permutation mask at the loop
4381 preheader. Otherwise, it needs to be created inside the loop.
4382 This can only occur when vectorizing memory accesses in the inner-loop
4383 nested within an outer-loop that is being vectorized. */
4385 if (loop && nested_in_vect_loop_p (loop, stmt)
4386 && (TREE_INT_CST_LOW (DR_STEP (dr))
4387 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4389 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4390 compute_in_loop = true;
/* Set up realignment outside the loop when the scheme allows it.  */
4393 if ((alignment_support_scheme == dr_explicit_realign_optimized
4394 || alignment_support_scheme == dr_explicit_realign)
4395 && !compute_in_loop)
4397 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4398 alignment_support_scheme, NULL_TREE,
4400 if (alignment_support_scheme == dr_explicit_realign_optimized)
4402 phi = SSA_NAME_DEF_STMT (msq);
4403 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
/* Negative step: start VS-1 elements back so the vector covers the
   right addresses (elements will be reversed after loading).  */
4410 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
/* For load-lanes the access is one array of VEC_NUM * NUNITS elements;
   otherwise each vector statement accesses one vectype.  */
4413 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4415 aggr_type = vectype;
/* Outer loop: one iteration per vector copy (NCOPIES total).  */
4417 prev_stmt_info = NULL;
4418 for (j = 0; j < ncopies; j++)
4420 /* 1. Create the vector or array pointer update chain. */
4422 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4423 offset, &dummy, gsi,
4424 &ptr_incr, false, &inv_p);
4426 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4427 TYPE_SIZE_UNIT (aggr_type));
4429 if (strided_load || slp_perm)
4430 dr_chain = VEC_alloc (tree, heap, vec_num);
/* Load-lanes path: one internal LOAD_LANES call fills an array, then
   each vector is extracted into an SSA name.  */
4436 vec_array = create_vector_array (vectype, vec_num);
4439 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4440 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4441 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4442 gimple_call_set_lhs (new_stmt, vec_array);
4443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4444 mark_symbols_for_renaming (new_stmt);
4446 /* Extract each vector into an SSA_NAME. */
4447 for (i = 0; i < vec_num; i++)
4449 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4451 VEC_quick_push (tree, dr_chain, new_temp);
4454 /* Record the mapping between SSA_NAMEs and statements. */
4455 vect_record_strided_load_vectors (stmt, dr_chain);
/* Non-load-lanes path: emit VEC_NUM individual vector loads.  */
4459 for (i = 0; i < vec_num; i++)
4462 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4465 /* 2. Create the vector-load in the loop. */
4466 switch (alignment_support_scheme)
4469 case dr_unaligned_supported:
4471 struct ptr_info_def *pi;
4473 = build2 (MEM_REF, vectype, dataref_ptr,
4474 build_int_cst (reference_alias_ptr_type
4475 (DR_REF (first_dr)), 0));
/* Record alignment on the pointer; weaken the reference type to
   element alignment when the access is not fully aligned.  */
4476 pi = get_ptr_info (dataref_ptr);
4477 pi->align = TYPE_ALIGN_UNIT (vectype);
4478 if (alignment_support_scheme == dr_aligned)
4480 gcc_assert (aligned_access_p (first_dr));
4483 else if (DR_MISALIGNMENT (first_dr) == -1)
4485 TREE_TYPE (data_ref)
4486 = build_aligned_type (TREE_TYPE (data_ref),
4487 TYPE_ALIGN (elem_type));
4488 pi->align = TYPE_ALIGN_UNIT (elem_type);
4493 TREE_TYPE (data_ref)
4494 = build_aligned_type (TREE_TYPE (data_ref),
4495 TYPE_ALIGN (elem_type));
4496 pi->misalign = DR_MISALIGNMENT (first_dr);
/* Explicit realign: load the two surrounding aligned vectors (MSQ at
   floor(p), LSQ at floor(p + VS - 1)) each iteration.  */
4500 case dr_explicit_realign:
4505 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4507 if (compute_in_loop)
4508 msq = vect_setup_realignment (first_stmt, gsi,
4510 dr_explicit_realign,
/* Mask the pointer down to vector alignment for the first load.  */
4513 new_stmt = gimple_build_assign_with_ops
4514 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4516 (TREE_TYPE (dataref_ptr),
4517 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4518 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4519 gimple_assign_set_lhs (new_stmt, ptr);
4520 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4522 = build2 (MEM_REF, vectype, ptr,
4523 build_int_cst (reference_alias_ptr_type
4524 (DR_REF (first_dr)), 0));
4525 vec_dest = vect_create_destination_var (scalar_dest,
4527 new_stmt = gimple_build_assign (vec_dest, data_ref);
4528 new_temp = make_ssa_name (vec_dest, new_stmt);
4529 gimple_assign_set_lhs (new_stmt, new_temp);
4530 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4531 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4532 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Second load: advance by VS-1 scalar elements, realign, reload.  */
4535 bump = size_binop (MULT_EXPR, vs_minus_1,
4536 TYPE_SIZE_UNIT (scalar_type));
4537 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4538 new_stmt = gimple_build_assign_with_ops
4539 (BIT_AND_EXPR, NULL_TREE, ptr,
4542 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4543 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4544 gimple_assign_set_lhs (new_stmt, ptr);
4545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4547 = build2 (MEM_REF, vectype, ptr,
4548 build_int_cst (reference_alias_ptr_type
4549 (DR_REF (first_dr)), 0));
/* Optimized realign: MSQ comes from the previous iteration's LSQ via a
   PHI; only one aligned load is needed per iteration.  */
4552 case dr_explicit_realign_optimized:
4553 new_stmt = gimple_build_assign_with_ops
4554 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4556 (TREE_TYPE (dataref_ptr),
4557 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4558 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4560 gimple_assign_set_lhs (new_stmt, new_temp);
4561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 = build2 (MEM_REF, vectype, new_temp,
4564 build_int_cst (reference_alias_ptr_type
4565 (DR_REF (first_dr)), 0));
/* Emit the actual vector load from DATA_REF into a fresh SSA name.  */
4570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4571 new_stmt = gimple_build_assign (vec_dest, data_ref);
4572 new_temp = make_ssa_name (vec_dest, new_stmt);
4573 gimple_assign_set_lhs (new_stmt, new_temp);
4574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4575 mark_symbols_for_renaming (new_stmt);
4577 /* 3. Handle explicit realignment if necessary/supported.
4579 vec_dest = realign_load (msq, lsq, realignment_token) */
4580 if (alignment_support_scheme == dr_explicit_realign_optimized
4581 || alignment_support_scheme == dr_explicit_realign)
4583 lsq = gimple_assign_lhs (new_stmt);
4584 if (!realignment_token)
4585 realignment_token = dataref_ptr;
4586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4588 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4591 new_temp = make_ssa_name (vec_dest, new_stmt);
4592 gimple_assign_set_lhs (new_stmt, new_temp);
4593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Feed the last LSQ back into the MSQ PHI on the latch edge.  */
4595 if (alignment_support_scheme == dr_explicit_realign_optimized)
4598 if (i == vec_num - 1 && j == ncopies - 1)
4599 add_phi_arg (phi, lsq,
4600 loop_latch_edge (containing_loop),
4606 /* 4. Handle invariant-load.  The invariant scalar is splat into a
4607 vector initialized outside the loop. */
4607 if (inv_p && !bb_vinfo)
4610 gimple_stmt_iterator gsi2 = *gsi;
4611 gcc_assert (!strided_load);
4613 vec_inv = build_vector_from_val (vectype, scalar_dest);
4614 new_temp = vect_init_vector (stmt, vec_inv,
4616 new_stmt = SSA_NAME_DEF_STMT (new_temp);
/* Negative step: reverse the loaded vector's elements.  */
4621 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4622 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4625 /* Collect vector loads and later create their permutation in
4626 vect_transform_strided_load (). */
4627 if (strided_load || slp_perm)
4628 VEC_quick_push (tree, dr_chain, new_temp);
4630 /* Store vector loads in the corresponding SLP_NODE. */
4631 if (slp && !slp_perm)
4632 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4637 if (slp && !slp_perm)
/* SLP with load permutation: reorder the loaded vectors.  */
4642 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4643 slp_node_instance, false))
4645 VEC_free (tree, heap, dr_chain);
/* Strided epilogue: generate extract-even/odd permutations and record
   the results on the scalar stmts of the group.  */
4654 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4655 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
/* Chain the copies: first via STMT_VINFO_VEC_STMT, the rest via
   STMT_VINFO_RELATED_STMT.  */
4660 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4662 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4663 prev_stmt_info = vinfo_for_stmt (new_stmt);
4667 VEC_free (tree, heap, dr_chain);
4673 /* Function vect_is_simple_cond.
4676 LOOP - the loop that is being vectorized.
4677 COND - Condition that is checked for simple use.
4680 *COMP_VECTYPE - the vector type for the comparison.
4682 Returns whether a COND can be vectorized. Checks whether
4683 condition operands are supportable using vec_is_simple_use. */
/* Verify that COND is a simple comparison whose operands are each either a
   scalar constant (integer/real/fixed-point) or an SSA_NAME with a
   vectorizable definition; on success record a comparison vector type in
   *COMP_VECTYPE.  NOTE(review): the chunk is line-sampled here, several
   statements (returns, braces) are not visible.  */
4686 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4690 enum vect_def_type dt;
4691 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
/* Only tcc_comparison tree codes qualify as a vectorizable condition.  */
4693 if (!COMPARISON_CLASS_P (cond))
4696 lhs = TREE_OPERAND (cond, 0);
4697 rhs = TREE_OPERAND (cond, 1);
/* LHS must be an SSA name with a "simple use" (vectorizable def), or a
   scalar constant.  vect_is_simple_use_1 presumably also fills vectype1
   via an argument on a sampled-out line — TODO confirm.  */
4699 if (TREE_CODE (lhs) == SSA_NAME)
4701 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4702 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4706 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4707 && TREE_CODE (lhs) != FIXED_CST)
/* The same check, applied to the RHS operand.  */
4710 if (TREE_CODE (rhs) == SSA_NAME)
4712 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4713 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4717 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4718 && TREE_CODE (rhs) != FIXED_CST)
/* Either operand may be a constant, in which case its vectype is
   NULL_TREE; prefer whichever one was set.  */
4721 *comp_vectype = vectype1 ? vectype1 : vectype2;
4725 /* vectorizable_condition.
4727 Check if STMT is conditional modify expression that can be vectorized.
4728 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4729 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4732 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4733 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4734 else clause if it is 2).
4736 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (when VEC_STMT is NULL) or transform a scalar COND_EXPR assignment
   into a VEC_COND_EXPR.  REDUC_DEF/REDUC_INDEX select which clause is fed by
   a reduction (1 = then, 2 = else).  Returns true iff the stmt is
   vectorizable.  NOTE(review): line-sampled fragment; some statements and
   braces are not visible.  */
4739 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4740 gimple *vec_stmt, tree reduc_def, int reduc_index)
4742 tree scalar_dest = NULL_TREE;
4743 tree vec_dest = NULL_TREE;
4744 tree cond_expr, then_clause, else_clause;
4745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4748 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4749 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4750 tree vec_compare, vec_cond_expr;
4752 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4754 enum vect_def_type dt, dts[4];
4755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Number of vector stmts needed to cover one vectorization factor.  */
4756 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4757 enum tree_code code;
4758 stmt_vec_info prev_stmt_info = NULL;
4761 /* FORNOW: unsupported in basic block SLP. */
4762 gcc_assert (loop_vinfo);
4764 /* FORNOW: SLP not supported. */
4765 if (STMT_SLP_TYPE (stmt_info))
4768 gcc_assert (ncopies >= 1);
/* Multiple copies of a reduction condition are not handled yet.  */
4769 if (reduc_index && ncopies > 1)
4770 return false; /* FORNOW */
4772 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4775 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4776 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4780 /* FORNOW: not yet supported. */
4781 if (STMT_VINFO_LIVE_P (stmt_info))
4783 if (vect_print_dump_info (REPORT_DETAILS))
4784 fprintf (vect_dump, "value used after loop.");
4788 /* Is vectorizable conditional operation? */
4789 if (!is_gimple_assign (stmt))
4792 code = gimple_assign_rhs_code (stmt);
4794 if (code != COND_EXPR)
/* A COND_EXPR assignment carries the condition and the two clauses as
   the three RHS operands.  */
4797 cond_expr = gimple_assign_rhs1 (stmt);
4798 then_clause = gimple_assign_rhs2 (stmt);
4799 else_clause = gimple_assign_rhs3 (stmt);
4801 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
/* Each clause must be an SSA name with a simple def, or a scalar
   constant.  */
4805 if (TREE_CODE (then_clause) == SSA_NAME)
4807 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4808 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4809 &then_def_stmt, &def, &dt))
4812 else if (TREE_CODE (then_clause) != INTEGER_CST
4813 && TREE_CODE (then_clause) != REAL_CST
4814 && TREE_CODE (then_clause) != FIXED_CST)
4817 if (TREE_CODE (else_clause) == SSA_NAME)
4819 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4820 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4821 &else_def_stmt, &def, &dt))
4824 else if (TREE_CODE (else_clause) != INTEGER_CST
4825 && TREE_CODE (else_clause) != REAL_CST
4826 && TREE_CODE (else_clause) != FIXED_CST)
/* Analysis phase: record the stmt kind and ask the target whether a
   VEC_COND_EXPR of these types can be expanded.  */
4831 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4832 return expand_vec_cond_expr_p (vectype, comp_vectype);
/* Transformation phase starts here.  */
4838 scalar_dest = gimple_assign_lhs (stmt);
4839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4841 /* Handle cond expr. */
4842 for (j = 0; j < ncopies; j++)
/* First copy: obtain the vector defs of all four operands (reduction
   clauses come directly from REDUC_DEF).  */
4849 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4851 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4852 NULL, >emp, &def, &dts[0]);
4854 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4856 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4857 NULL, >emp, &def, &dts[1]);
4858 if (reduc_index == 1)
4859 vec_then_clause = reduc_def;
4862 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4864 vect_is_simple_use (then_clause, loop_vinfo,
4865 NULL, >emp, &def, &dts[2]);
4867 if (reduc_index == 2)
4868 vec_else_clause = reduc_def;
4871 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4873 vect_is_simple_use (else_clause, loop_vinfo,
4874 NULL, >emp, &def, &dts[3]);
/* Subsequent copies: chain from the previous copy's vector defs.  */
4879 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4880 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4881 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4883 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4887 /* Arguments are ready. Create the new vector stmt. */
4888 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4889 vec_cond_lhs, vec_cond_rhs);
4890 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4891 vec_compare, vec_then_clause, vec_else_clause);
4893 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4894 new_temp = make_ssa_name (vec_dest, new_stmt);
4895 gimple_assign_set_lhs (new_stmt, new_temp);
4896 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Link the copies: the first goes in VEC_STMT, the rest hang off the
   RELATED_STMT chain.  */
4898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4902 prev_stmt_info = vinfo_for_stmt (new_stmt);
4909 /* Make sure the statement is vectorizable. */
/* Analyze STMT for vectorizability: dispatch to the vectorizable_* analysis
   routines (with NULL vec_stmt), handle pattern stmts, and set
   *NEED_TO_VECTORIZE when a relevant stmt is found.  NODE is non-NULL for
   SLP analysis.  NOTE(review): line-sampled fragment; some statements and
   braces are not visible.  */
4912 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4915 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4916 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4918 tree scalar_type, vectype;
4919 gimple pattern_stmt, pattern_def_stmt;
4921 if (vect_print_dump_info (REPORT_DETAILS))
4923 fprintf (vect_dump, "==> examining statement: ");
4924 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Volatile accesses cannot be reordered, so refuse to vectorize.  */
4927 if (gimple_has_volatile_ops (stmt))
4929 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4930 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4935 /* Skip stmts that do not need to be vectorized. In loops this is expected
4937 - the COND_EXPR which is the loop exit condition
4938 - any LABEL_EXPRs in the loop
4939 - computations that are used only for array indexing or loop control.
4940 In basic blocks we only analyze statements that are a part of some SLP
4941 instance, therefore, all the statements are relevant.
4943 Pattern statement need to be analyzed instead of the original statement
4944 if the original statement is not relevant. Otherwise, we analyze both
4947 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4948 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4949 && !STMT_VINFO_LIVE_P (stmt_info))
4951 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4953 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4954 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4956 /* Analyze PATTERN_STMT instead of the original stmt. */
4957 stmt = pattern_stmt;
4958 stmt_info = vinfo_for_stmt (pattern_stmt);
4959 if (vect_print_dump_info (REPORT_DETAILS))
4961 fprintf (vect_dump, "==> examining pattern statement: ");
4962 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4967 if (vect_print_dump_info (REPORT_DETAILS))
4968 fprintf (vect_dump, "irrelevant.");
4973 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4975 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4976 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4978 /* Analyze PATTERN_STMT too. */
4979 if (vect_print_dump_info (REPORT_DETAILS))
4981 fprintf (vect_dump, "==> examining pattern statement: ");
4982 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Recurse to analyze the pattern stmt alongside the original.  */
4985 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
4989 if (is_pattern_stmt_p (stmt_info)
4990 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
4991 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
4992 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
4994 /* Analyze def stmt of STMT if it's a pattern stmt. */
4995 if (vect_print_dump_info (REPORT_DETAILS))
4997 fprintf (vect_dump, "==> examining pattern def statement: ");
4998 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5001 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
/* Sanity-check the relevance recorded for each def kind.  */
5006 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5008 case vect_internal_def:
5011 case vect_reduction_def:
5012 case vect_nested_cycle:
5013 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5014 || relevance == vect_used_in_outer_by_reduction
5015 || relevance == vect_unused_in_scope));
5018 case vect_induction_def:
5019 case vect_constant_def:
5020 case vect_external_def:
5021 case vect_unknown_def_type:
5028 gcc_assert (PURE_SLP_STMT (stmt_info));
/* BB-SLP path (presumably; guard lines are sampled out — TODO confirm):
   compute the vectype from the stmt's scalar result type.  */
5030 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5031 if (vect_print_dump_info (REPORT_DETAILS))
5033 fprintf (vect_dump, "get vectype for scalar type: ");
5034 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5037 vectype = get_vectype_for_scalar_type (scalar_type);
5040 if (vect_print_dump_info (REPORT_DETAILS))
5042 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5043 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5048 if (vect_print_dump_info (REPORT_DETAILS))
5050 fprintf (vect_dump, "vectype: ");
5051 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5054 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5057 if (STMT_VINFO_RELEVANT_P (stmt_info))
5059 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5060 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5061 *need_to_vectorize = true;
/* Loop analysis: try every vectorizable_* routine in turn; any one
   succeeding makes the stmt vectorizable.  */
5066 && (STMT_VINFO_RELEVANT_P (stmt_info)
5067 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5068 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5069 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5070 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5071 || vectorizable_shift (stmt, NULL, NULL, NULL)
5072 || vectorizable_operation (stmt, NULL, NULL, NULL)
5073 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5074 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5075 || vectorizable_call (stmt, NULL, NULL)
5076 || vectorizable_store (stmt, NULL, NULL, NULL)
5077 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5078 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
/* SLP analysis: the same, restricted to the kinds SLP supports, with
   NODE passed through.  */
5082 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5083 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5084 || vectorizable_shift (stmt, NULL, NULL, node)
5085 || vectorizable_operation (stmt, NULL, NULL, node)
5086 || vectorizable_assignment (stmt, NULL, NULL, node)
5087 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5088 || vectorizable_store (stmt, NULL, NULL, node));
5093 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5095 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5096 fprintf (vect_dump, "supported: ");
5097 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5106 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5107 need extra handling, except for vectorizable reductions. */
5108 if (STMT_VINFO_LIVE_P (stmt_info)
5109 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5110 ok = vectorizable_live_operation (stmt, NULL, NULL);
5114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5116 fprintf (vect_dump, "not vectorized: live stmt not ");
5117 fprintf (vect_dump, "supported: ");
5118 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5128 /* Function vect_transform_stmt.
5130 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Transform STMT: dispatch on the vec_info type recorded during analysis to
   the matching vectorizable_* transform routine, then record the generated
   vector stmt.  Returns IS_STORE (presumably — the return lines are sampled
   out; TODO confirm).  */
5133 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5134 bool *strided_store, slp_tree slp_node,
5135 slp_instance slp_node_instance)
5137 bool is_store = false;
5138 gimple vec_stmt = NULL;
5139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the stmt kind determined by vect_analyze_stmt.  */
5142 switch (STMT_VINFO_TYPE (stmt_info))
5144 case type_demotion_vec_info_type:
5145 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5149 case type_promotion_vec_info_type:
5150 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5154 case type_conversion_vec_info_type:
5155 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5159 case induc_vec_info_type:
5160 gcc_assert (!slp_node);
5161 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5165 case shift_vec_info_type:
5166 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5170 case op_vec_info_type:
5171 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5175 case assignment_vec_info_type:
5176 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5180 case load_vec_info_type:
5181 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5186 case store_vec_info_type:
5187 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5189 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5191 /* In case of interleaving, the whole chain is vectorized when the
5192 last store in the chain is reached. Store stmts before the last
5193 one are skipped, and their vec_stmt_info shouldn't be freed
5195 *strided_store = true;
5196 if (STMT_VINFO_VEC_STMT (stmt_info))
5203 case condition_vec_info_type:
5204 gcc_assert (!slp_node);
5205 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5209 case call_vec_info_type:
5210 gcc_assert (!slp_node);
5211 done = vectorizable_call (stmt, gsi, &vec_stmt);
/* The call transform may have replaced the stmt at GSI; refetch it.  */
5212 stmt = gsi_stmt (*gsi);
5215 case reduc_vec_info_type:
5216 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5221 if (!STMT_VINFO_LIVE_P (stmt_info))
5223 if (vect_print_dump_info (REPORT_DETAILS))
5224 fprintf (vect_dump, "stmt not supported.");
5229 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5230 is being vectorized, but outside the immediately enclosing loop. */
5232 && STMT_VINFO_LOOP_VINFO (stmt_info)
5233 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5234 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5235 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5236 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5237 || STMT_VINFO_RELEVANT (stmt_info) ==
5238 vect_used_in_outer_by_reduction))
5240 struct loop *innerloop = LOOP_VINFO_LOOP (
5241 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5242 imm_use_iterator imm_iter;
5243 use_operand_p use_p;
5247 if (vect_print_dump_info (REPORT_DETAILS))
5248 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5250 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5251 (to be used when vectorizing outer-loop stmts that use the DEF of
5253 if (gimple_code (stmt) == GIMPLE_PHI)
5254 scalar_dest = PHI_RESULT (stmt);
5256 scalar_dest = gimple_assign_lhs (stmt);
/* Any use outside the inner loop is the exit phi we want.  */
5258 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5260 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5262 exit_phi = USE_STMT (use_p);
5263 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5268 /* Handle stmts whose DEF is used outside the loop-nest that is
5269 being vectorized. */
5270 if (STMT_VINFO_LIVE_P (stmt_info)
5271 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5273 done = vectorizable_live_operation (stmt, gsi, &vec_stmt)
5278 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5284 /* Remove a group of stores (for SLP or interleaving), free their
/* Walk the interleaving chain starting at FIRST_STMT, removing each store
   stmt and freeing its stmt_vec_info.  The loop/advance-to-next lines are
   sampled out of this view.  */
5288 vect_remove_stores (gimple first_stmt)
5290 gimple next = first_stmt;
5292 gimple_stmt_iterator next_si;
5296 /* Free the attached stmt_vec_info and remove the stmt. */
5297 next_si = gsi_for_stmt (next);
5298 gsi_remove (&next_si, true);
/* Fetch the chain successor before NEXT's info is freed.  */
5299 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5300 free_stmt_vec_info (next);
5306 /* Function new_stmt_vec_info.
5308 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocate (zeroed) and initialize a stmt_vec_info for STMT, associated with
   LOOP_VINFO / BB_VINFO.  Caller owns the result; it is released by
   free_stmt_vec_info.  */
5311 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5312 bb_vec_info bb_vinfo)
5315 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5317 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5318 STMT_VINFO_STMT (res) = stmt;
5319 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5320 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5321 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5322 STMT_VINFO_LIVE_P (res) = false;
5323 STMT_VINFO_VECTYPE (res) = NULL;
5324 STMT_VINFO_VEC_STMT (res) = NULL;
5325 STMT_VINFO_VECTORIZABLE (res) = true;
5326 STMT_VINFO_IN_PATTERN_P (res) = false;
5327 STMT_VINFO_RELATED_STMT (res) = NULL;
5328 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5329 STMT_VINFO_DATA_REF (res) = NULL;
5331 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5332 STMT_VINFO_DR_OFFSET (res) = NULL;
5333 STMT_VINFO_DR_INIT (res) = NULL;
5334 STMT_VINFO_DR_STEP (res) = NULL;
5335 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
/* Loop-header PHIs may turn out to be inductions/reductions; classify
   later.  Everything else starts as an internal def.  */
5337 if (gimple_code (stmt) == GIMPLE_PHI
5338 && is_loop_header_bb_p (gimple_bb (stmt)))
5339 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5341 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5343 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5344 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5345 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5346 STMT_SLP_TYPE (res) = loop_vect;
5347 GROUP_FIRST_ELEMENT (res) = NULL;
5348 GROUP_NEXT_ELEMENT (res) = NULL;
5349 GROUP_SIZE (res) = 0;
5350 GROUP_STORE_COUNT (res) = 0;
5351 GROUP_GAP (res) = 0;
5352 GROUP_SAME_DR_STMT (res) = NULL;
5353 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5359 /* Create a hash table for stmt_vec_info. */
/* Allocate the global stmt_vec_info vector; must not already exist.  */
5362 init_stmt_vec_info_vec (void)
5364 gcc_assert (!stmt_vec_info_vec);
5365 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5369 /* Free hash table for stmt_vec_info. */
/* Release the global stmt_vec_info vector; must exist.  */
5372 free_stmt_vec_info_vec (void)
5374 gcc_assert (stmt_vec_info_vec);
5375 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5379 /* Free stmt vectorization related info. */
/* Free STMT's stmt_vec_info (including its same-align-refs vector) and
   clear the stmt -> info mapping.  */
5382 free_stmt_vec_info (gimple stmt)
5384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5389 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5390 set_vinfo_for_stmt (stmt, NULL);
5395 /* Function get_vectype_for_scalar_type_and_size.
5397 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* Build the vector type for SCALAR_TYPE with total size SIZE bytes (0 means
   use the target's preferred SIMD mode).  Returns NULL_TREE (on the
   sampled-out early-return paths — TODO confirm) when no suitable vector
   type exists.  */
5401 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5403 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5404 enum machine_mode simd_mode;
5405 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5412 /* We can't build a vector type of elements with alignment bigger than
5414 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5417 /* If we'd build a vector type of elements whose mode precision doesn't
5418 match their types precision we'll get mismatched types on vector
5419 extracts via BIT_FIELD_REFs. This effectively means we disable
5420 vectorization of bool and/or enum types in some languages. */
5421 if (INTEGRAL_TYPE_P (scalar_type)
5422 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
/* Only integer and float element modes are supported.  */
5425 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5426 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5429 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5430 When the component mode passes the above test simply use a type
5431 corresponding to that mode. The theory is that any use that
5432 would cause problems with this will disable vectorization anyway. */
5433 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5434 && !INTEGRAL_TYPE_P (scalar_type)
5435 && !POINTER_TYPE_P (scalar_type))
5436 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5438 /* If no size was supplied use the mode the target prefers. Otherwise
5439 lookup a vector mode of the specified size. */
5441 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5443 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5444 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5448 vectype = build_vector_type (scalar_type, nunits);
5449 if (vect_print_dump_info (REPORT_DETAILS))
5451 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5452 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5458 if (vect_print_dump_info (REPORT_DETAILS))
5460 fprintf (vect_dump, "vectype: ");
5461 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* Reject types whose mode the target cannot represent as a vector (or
   wide integer) mode.  */
5464 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5465 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5467 if (vect_print_dump_info (REPORT_DETAILS))
5468 fprintf (vect_dump, "mode not supported by target.");
/* Vector size (in bytes) currently used by the vectorizer; 0 until the
   first call to get_vectype_for_scalar_type fixes it.  */
5475 unsigned int current_vector_size;
5477 /* Function get_vectype_for_scalar_type.
5479 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Wrapper around get_vectype_for_scalar_type_and_size using (and, on first
   success, latching) the global current_vector_size.  */
5483 get_vectype_for_scalar_type (tree scalar_type)
5486 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5487 current_vector_size);
/* First successful lookup fixes the vector size for the rest of the
   compilation unit.  */
5489 && current_vector_size == 0)
5490 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5494 /* Function get_same_sized_vectype
5496 Returns a vector type corresponding to SCALAR_TYPE of size
5497 VECTOR_TYPE if supported by the target. */
/* Return a vector type for SCALAR_TYPE with the same total byte size as
   VECTOR_TYPE, if the target supports one.  */
5500 get_same_sized_vectype (tree scalar_type, tree vector_type)
5502 return get_vectype_for_scalar_type_and_size
5503 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5506 /* Function vect_is_simple_use.
5509 LOOP_VINFO - the vect info of the loop that is being vectorized.
5510 BB_VINFO - the vect info of the basic block that is being vectorized.
5511 OPERAND - operand of a stmt in the loop or bb.
5512 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5514 Returns whether a stmt with OPERAND can be vectorized.
5515 For loops, supportable operands are constants, loop invariants, and operands
5516 that are defined by the current iteration of the loop. Unsupportable
5517 operands are those that are defined by a previous iteration of the loop (as
5518 is the case in reduction/induction computations).
5519 For basic blocks, supportable operands are constants and bb invariants.
5520 For now, operands defined outside the basic block are not supported. */
/* Classify OPERAND's definition for the vectorizer: set *DT to its
   vect_def_type, *DEF_STMT to its defining stmt and *DEF to the defined
   value.  Returns false (on sampled-out lines — TODO confirm) for
   unsupported operands.  */
5523 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5524 bb_vec_info bb_vinfo, gimple *def_stmt,
5525 tree *def, enum vect_def_type *dt)
5528 stmt_vec_info stmt_vinfo;
5529 struct loop *loop = NULL;
5532 loop = LOOP_VINFO_LOOP (loop_vinfo);
5537 if (vect_print_dump_info (REPORT_DETAILS))
5539 fprintf (vect_dump, "vect_is_simple_use: operand ");
5540 print_generic_expr (vect_dump, operand, TDF_SLIM);
/* Literal constants are trivially simple.  */
5543 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5545 *dt = vect_constant_def;
/* Other invariants are defined outside the vectorized region.  */
5549 if (is_gimple_min_invariant (operand))
5552 *dt = vect_external_def;
/* Look through PAREN_EXPR (non-associatable copy) wrappers.  */
5556 if (TREE_CODE (operand) == PAREN_EXPR)
5558 if (vect_print_dump_info (REPORT_DETAILS))
5559 fprintf (vect_dump, "non-associatable copy.");
5560 operand = TREE_OPERAND (operand, 0);
5563 if (TREE_CODE (operand) != SSA_NAME)
5565 if (vect_print_dump_info (REPORT_DETAILS))
5566 fprintf (vect_dump, "not ssa-name.");
5570 *def_stmt = SSA_NAME_DEF_STMT (operand);
5571 if (*def_stmt == NULL)
5573 if (vect_print_dump_info (REPORT_DETAILS))
5574 fprintf (vect_dump, "no def_stmt.");
5578 if (vect_print_dump_info (REPORT_DETAILS))
5580 fprintf (vect_dump, "def_stmt: ");
5581 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5584 /* Empty stmt is expected only in case of a function argument.
5585 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5586 if (gimple_nop_p (*def_stmt))
5589 *dt = vect_external_def;
/* Defs from outside the loop/bb being vectorized (or bb PHIs) are
   external; otherwise use the def type recorded during analysis.  */
5593 bb = gimple_bb (*def_stmt);
5595 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5596 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5597 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5598 *dt = vect_external_def;
5601 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5602 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5605 if (*dt == vect_unknown_def_type)
5607 if (vect_print_dump_info (REPORT_DETAILS))
5608 fprintf (vect_dump, "Unsupported pattern.");
5612 if (vect_print_dump_info (REPORT_DETAILS))
5613 fprintf (vect_dump, "type of def: %d.",*dt);
/* Extract the defined value according to the def stmt's kind.  */
5615 switch (gimple_code (*def_stmt))
5618 *def = gimple_phi_result (*def_stmt);
5622 *def = gimple_assign_lhs (*def_stmt);
5626 *def = gimple_call_lhs (*def_stmt);
5631 if (vect_print_dump_info (REPORT_DETAILS))
5632 fprintf (vect_dump, "unsupported defining stmt: ");
5639 /* Function vect_is_simple_use_1.
5641 Same as vect_is_simple_use but also determines the vector operand
5642 type of OPERAND and stores it to *VECTYPE. If the definition of
5643 OPERAND is vect_uninitialized_def, vect_constant_def or
5644 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5645 is responsible to compute the best suited vector type for the
/* Like vect_is_simple_use, additionally returning in *VECTYPE the vector
   type of OPERAND's def — or NULL_TREE for constant/external/uninitialized
   defs, where the caller must choose a suitable vector type itself.  */
5649 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5650 bb_vec_info bb_vinfo, gimple *def_stmt,
5651 tree *def, enum vect_def_type *dt, tree *vectype)
5653 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5656 /* Now get a vector type if the def is internal, otherwise supply
5657 NULL_TREE and leave it up to the caller to figure out a proper
5658 type for the use stmt. */
5659 if (*dt == vect_internal_def
5660 || *dt == vect_induction_def
5661 || *dt == vect_reduction_def
5662 || *dt == vect_double_reduction_def
5663 || *dt == vect_nested_cycle)
5665 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
/* For an irrelevant stmt replaced by a pattern, the vectype lives on
   the related pattern stmt.  */
5667 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5668 && !STMT_VINFO_RELEVANT (stmt_info)
5669 && !STMT_VINFO_LIVE_P (stmt_info))
5670 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5672 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5673 gcc_assert (*vectype != NULL_TREE);
5675 else if (*dt == vect_uninitialized_def
5676 || *dt == vect_constant_def
5677 || *dt == vect_external_def)
5678 *vectype = NULL_TREE;
5686 /* Function supportable_widening_operation
5688 Check whether an operation represented by the code CODE is a
5689 widening operation that is supported by the target platform in
5690 vector form (i.e., when operating on arguments of type VECTYPE_IN
5691 producing a result of type VECTYPE_OUT).
5693 Widening operations we currently support are NOP (CONVERT), FLOAT
5694 and WIDEN_MULT. This function checks if these operations are supported
5695 by the target platform either directly (via vector tree-codes), or via
5699 - CODE1 and CODE2 are codes of vector operations to be used when
5700 vectorizing the operation, if available.
5701 - DECL1 and DECL2 are decls of target builtin functions to be used
5702 when vectorizing the operation, if available. In this case,
5703 CODE1 and CODE2 are CALL_EXPR.
5704 - MULTI_STEP_CVT determines the number of required intermediate steps in
5705 case of multi-step conversion (like char->short->int - in that case
5706 MULTI_STEP_CVT will be 1).
5707 - INTERM_TYPES contains the intermediate type required to perform the
5708 widening operation (short in the above example). */
/* Decide whether widening operation CODE (NOP/CONVERT, FLOAT, WIDEN_MULT)
   from VECTYPE_IN to VECTYPE_OUT is supported by the target, either via
   vector tree codes (returned in *CODE1/*CODE2) or target builtins
   (*DECL1/*DECL2, with *CODE1/*CODE2 set to CALL_EXPR).  Multi-step
   conversions report step count in *MULTI_STEP_CVT and the intermediate
   types in *INTERM_TYPES.  NOTE(review): line-sampled fragment.  */
5711 supportable_widening_operation (enum tree_code code, gimple stmt,
5712 tree vectype_out, tree vectype_in,
5713 tree *decl1, tree *decl2,
5714 enum tree_code *code1, enum tree_code *code2,
5715 int *multi_step_cvt,
5716 VEC (tree, heap) **interm_types)
5718 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5719 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5720 struct loop *vect_loop = NULL;
5722 enum machine_mode vec_mode;
5723 enum insn_code icode1, icode2;
5724 optab optab1, optab2;
5725 tree vectype = vectype_in;
5726 tree wide_vectype = vectype_out;
5727 enum tree_code c1, c2;
5730 vect_loop = LOOP_VINFO_LOOP (loop_info);
5732 /* The result of a vectorized widening operation usually requires two vectors
5733 (because the widened results do not fit in one vector). The generated
5734 vector results would normally be expected to be generated in the same
5735 order as in the original scalar computation, i.e. if 8 results are
5736 generated in each vector iteration, they are to be organized as follows:
5737 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5739 However, in the special case that the result of the widening operation is
5740 used in a reduction computation only, the order doesn't matter (because
5741 when vectorizing a reduction we change the order of the computation).
5742 Some targets can take advantage of this and generate more efficient code.
5743 For example, targets like Altivec, that support widen_mult using a sequence
5744 of {mult_even,mult_odd} generate the following vectors:
5745 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5747 When vectorizing outer-loops, we execute the inner-loop sequentially
5748 (each vectorized inner-loop iteration contributes to VF outer-loop
5749 iterations in parallel). We therefore don't allow to change the order
5750 of the computation in the inner-loop during outer-loop vectorization. */
5753 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5754 && !nested_in_vect_loop_p (vect_loop, stmt)
/* Reduction-only widen_mult: use the target's even/odd multiply
   builtins, which are order-insensitive.  */
5760 && code == WIDEN_MULT_EXPR
5761 && targetm.vectorize.builtin_mul_widen_even
5762 && targetm.vectorize.builtin_mul_widen_even (vectype)
5763 && targetm.vectorize.builtin_mul_widen_odd
5764 && targetm.vectorize.builtin_mul_widen_odd (vectype)
5766 if (vect_print_dump_info (REPORT_DETAILS))
5767 fprintf (vect_dump, "Unordered widening operation detected.");
5769 *code1 = *code2 = CALL_EXPR;
5770 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5771 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
/* Map the scalar widening code to its HI/LO vector tree-code pair,
   honoring endianness.  */
5777 case WIDEN_MULT_EXPR:
5778 if (BYTES_BIG_ENDIAN)
5780 c1 = VEC_WIDEN_MULT_HI_EXPR;
5781 c2 = VEC_WIDEN_MULT_LO_EXPR;
5785 c2 = VEC_WIDEN_MULT_HI_EXPR;
5786 c1 = VEC_WIDEN_MULT_LO_EXPR;
5791 if (BYTES_BIG_ENDIAN)
5793 c1 = VEC_UNPACK_HI_EXPR;
5794 c2 = VEC_UNPACK_LO_EXPR;
5798 c2 = VEC_UNPACK_HI_EXPR;
5799 c1 = VEC_UNPACK_LO_EXPR;
5804 if (BYTES_BIG_ENDIAN)
5806 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5807 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5811 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5812 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5816 case FIX_TRUNC_EXPR:
5817 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5818 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5819 computing the operation. */
5826 if (code == FIX_TRUNC_EXPR)
5828 /* The signedness is determined from output operand. */
5829 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5830 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5834 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5835 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5838 if (!optab1 || !optab2)
/* Both HI and LO variants must have an insn for the input mode.  */
5841 vec_mode = TYPE_MODE (vectype);
5842 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5843 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5846 /* Check if it's a multi-step conversion that can be done using intermediate
5848 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5849 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5852 tree prev_type = vectype, intermediate_type;
5853 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5854 optab optab3, optab4;
/* Only plain conversions can be widened in several steps.  */
5856 if (!CONVERT_EXPR_CODE_P (code))
5862 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5863 intermediate steps in promotion sequence. We try
5864 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5866 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5867 for (i = 0; i < 3; i++)
5869 intermediate_mode = insn_data[icode1].operand[0].mode;
5870 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5871 TYPE_UNSIGNED (prev_type));
5872 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5873 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
/* Each step needs HI/LO insns from the previous mode into the
   intermediate mode, and from the intermediate mode onward.  */
5875 if (!optab3 || !optab4
5876 || ((icode1 = optab_handler (optab1, prev_mode))
5877 == CODE_FOR_nothing)
5878 || insn_data[icode1].operand[0].mode != intermediate_mode
5879 || ((icode2 = optab_handler (optab2, prev_mode))
5880 == CODE_FOR_nothing)
5881 || insn_data[icode2].operand[0].mode != intermediate_mode
5882 || ((icode1 = optab_handler (optab3, intermediate_mode))
5883 == CODE_FOR_nothing)
5884 || ((icode2 = optab_handler (optab4, intermediate_mode))
5885 == CODE_FOR_nothing))
5888 VEC_quick_push (tree, *interm_types, intermediate_type);
5889 (*multi_step_cvt)++;
/* Stop once the intermediate result already has the wide mode.  */
5891 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5892 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5895 prev_type = intermediate_type;
5896 prev_mode = intermediate_mode;
5908 /* Function supportable_narrowing_operation
5910 Check whether an operation represented by the code CODE is a
5911 narrowing operation that is supported by the target platform in
5912 vector form (i.e., when operating on arguments of type VECTYPE_IN
5913 and producing a result of type VECTYPE_OUT).
5915 Narrowing operations we currently support are NOP (CONVERT) and
5916 FIX_TRUNC. This function checks if these operations are supported by
5917 the target platform directly via vector tree-codes.
Output:
5920 - CODE1 is the code of a vector operation to be used when
5921 vectorizing the operation, if available.
5922 - MULTI_STEP_CVT determines the number of required intermediate steps in
5923 case of multi-step conversion (like int->short->char - in that case
5924 MULTI_STEP_CVT will be 1).
5925 - INTERM_TYPES contains the intermediate type required to perform the
5926 narrowing operation (short in the above example). */
5929 supportable_narrowing_operation (enum tree_code code,
5930 tree vectype_out, tree vectype_in,
5931 enum tree_code *code1, int *multi_step_cvt,
5932 VEC (tree, heap) **interm_types)
5934 enum machine_mode vec_mode;
5935 enum insn_code icode1;
5936 optab optab1, interm_optab;
5937 tree vectype = vectype_in;
5938 tree narrow_vectype = vectype_out;
5940 tree intermediate_type, prev_type;
5946 c1 = VEC_PACK_TRUNC_EXPR;
5949 case FIX_TRUNC_EXPR:
5950 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5954 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5955 tree code and optabs used for computing the operation. */
5962 if (code == FIX_TRUNC_EXPR)
5963 /* The signedness is determined from output operand. */
5964 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5966 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5971 vec_mode = TYPE_MODE (vectype);
5972 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5975 /* Check if it's a multi-step conversion that can be done using intermediate
5977 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5979 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5982 prev_type = vectype;
5983 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5984 intermediate steps in promotion sequence. We try
5985 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5987 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5988 for (i = 0; i < 3; i++)
5990 intermediate_mode = insn_data[icode1].operand[0].mode;
5991 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5992 TYPE_UNSIGNED (prev_type));
5993 interm_optab = optab_for_tree_code (c1, intermediate_type,
5996 || ((icode1 = optab_handler (optab1, prev_mode))
5997 == CODE_FOR_nothing)
5998 || insn_data[icode1].operand[0].mode != intermediate_mode
5999 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6000 == CODE_FOR_nothing))
6003 VEC_quick_push (tree, *interm_types, intermediate_type);
6004 (*multi_step_cvt)++;
6006 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6009 prev_type = intermediate_type;
6010 prev_mode = intermediate_mode;