1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
63 tree vect_type, vect, vect_name, array_ref;
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern, in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
147 if (!used_in_pattern)
149 imm_use_iterator imm_iter;
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
157 lhs = gimple_call_lhs (stmt);
159 /* This use is out of pattern use, if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
162 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
164 if (is_gimple_debug (USE_STMT (use_p)))
166 use_stmt = USE_STMT (use_p);
168 if (vinfo_for_stmt (use_stmt)
169 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
179 /* This is the last stmt in a sequence that was detected as a
180 pattern that can potentially be vectorized. Don't mark the stmt
181 as relevant/live because it's not going to be vectorized.
182 Instead mark the pattern-stmt that replaces it. */
184 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 stmt_info = vinfo_for_stmt (pattern_stmt);
190 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
191 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
192 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
197 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
198 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
199 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
202 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "already marked relevant/live.");
209 VEC_safe_push (gimple, heap, *worklist, stmt);
213 /* Function vect_stmt_relevant_p.
215 Return true if STMT in loop that is represented by LOOP_VINFO is
216 "relevant for vectorization".
218 A stmt is considered "relevant for vectorization" if:
219 - it has uses outside the loop.
220 - it has vdefs (it alters memory).
221 - control stmts in the loop (except for the exit condition).
223 CHECKME: what other side effects would the vectorizer allow? */
226 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
227 enum vect_relevant *relevant, bool *live_p)
229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 imm_use_iterator imm_iter;
235 *relevant = vect_unused_in_scope;
238 /* cond stmt other than loop exit cond. */
239 if (is_ctrl_stmt (stmt)
240 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
241 != loop_exit_ctrl_vec_info_type)
242 *relevant = vect_used_in_scope;
244 /* changing memory. */
245 if (gimple_code (stmt) != GIMPLE_PHI)
246 if (gimple_vdef (stmt))
248 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
250 *relevant = vect_used_in_scope;
253 /* uses outside the loop. */
254 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 basic_block bb = gimple_bb (USE_STMT (use_p));
259 if (!flow_bb_inside_loop_p (loop, bb))
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264 if (is_gimple_debug (USE_STMT (use_p)))
267 /* We expect all such uses to be in the loop exit phis
268 (because of loop closed form) */
269 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
270 gcc_assert (bb == single_exit (loop)->dest);
277 return (*live_p || *relevant);
281 /* Function exist_non_indexing_operands_for_use_p
283 USE is one of the uses attached to STMT. Check if USE is
284 used in STMT for anything other than indexing an array. */
287 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292 /* USE corresponds to some operand in STMT. If there is no data
293 reference in STMT, then any operand that corresponds to USE
294 is not indexing an array. */
295 if (!STMT_VINFO_DATA_REF (stmt_info))
298 /* STMT has a data_ref. FORNOW this means that its of one of
302 (This should have been verified in analyze_data_refs).
304 'var' in the second case corresponds to a def, not a use,
305 so USE cannot correspond to any operands that are not used
308 Therefore, all we need to check is if STMT falls into the
309 first case, and whether var corresponds to USE. */
311 if (!gimple_assign_copy_p (stmt))
313 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 operand = gimple_assign_rhs1 (stmt);
316 if (TREE_CODE (operand) != SSA_NAME)
327 Function process_use.
330 - a USE in STMT in a loop represented by LOOP_VINFO
331 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
332 that defined USE. This is done by calling mark_relevant and passing it
333 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
336 Generally, LIVE_P and RELEVANT are used to define the liveness and
337 relevance info of the DEF_STMT of this USE:
338 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
339 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
341 - case 1: If USE is used only for address computations (e.g. array indexing),
342 which does not need to be directly vectorized, then the liveness/relevance
343 of the respective DEF_STMT is left unchanged.
344 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
345 skip DEF_STMT cause it had already been processed.
346 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
347 be modified accordingly.
349 Return true if everything is as expected. Return false otherwise. */
352 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
353 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
357 stmt_vec_info dstmt_vinfo;
358 basic_block bb, def_bb;
361 enum vect_def_type dt;
363 /* case 1: we are only interested in uses that need to be vectorized. Uses
364 that are used for address computation are not considered relevant. */
365 if (!exist_non_indexing_operands_for_use_p (use, stmt))
368 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
370 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
371 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
375 if (!def_stmt || gimple_nop_p (def_stmt))
378 def_bb = gimple_bb (def_stmt);
379 if (!flow_bb_inside_loop_p (loop, def_bb))
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump, "def_stmt is out of loop.");
386 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
387 DEF_STMT must have already been processed, because this should be the
388 only way that STMT, which is a reduction-phi, was put in the worklist,
389 as there should be no other uses for DEF_STMT in the loop. So we just
390 check that everything is as expected, and we are done. */
391 dstmt_vinfo = vinfo_for_stmt (def_stmt);
392 bb = gimple_bb (stmt);
393 if (gimple_code (stmt) == GIMPLE_PHI
394 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
395 && gimple_code (def_stmt) != GIMPLE_PHI
396 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
397 && bb->loop_father == def_bb->loop_father)
399 if (vect_print_dump_info (REPORT_DETAILS))
400 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
401 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
402 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
403 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
404 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
405 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
409 /* case 3a: outer-loop stmt defining an inner-loop stmt:
410 outer-loop-header-bb:
416 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
418 if (vect_print_dump_info (REPORT_DETAILS))
419 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
423 case vect_unused_in_scope:
424 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
425 vect_used_in_scope : vect_unused_in_scope;
428 case vect_used_in_outer_by_reduction:
429 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
430 relevant = vect_used_by_reduction;
433 case vect_used_in_outer:
434 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
435 relevant = vect_used_in_scope;
438 case vect_used_in_scope:
446 /* case 3b: inner-loop stmt defining an outer-loop stmt:
447 outer-loop-header-bb:
451 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
453 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
455 if (vect_print_dump_info (REPORT_DETAILS))
456 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
460 case vect_unused_in_scope:
461 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
462 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
463 vect_used_in_outer_by_reduction : vect_unused_in_scope;
466 case vect_used_by_reduction:
467 relevant = vect_used_in_outer_by_reduction;
470 case vect_used_in_scope:
471 relevant = vect_used_in_outer;
479 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
480 is_pattern_stmt_p (stmt_vinfo));
485 /* Function vect_mark_stmts_to_be_vectorized.
487 Not all stmts in the loop need to be vectorized. For example:
496 Stmt 1 and 3 do not need to be vectorized, because loop control and
497 addressing of vectorized data-refs are handled differently.
499 This pass detects such stmts. */
502 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
504 VEC(gimple,heap) *worklist;
505 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
506 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
507 unsigned int nbbs = loop->num_nodes;
508 gimple_stmt_iterator si;
511 stmt_vec_info stmt_vinfo;
515 enum vect_relevant relevant, tmp_relevant;
516 enum vect_def_type def_type;
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
521 worklist = VEC_alloc (gimple, heap, 64);
523 /* 1. Init worklist. */
524 for (i = 0; i < nbbs; i++)
527 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
530 if (vect_print_dump_info (REPORT_DETAILS))
532 fprintf (vect_dump, "init: phi relevant? ");
533 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
536 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
537 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
539 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
541 stmt = gsi_stmt (si);
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "init: stmt relevant? ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
548 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
549 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
553 /* 2. Process_worklist */
554 while (VEC_length (gimple, worklist) > 0)
559 stmt = VEC_pop (gimple, worklist);
560 if (vect_print_dump_info (REPORT_DETAILS))
562 fprintf (vect_dump, "worklist: examine stmt: ");
563 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
566 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
567 (DEF_STMT) as relevant/irrelevant and live/dead according to the
568 liveness and relevance properties of STMT. */
569 stmt_vinfo = vinfo_for_stmt (stmt);
570 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
571 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
573 /* Generally, the liveness and relevance properties of STMT are
574 propagated as is to the DEF_STMTs of its USEs:
575 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
576 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
578 One exception is when STMT has been identified as defining a reduction
579 variable; in this case we set the liveness/relevance as follows:
581 relevant = vect_used_by_reduction
582 This is because we distinguish between two kinds of relevant stmts -
583 those that are used by a reduction computation, and those that are
584 (also) used by a regular computation. This allows us later on to
585 identify stmts that are used solely by a reduction, and therefore the
586 order of the results that they produce does not have to be kept. */
588 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
589 tmp_relevant = relevant;
592 case vect_reduction_def:
593 switch (tmp_relevant)
595 case vect_unused_in_scope:
596 relevant = vect_used_by_reduction;
599 case vect_used_by_reduction:
600 if (gimple_code (stmt) == GIMPLE_PHI)
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "unsupported use of reduction.");
608 VEC_free (gimple, heap, worklist);
615 case vect_nested_cycle:
616 if (tmp_relevant != vect_unused_in_scope
617 && tmp_relevant != vect_used_in_outer_by_reduction
618 && tmp_relevant != vect_used_in_outer)
620 if (vect_print_dump_info (REPORT_DETAILS))
621 fprintf (vect_dump, "unsupported use of nested cycle.");
623 VEC_free (gimple, heap, worklist);
630 case vect_double_reduction_def:
631 if (tmp_relevant != vect_unused_in_scope
632 && tmp_relevant != vect_used_by_reduction)
634 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "unsupported use of double reduction.");
637 VEC_free (gimple, heap, worklist);
648 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
650 /* Pattern statements are not inserted into the code, so
651 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
652 have to scan the RHS or function arguments instead. */
653 if (is_gimple_assign (stmt))
655 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
656 tree op = gimple_assign_rhs1 (stmt);
659 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
661 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
662 live_p, relevant, &worklist)
663 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
664 live_p, relevant, &worklist))
666 VEC_free (gimple, heap, worklist);
671 for (; i < gimple_num_ops (stmt); i++)
673 op = gimple_op (stmt, i);
674 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
677 VEC_free (gimple, heap, worklist);
682 else if (is_gimple_call (stmt))
684 for (i = 0; i < gimple_call_num_args (stmt); i++)
686 tree arg = gimple_call_arg (stmt, i);
687 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
690 VEC_free (gimple, heap, worklist);
697 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
699 tree op = USE_FROM_PTR (use_p);
700 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
703 VEC_free (gimple, heap, worklist);
707 } /* while worklist */
709 VEC_free (gimple, heap, worklist);
714 /* Get cost by calling cost target builtin. */
717 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
719 tree dummy_type = NULL;
722 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
727 /* Get cost for STMT. */
730 cost_for_stmt (gimple stmt)
732 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
734 switch (STMT_VINFO_TYPE (stmt_info))
736 case load_vec_info_type:
737 return vect_get_stmt_cost (scalar_load);
738 case store_vec_info_type:
739 return vect_get_stmt_cost (scalar_store);
740 case op_vec_info_type:
741 case condition_vec_info_type:
742 case assignment_vec_info_type:
743 case reduc_vec_info_type:
744 case induc_vec_info_type:
745 case type_promotion_vec_info_type:
746 case type_demotion_vec_info_type:
747 case type_conversion_vec_info_type:
748 case call_vec_info_type:
749 return vect_get_stmt_cost (scalar_stmt);
750 case undef_vec_info_type:
756 /* Function vect_model_simple_cost.
758 Models cost for simple operations, i.e. those that only emit ncopies of a
759 single op. Right now, this does not account for multiple insns that could
760 be generated for the single vector op. We will handle that shortly. */
763 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
764 enum vect_def_type *dt, slp_tree slp_node)
767 int inside_cost = 0, outside_cost = 0;
769 /* The SLP costs were already calculated during SLP tree build. */
770 if (PURE_SLP_STMT (stmt_info))
773 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
775 /* FORNOW: Assuming maximum 2 args per stmts. */
776 for (i = 0; i < 2; i++)
778 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
779 outside_cost += vect_get_stmt_cost (vector_stmt);
782 if (vect_print_dump_info (REPORT_COST))
783 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
784 "outside_cost = %d .", inside_cost, outside_cost);
786 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
787 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
788 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
792 /* Function vect_cost_strided_group_size
794 For strided load or store, return the group_size only if it is the first
795 load or store of a group, else return 1. This ensures that group size is
796 only returned once per group. */
799 vect_cost_strided_group_size (stmt_vec_info stmt_info)
801 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
803 if (first_stmt == STMT_VINFO_STMT (stmt_info))
804 return GROUP_SIZE (stmt_info);
810 /* Function vect_model_store_cost
812 Models cost for stores. In the case of strided accesses, one access
813 has the overhead of the strided access attributed to it. */
816 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
817 bool store_lanes_p, enum vect_def_type dt,
821 unsigned int inside_cost = 0, outside_cost = 0;
822 struct data_reference *first_dr;
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
829 if (dt == vect_constant_def || dt == vect_external_def)
830 outside_cost = vect_get_stmt_cost (scalar_to_vec);
832 /* Strided access? */
833 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
837 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
842 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
843 group_size = vect_cost_strided_group_size (stmt_info);
846 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
848 /* Not a strided access. */
852 first_dr = STMT_VINFO_DATA_REF (stmt_info);
855 /* We assume that the cost of a single store-lanes instruction is
856 equivalent to the cost of GROUP_SIZE separate stores. If a strided
857 access is instead being provided by a permute-and-store operation,
858 include the cost of the permutes. */
859 if (!store_lanes_p && group_size > 1)
861 /* Uses a high and low interleave operation for each needed permute. */
862 inside_cost = ncopies * exact_log2(group_size) * group_size
863 * vect_get_stmt_cost (vector_stmt);
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
871 /* Costs of the stores. */
872 vect_get_store_cost (first_dr, ncopies, &inside_cost);
874 if (vect_print_dump_info (REPORT_COST))
875 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
876 "outside_cost = %d .", inside_cost, outside_cost);
878 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
879 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
880 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
884 /* Calculate cost of DR's memory access. */
886 vect_get_store_cost (struct data_reference *dr, int ncopies,
887 unsigned int *inside_cost)
889 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
891 switch (alignment_support_scheme)
895 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
897 if (vect_print_dump_info (REPORT_COST))
898 fprintf (vect_dump, "vect_model_store_cost: aligned.");
903 case dr_unaligned_supported:
905 gimple stmt = DR_STMT (dr);
906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
907 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
909 /* Here, we assign an additional cost for the unaligned store. */
910 *inside_cost += ncopies
911 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
912 vectype, DR_MISALIGNMENT (dr));
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
927 /* Function vect_model_load_cost
929 Models cost for loads. In the case of strided accesses, the last access
930 has the overhead of the strided access attributed to it. Since unaligned
931 accesses are supported for loads, we also account for the costs of the
932 access scheme chosen. */
935 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
940 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
941 unsigned int inside_cost = 0, outside_cost = 0;
943 /* The SLP costs were already calculated during SLP tree build. */
944 if (PURE_SLP_STMT (stmt_info))
947 /* Strided accesses? */
948 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
949 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
951 group_size = vect_cost_strided_group_size (stmt_info);
952 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
954 /* Not a strided access. */
961 /* We assume that the cost of a single load-lanes instruction is
962 equivalent to the cost of GROUP_SIZE separate loads. If a strided
963 access is instead being provided by a load-and-permute operation,
964 include the cost of the permutes. */
965 if (!load_lanes_p && group_size > 1)
967 /* Uses an even and odd extract operations for each needed permute. */
968 inside_cost = ncopies * exact_log2(group_size) * group_size
969 * vect_get_stmt_cost (vector_stmt);
971 if (vect_print_dump_info (REPORT_COST))
972 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
976 /* The loads themselves. */
977 vect_get_load_cost (first_dr, ncopies,
978 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
980 &inside_cost, &outside_cost);
982 if (vect_print_dump_info (REPORT_COST))
983 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
984 "outside_cost = %d .", inside_cost, outside_cost);
986 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
987 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
988 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
992 /* Calculate cost of DR's memory access. */
994 vect_get_load_cost (struct data_reference *dr, int ncopies,
995 bool add_realign_cost, unsigned int *inside_cost,
996 unsigned int *outside_cost)
998 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1000 switch (alignment_support_scheme)
1004 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1006 if (vect_print_dump_info (REPORT_COST))
1007 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1011 case dr_unaligned_supported:
1013 gimple stmt = DR_STMT (dr);
1014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1017 /* Here, we assign an additional cost for the unaligned load. */
1018 *inside_cost += ncopies
1019 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1020 vectype, DR_MISALIGNMENT (dr));
1021 if (vect_print_dump_info (REPORT_COST))
1022 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1027 case dr_explicit_realign:
1029 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1030 + vect_get_stmt_cost (vector_stmt));
1032 /* FIXME: If the misalignment remains fixed across the iterations of
1033 the containing loop, the following cost should be added to the
1035 if (targetm.vectorize.builtin_mask_for_load)
1036 *inside_cost += vect_get_stmt_cost (vector_stmt);
1040 case dr_explicit_realign_optimized:
1042 if (vect_print_dump_info (REPORT_COST))
1043 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1046 /* Unaligned software pipeline has a load of an address, an initial
1047 load, and possibly a mask operation to "prime" the loop. However,
1048 if this is an access in a group of loads, which provide strided
1049 access, then the above cost should only be considered for one
1050 access in the group. Inside the loop, there is a load op
1051 and a realignment op. */
1053 if (add_realign_cost)
1055 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1056 if (targetm.vectorize.builtin_mask_for_load)
1057 *outside_cost += vect_get_stmt_cost (vector_stmt);
1060 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1061 + vect_get_stmt_cost (vector_stmt));
1071 /* Function vect_init_vector.
1073 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1074 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1075 is not NULL. Otherwise, place the initialization at the loop preheader.
1076 Return the DEF of INIT_STMT.
1077 It will be used in the vectorization of STMT. */
1080 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1081 gimple_stmt_iterator *gsi)
1083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1091 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1092 add_referenced_var (new_var);
1093 init_stmt = gimple_build_assign (new_var, vector_var);
1094 new_temp = make_ssa_name (new_var, init_stmt);
1095 gimple_assign_set_lhs (init_stmt, new_temp);
1098 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1105 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1107 if (nested_in_vect_loop_p (loop, stmt))
1110 pe = loop_preheader_edge (loop);
1111 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1112 gcc_assert (!new_bb);
1116 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1118 gimple_stmt_iterator gsi_bb_start;
1120 gcc_assert (bb_vinfo);
1121 bb = BB_VINFO_BB (bb_vinfo);
1122 gsi_bb_start = gsi_after_labels (bb);
1123 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1127 if (vect_print_dump_info (REPORT_DETAILS))
1129 fprintf (vect_dump, "created new init_stmt: ");
1130 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1133 vec_oprnd = gimple_assign_lhs (init_stmt);
1138 /* Function vect_get_vec_def_for_operand.
1140 OP is an operand in STMT. This function returns a (vector) def that will be
1141 used in the vectorized stmt for STMT.
1143 In the case that OP is an SSA_NAME which is defined in the loop, then
1144 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1146 In case OP is an invariant or constant, a new stmt that creates a vector def
1147 needs to be introduced. */
1150 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1155 stmt_vec_info def_stmt_info = NULL;
1156 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1157 unsigned int nunits;
1158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1164 enum vect_def_type dt;
1168 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1171 print_generic_expr (vect_dump, op, TDF_SLIM);
1174 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1176 gcc_assert (is_simple_use);
1177 if (vect_print_dump_info (REPORT_DETAILS))
1181 fprintf (vect_dump, "def = ");
1182 print_generic_expr (vect_dump, def, TDF_SLIM);
1186 fprintf (vect_dump, " def_stmt = ");
1187 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1193 /* Case 1: operand is a constant. */
1194 case vect_constant_def:
1196 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1197 gcc_assert (vector_type);
1198 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1203 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1204 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1207 vec_cst = build_vector_from_val (vector_type, op);
1208 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1211 /* Case 2: operand is defined outside the loop - loop invariant. */
1212 case vect_external_def:
1214 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1215 gcc_assert (vector_type);
1216 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1221 /* Create 'vec_inv = {inv,inv,..,inv}' */
1222 if (vect_print_dump_info (REPORT_DETAILS))
1223 fprintf (vect_dump, "Create vector_inv.");
1225 for (i = nunits - 1; i >= 0; --i)
1227 t = tree_cons (NULL_TREE, def, t);
1230 /* FIXME: use build_constructor directly. */
1231 vec_inv = build_constructor_from_list (vector_type, t);
1232 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1235 /* Case 3: operand is defined inside the loop. */
1236 case vect_internal_def:
1239 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1241 /* Get the def from the vectorized stmt. */
1242 def_stmt_info = vinfo_for_stmt (def_stmt);
1244 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1245 /* Get vectorized pattern statement. */
1247 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1248 && !STMT_VINFO_RELEVANT (def_stmt_info))
1249 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1250 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1251 gcc_assert (vec_stmt);
1252 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1253 vec_oprnd = PHI_RESULT (vec_stmt);
1254 else if (is_gimple_call (vec_stmt))
1255 vec_oprnd = gimple_call_lhs (vec_stmt);
1257 vec_oprnd = gimple_assign_lhs (vec_stmt);
1261 /* Case 4: operand is defined by a loop header phi - reduction */
1262 case vect_reduction_def:
1263 case vect_double_reduction_def:
1264 case vect_nested_cycle:
1268 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1269 loop = (gimple_bb (def_stmt))->loop_father;
1271 /* Get the def before the loop */
1272 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1273 return get_initial_def_for_reduction (stmt, op, scalar_def);
1276 /* Case 5: operand is defined by loop-header phi - induction. */
1277 case vect_induction_def:
1279 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1281 /* Get the def from the vectorized stmt. */
1282 def_stmt_info = vinfo_for_stmt (def_stmt);
1283 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1284 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1285 vec_oprnd = PHI_RESULT (vec_stmt);
1287 vec_oprnd = gimple_get_lhs (vec_stmt);
1297 /* Function vect_get_vec_def_for_stmt_copy
1299 Return a vector-def for an operand. This function is used when the
1300 vectorized stmt to be created (by the caller to this function) is a "copy"
1301 created in case the vectorized result cannot fit in one vector, and several
1302 copies of the vector-stmt are required. In this case the vector-def is
1303 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1304 of the stmt that defines VEC_OPRND.
1305 DT is the type of the vector def VEC_OPRND.
1308 In case the vectorization factor (VF) is bigger than the number
1309 of elements that can fit in a vectype (nunits), we have to generate
1310 more than one vector stmt to vectorize the scalar stmt. This situation
1311 arises when there are multiple data-types operated upon in the loop; the
1312 smallest data-type determines the VF, and as a result, when vectorizing
1313 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1314 vector stmt (each computing a vector of 'nunits' results, and together
1315 computing 'VF' results in each iteration). This function is called when
1316 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1317 which VF=16 and nunits=4, so the number of copies required is 4):
1319 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1321 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1322 VS1.1: vx.1 = memref1 VS1.2
1323 VS1.2: vx.2 = memref2 VS1.3
1324 VS1.3: vx.3 = memref3
1326 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1327 VSnew.1: vz1 = vx.1 + ... VSnew.2
1328 VSnew.2: vz2 = vx.2 + ... VSnew.3
1329 VSnew.3: vz3 = vx.3 + ...
1331 The vectorization of S1 is explained in vectorizable_load.
1332 The vectorization of S2:
1333 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1334 the function 'vect_get_vec_def_for_operand' is called to
1335 get the relevant vector-def for each operand of S2. For operand x it
1336 returns the vector-def 'vx.0'.
1338 To create the remaining copies of the vector-stmt (VSnew.j), this
1339 function is called to get the relevant vector-def for each operand. It is
1340 obtained from the respective VS1.j stmt, which is recorded in the
1341 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1343 For example, to obtain the vector-def 'vx.1' in order to create the
1344 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1345 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1346 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1347 and return its def ('vx.1').
1348 Overall, to create the above sequence this function will be called 3 times:
1349 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1350 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1351 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1354 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1356 gimple vec_stmt_for_operand;
1357 stmt_vec_info def_stmt_info;
1359 /* Do nothing; can reuse same def. */
1360 if (dt == vect_external_def || dt == vect_constant_def )
1363 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1364 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1365 gcc_assert (def_stmt_info);
1366 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1367 gcc_assert (vec_stmt_for_operand);
1368 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1369 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1370 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1372 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1377 /* Get vectorized definitions for the operands to create a copy of an original
1378 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1381 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1382 VEC(tree,heap) **vec_oprnds0,
1383 VEC(tree,heap) **vec_oprnds1)
1385 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1387 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1388 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1390 if (vec_oprnds1 && *vec_oprnds1)
1392 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1393 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1394 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1399 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1403 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1404 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1408 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1413 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1414 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1415 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1419 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1420 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1421 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1427 /* Function vect_finish_stmt_generation.
1429 Insert a new stmt. */
1432 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1433 gimple_stmt_iterator *gsi)
1435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1437 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1439 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1441 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1443 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1446 if (vect_print_dump_info (REPORT_DETAILS))
1448 fprintf (vect_dump, "add new stmt: ");
1449 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1452 gimple_set_location (vec_stmt, gimple_location (stmt));
1455 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1456 a function declaration if the target has a vectorized version
1457 of the function, or NULL_TREE if the function cannot be vectorized. */
1460 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1462 tree fndecl = gimple_call_fndecl (call);
1464 /* We only handle functions that do not read or clobber memory -- i.e.
1465 const or novops ones. */
1466 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1470 || TREE_CODE (fndecl) != FUNCTION_DECL
1471 || !DECL_BUILT_IN (fndecl))
1474 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1478 /* Function vectorizable_call.
1480 Check if STMT performs a function call that can be vectorized.
1481 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1482 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1483 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1486 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1491 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1492 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1493 tree vectype_out, vectype_in;
1496 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1497 tree fndecl, new_temp, def, rhs_type;
1499 enum vect_def_type dt[3]
1500 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1501 gimple new_stmt = NULL;
1503 VEC(tree, heap) *vargs = NULL;
1504 enum { NARROW, NONE, WIDEN } modifier;
1508 /* FORNOW: unsupported in basic block SLP. */
1509 gcc_assert (loop_vinfo);
1511 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1514 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1517 /* FORNOW: SLP not supported. */
1518 if (STMT_SLP_TYPE (stmt_info))
1521 /* Is STMT a vectorizable call? */
1522 if (!is_gimple_call (stmt))
1525 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1528 if (stmt_can_throw_internal (stmt))
1531 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1533 /* Process function arguments. */
1534 rhs_type = NULL_TREE;
1535 vectype_in = NULL_TREE;
1536 nargs = gimple_call_num_args (stmt);
1538 /* Bail out if the function has more than three arguments, we do not have
1539 interesting builtin functions to vectorize with more than two arguments
1540 except for fma. No arguments is also not good. */
1541 if (nargs == 0 || nargs > 3)
1544 for (i = 0; i < nargs; i++)
1548 op = gimple_call_arg (stmt, i);
1550 /* We can only handle calls with arguments of the same type. */
1552 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "argument types differ.");
1559 rhs_type = TREE_TYPE (op);
1561 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1562 &def_stmt, &def, &dt[i], &opvectype))
1564 if (vect_print_dump_info (REPORT_DETAILS))
1565 fprintf (vect_dump, "use not simple.");
1570 vectype_in = opvectype;
1572 && opvectype != vectype_in)
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "argument vector types differ.");
1579 /* If all arguments are external or constant defs use a vector type with
1580 the same size as the output vector type. */
1582 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1584 gcc_assert (vectype_in);
1587 if (vect_print_dump_info (REPORT_DETAILS))
1589 fprintf (vect_dump, "no vectype for scalar type ");
1590 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1597 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1598 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1599 if (nunits_in == nunits_out / 2)
1601 else if (nunits_out == nunits_in)
1603 else if (nunits_out == nunits_in / 2)
1608 /* For now, we only vectorize functions if a target specific builtin
1609 is available. TODO -- in some cases, it might be profitable to
1610 insert the calls for pieces of the vector, in order to be able
1611 to vectorize other operations in the loop. */
1612 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1613 if (fndecl == NULL_TREE)
1615 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "function is not vectorizable.");
1621 gcc_assert (!gimple_vuse (stmt));
1623 if (modifier == NARROW)
1624 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1626 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1628 /* Sanity check: make sure that at least one copy of the vectorized stmt
1629 needs to be generated. */
1630 gcc_assert (ncopies >= 1);
1632 if (!vec_stmt) /* transformation not required. */
1634 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1635 if (vect_print_dump_info (REPORT_DETAILS))
1636 fprintf (vect_dump, "=== vectorizable_call ===");
1637 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1643 if (vect_print_dump_info (REPORT_DETAILS))
1644 fprintf (vect_dump, "transform call.");
1647 scalar_dest = gimple_call_lhs (stmt);
1648 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1650 prev_stmt_info = NULL;
1654 for (j = 0; j < ncopies; ++j)
1656 /* Build argument list for the vectorized call. */
1658 vargs = VEC_alloc (tree, heap, nargs);
1660 VEC_truncate (tree, vargs, 0);
1662 for (i = 0; i < nargs; i++)
1664 op = gimple_call_arg (stmt, i);
1667 = vect_get_vec_def_for_operand (op, stmt, NULL);
1670 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1672 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1675 VEC_quick_push (tree, vargs, vec_oprnd0);
1678 new_stmt = gimple_build_call_vec (fndecl, vargs);
1679 new_temp = make_ssa_name (vec_dest, new_stmt);
1680 gimple_call_set_lhs (new_stmt, new_temp);
1682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1683 mark_symbols_for_renaming (new_stmt);
1686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1690 prev_stmt_info = vinfo_for_stmt (new_stmt);
1696 for (j = 0; j < ncopies; ++j)
1698 /* Build argument list for the vectorized call. */
1700 vargs = VEC_alloc (tree, heap, nargs * 2);
1702 VEC_truncate (tree, vargs, 0);
1704 for (i = 0; i < nargs; i++)
1706 op = gimple_call_arg (stmt, i);
1710 = vect_get_vec_def_for_operand (op, stmt, NULL);
1712 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1716 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1718 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1720 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1723 VEC_quick_push (tree, vargs, vec_oprnd0);
1724 VEC_quick_push (tree, vargs, vec_oprnd1);
1727 new_stmt = gimple_build_call_vec (fndecl, vargs);
1728 new_temp = make_ssa_name (vec_dest, new_stmt);
1729 gimple_call_set_lhs (new_stmt, new_temp);
1731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1732 mark_symbols_for_renaming (new_stmt);
1735 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1737 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1739 prev_stmt_info = vinfo_for_stmt (new_stmt);
1742 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1747 /* No current target implements this case. */
1751 VEC_free (tree, heap, vargs);
1753 /* Update the exception handling table with the vector stmt if necessary. */
1754 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1755 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1757 /* The call in STMT might prevent it from being removed in dce.
1758 We however cannot remove it here, due to the way the ssa name
1759 it defines is mapped to the new definition. So just replace
1760 rhs of the statement with something harmless. */
1762 type = TREE_TYPE (scalar_dest);
1763 if (is_pattern_stmt_p (stmt_info))
1764 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1766 lhs = gimple_call_lhs (stmt);
1767 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1768 set_vinfo_for_stmt (new_stmt, stmt_info);
1769 set_vinfo_for_stmt (stmt, NULL);
1770 STMT_VINFO_STMT (stmt_info) = new_stmt;
1771 gsi_replace (gsi, new_stmt, false);
1772 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1778 /* Function vect_gen_widened_results_half
1780 Create a vector stmt whose code, type, number of arguments, and result
1781 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1782 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1783 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1784 needs to be created (DECL is a function-decl of a target-builtin).
1785 STMT is the original scalar stmt that we are vectorizing. */
1788 vect_gen_widened_results_half (enum tree_code code,
1790 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1791 tree vec_dest, gimple_stmt_iterator *gsi,
1797 /* Generate half of the widened result: */
1798 if (code == CALL_EXPR)
1800 /* Target specific support */
1801 if (op_type == binary_op)
1802 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1804 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1805 new_temp = make_ssa_name (vec_dest, new_stmt);
1806 gimple_call_set_lhs (new_stmt, new_temp);
1810 /* Generic support */
1811 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1812 if (op_type != binary_op)
1814 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1816 new_temp = make_ssa_name (vec_dest, new_stmt);
1817 gimple_assign_set_lhs (new_stmt, new_temp);
1819 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1825 /* Check if STMT performs a conversion operation, that can be vectorized.
1826 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1827 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1828 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1831 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1832 gimple *vec_stmt, slp_tree slp_node)
1837 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1840 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1841 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1845 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1846 gimple new_stmt = NULL;
1847 stmt_vec_info prev_stmt_info;
1850 tree vectype_out, vectype_in;
1854 enum { NARROW, NONE, WIDEN } modifier;
1856 VEC(tree,heap) *vec_oprnds0 = NULL;
1858 VEC(tree,heap) *dummy = NULL;
1861 /* Is STMT a vectorizable conversion? */
1863 /* FORNOW: unsupported in basic block SLP. */
1864 gcc_assert (loop_vinfo);
1866 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1869 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1872 if (!is_gimple_assign (stmt))
1875 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1878 code = gimple_assign_rhs_code (stmt);
1879 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1882 /* Check types of lhs and rhs. */
1883 scalar_dest = gimple_assign_lhs (stmt);
1884 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1886 op0 = gimple_assign_rhs1 (stmt);
1887 rhs_type = TREE_TYPE (op0);
1888 /* Check the operands of the operation. */
1889 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1890 &def_stmt, &def, &dt[0], &vectype_in))
1892 if (vect_print_dump_info (REPORT_DETAILS))
1893 fprintf (vect_dump, "use not simple.");
1896 /* If op0 is an external or constant defs use a vector type of
1897 the same size as the output vector type. */
1899 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1901 gcc_assert (vectype_in);
1904 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "no vectype for scalar type ");
1907 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1914 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1915 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1916 if (nunits_in == nunits_out / 2)
1918 else if (nunits_out == nunits_in)
1920 else if (nunits_out == nunits_in / 2)
1925 if (modifier == NARROW)
1926 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1928 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1930 /* Multiple types in SLP are handled by creating the appropriate number of
1931 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1933 if (slp_node || PURE_SLP_STMT (stmt_info))
1936 /* Sanity check: make sure that at least one copy of the vectorized stmt
1937 needs to be generated. */
1938 gcc_assert (ncopies >= 1);
1940 /* Supportable by target? */
1941 if ((modifier == NONE
1942 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1943 || (modifier == WIDEN
1944 && !supportable_widening_operation (code, stmt,
1945 vectype_out, vectype_in,
1948 &dummy_int, &dummy))
1949 || (modifier == NARROW
1950 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1951 &code1, &dummy_int, &dummy)))
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 fprintf (vect_dump, "conversion not supported by target.");
1958 if (modifier != NONE)
1960 /* FORNOW: SLP not supported. */
1961 if (STMT_SLP_TYPE (stmt_info))
1965 if (!vec_stmt) /* transformation not required. */
1967 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1972 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "transform conversion.");
1976 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1978 if (modifier == NONE && !slp_node)
1979 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1981 prev_stmt_info = NULL;
1985 for (j = 0; j < ncopies; j++)
1988 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1990 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1993 targetm.vectorize.builtin_conversion (code,
1994 vectype_out, vectype_in);
1995 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1997 /* Arguments are ready. create the new vector stmt. */
1998 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1999 new_temp = make_ssa_name (vec_dest, new_stmt);
2000 gimple_call_set_lhs (new_stmt, new_temp);
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2003 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2007 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2009 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2010 prev_stmt_info = vinfo_for_stmt (new_stmt);
2015 /* In case the vectorization factor (VF) is bigger than the number
2016 of elements that we can fit in a vectype (nunits), we have to
2017 generate more than one vector stmt - i.e - we need to "unroll"
2018 the vector stmt by a factor VF/nunits. */
2019 for (j = 0; j < ncopies; j++)
2022 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2024 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2026 /* Generate first half of the widened result: */
2028 = vect_gen_widened_results_half (code1, decl1,
2029 vec_oprnd0, vec_oprnd1,
2030 unary_op, vec_dest, gsi, stmt);
2032 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2034 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2035 prev_stmt_info = vinfo_for_stmt (new_stmt);
2037 /* Generate second half of the widened result: */
2039 = vect_gen_widened_results_half (code2, decl2,
2040 vec_oprnd0, vec_oprnd1,
2041 unary_op, vec_dest, gsi, stmt);
2042 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2043 prev_stmt_info = vinfo_for_stmt (new_stmt);
2048 /* In case the vectorization factor (VF) is bigger than the number
2049 of elements that we can fit in a vectype (nunits), we have to
2050 generate more than one vector stmt - i.e - we need to "unroll"
2051 the vector stmt by a factor VF/nunits. */
2052 for (j = 0; j < ncopies; j++)
2057 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2058 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2062 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2063 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2066 /* Arguments are ready. Create the new vector stmt. */
2067 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2069 new_temp = make_ssa_name (vec_dest, new_stmt);
2070 gimple_assign_set_lhs (new_stmt, new_temp);
2071 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2074 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2078 prev_stmt_info = vinfo_for_stmt (new_stmt);
2081 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2085 VEC_free (tree, heap, vec_oprnds0);
2091 /* Function vectorizable_assignment.
2093 Check if STMT performs an assignment (copy) that can be vectorized.
2094 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2095 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2096 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2099 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2100 gimple *vec_stmt, slp_tree slp_node)
2105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2106 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2107 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2111 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2112 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2115 VEC(tree,heap) *vec_oprnds = NULL;
2117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2118 gimple new_stmt = NULL;
2119 stmt_vec_info prev_stmt_info = NULL;
2120 enum tree_code code;
2123 /* Multiple types in SLP are handled by creating the appropriate number of
2124 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2126 if (slp_node || PURE_SLP_STMT (stmt_info))
2129 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2131 gcc_assert (ncopies >= 1);
2133 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2136 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2139 /* Is vectorizable assignment? */
2140 if (!is_gimple_assign (stmt))
2143 scalar_dest = gimple_assign_lhs (stmt);
2144 if (TREE_CODE (scalar_dest) != SSA_NAME)
2147 code = gimple_assign_rhs_code (stmt);
2148 if (gimple_assign_single_p (stmt)
2149 || code == PAREN_EXPR
2150 || CONVERT_EXPR_CODE_P (code))
2151 op = gimple_assign_rhs1 (stmt);
2155 if (code == VIEW_CONVERT_EXPR)
2156 op = TREE_OPERAND (op, 0);
2158 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2159 &def_stmt, &def, &dt[0], &vectype_in))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "use not simple.");
2166 /* We can handle NOP_EXPR conversions that do not change the number
2167 of elements or the vector size. */
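/* E.g. (illustrative): a NOP_EXPR from 'int' to 'unsigned int' keeps
   both the element count and the vector size (V4SI <-> V4SI), so it is
   handled here as a copy through a VIEW_CONVERT_EXPR, whereas
   'int' -> 'short' changes the vector size and is rejected below. */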
2168 if ((CONVERT_EXPR_CODE_P (code)
2169 || code == VIEW_CONVERT_EXPR)
2171 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2172 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2173 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2176 if (!vec_stmt) /* transformation not required. */
2178 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "=== vectorizable_assignment ===");
2181 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2186 if (vect_print_dump_info (REPORT_DETAILS))
2187 fprintf (vect_dump, "transform assignment.");
2190 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2193 for (j = 0; j < ncopies; j++)
2197 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2199 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2201 /* Arguments are ready. Create the new vector stmt. */
2202 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2204 if (CONVERT_EXPR_CODE_P (code)
2205 || code == VIEW_CONVERT_EXPR)
2206 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2207 new_stmt = gimple_build_assign (vec_dest, vop);
2208 new_temp = make_ssa_name (vec_dest, new_stmt);
2209 gimple_assign_set_lhs (new_stmt, new_temp);
2210 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2212 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2219 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2221 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2223 prev_stmt_info = vinfo_for_stmt (new_stmt);
2226 VEC_free (tree, heap, vec_oprnds);
2231 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2232 either as shift by a scalar or by a vector. */
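/* A hypothetical caller (illustrative only, not from this file) might
   use this to decide whether a synthesized shift will vectorize:

     if (vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd)))
       ...  */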
2235 vect_supportable_shift (enum tree_code code, tree scalar_type)
2238 enum machine_mode vec_mode;
2243 vectype = get_vectype_for_scalar_type (scalar_type);
2247 optab = optab_for_tree_code (code, vectype, optab_scalar);
2249 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2251 optab = optab_for_tree_code (code, vectype, optab_vector);
2253 || (optab_handler (optab, TYPE_MODE (vectype))
2254 == CODE_FOR_nothing))
2258 vec_mode = TYPE_MODE (vectype);
2259 icode = (int) optab_handler (optab, vec_mode);
2260 if (icode == CODE_FOR_nothing)
2267 /* Function vectorizable_shift.
2269 Check if STMT performs a shift operation that can be vectorized.
2270 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2271 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2272 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2275 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2276 gimple *vec_stmt, slp_tree slp_node)
2280 tree op0, op1 = NULL;
2281 tree vec_oprnd1 = NULL_TREE;
2282 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2284 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2285 enum tree_code code;
2286 enum machine_mode vec_mode;
2290 enum machine_mode optab_op2_mode;
2293 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2294 gimple new_stmt = NULL;
2295 stmt_vec_info prev_stmt_info;
2301 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2304 bool scalar_shift_arg = true;
2305 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2308 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2311 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2314 /* Is STMT a vectorizable binary/unary operation? */
2315 if (!is_gimple_assign (stmt))
2318 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2321 code = gimple_assign_rhs_code (stmt);
2323 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2324 || code == RROTATE_EXPR))
2327 scalar_dest = gimple_assign_lhs (stmt);
2328 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2330 op0 = gimple_assign_rhs1 (stmt);
2331 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2332 &def_stmt, &def, &dt[0], &vectype))
2334 if (vect_print_dump_info (REPORT_DETAILS))
2335 fprintf (vect_dump, "use not simple.");
2338 /* If op0 is an external or constant def use a vector type with
2339 the same size as the output vector type. */
2341 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2343 gcc_assert (vectype);
2346 if (vect_print_dump_info (REPORT_DETAILS))
2348 fprintf (vect_dump, "no vectype for scalar type ");
2349 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2355 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2356 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2357 if (nunits_out != nunits_in)
2360 op1 = gimple_assign_rhs2 (stmt);
2361 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "use not simple.");
2369 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2373 /* Multiple types in SLP are handled by creating the appropriate number of
2374 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2376 if (slp_node || PURE_SLP_STMT (stmt_info))
2379 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2381 gcc_assert (ncopies >= 1);
2383 /* Determine whether the shift amount is a vector, or scalar. If the
2384 shift/rotate amount is a vector, use the vector/vector shift optabs. */
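/* E.g. (illustrative): in  a[i] = b[i] << 3  the shift amount is loop
   invariant, so the vector/scalar optab applies, whereas
   a[i] = b[i] << c[i]  requires the vector/vector optab. */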
2386 if (dt[1] == vect_internal_def && !slp_node)
2387 scalar_shift_arg = false;
2388 else if (dt[1] == vect_constant_def
2389 || dt[1] == vect_external_def
2390 || dt[1] == vect_internal_def)
2392 /* In SLP, need to check whether the shift count is the same,
2393 in loops if it is a constant or invariant, it is always
2397 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2400 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2401 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2402 scalar_shift_arg = false;
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "operand mode requires invariant argument.");
2412 /* Vector shifted by vector. */
2413 if (!scalar_shift_arg)
2415 optab = optab_for_tree_code (code, vectype, optab_vector);
2416 if (vect_print_dump_info (REPORT_DETAILS))
2417 fprintf (vect_dump, "vector/vector shift/rotate found.");
2419 /* See if the machine has a vector shifted by scalar insn and if not
2420 then see if it has a vector shifted by vector insn. */
2423 optab = optab_for_tree_code (code, vectype, optab_scalar);
2425 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2427 if (vect_print_dump_info (REPORT_DETAILS))
2428 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2432 optab = optab_for_tree_code (code, vectype, optab_vector);
2434 && (optab_handler (optab, TYPE_MODE (vectype))
2435 != CODE_FOR_nothing))
2437 scalar_shift_arg = false;
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "vector/vector shift/rotate found.");
2442 /* Unlike the other binary operators, shifts/rotates have
2443 the rhs being int, instead of the same type as the lhs,
2444 so make sure the scalar is the right type if we are
2445 dealing with vectors of short/char. */
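/* E.g. (illustrative): for a V8HI shift by the literal 3, op1 has type
   'int'; the conversion below narrows it to 'short' so that it can be
   replicated into a well-typed vector shift argument. */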
2446 if (dt[1] == vect_constant_def)
2447 op1 = fold_convert (TREE_TYPE (vectype), op1);
2452 /* Supportable by target? */
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "no optab.");
2459 vec_mode = TYPE_MODE (vectype);
2460 icode = (int) optab_handler (optab, vec_mode);
2461 if (icode == CODE_FOR_nothing)
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "op not supported by target.");
2465 /* Check only during analysis. */
2466 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2467 || (vf < vect_min_worthwhile_factor (code)
2470 if (vect_print_dump_info (REPORT_DETAILS))
2471 fprintf (vect_dump, "proceeding using word mode.");
2474 /* Worthwhile without SIMD support? Check only during analysis. */
2475 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2476 && vf < vect_min_worthwhile_factor (code)
2479 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "not worthwhile without SIMD support.");
2484 if (!vec_stmt) /* transformation not required. */
2486 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2487 if (vect_print_dump_info (REPORT_DETAILS))
2488 fprintf (vect_dump, "=== vectorizable_shift ===");
2489 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2495 if (vect_print_dump_info (REPORT_DETAILS))
2496 fprintf (vect_dump, "transform binary/unary operation.");
2499 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2501 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2502 created in the previous stages of the recursion, so no allocation is
2503 needed, except for the case of shift with scalar shift argument. In that
2504 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2505 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2506 In case of loop-based vectorization we allocate VECs of size 1. We
2507 allocate VEC_OPRNDS1 only in case of binary operation. */
2510 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2511 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2513 else if (scalar_shift_arg)
2514 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2516 prev_stmt_info = NULL;
2517 for (j = 0; j < ncopies; j++)
2522 if (scalar_shift_arg)
2524 /* Vector shl and shr insn patterns can be defined with scalar
2525 operand 2 (shift operand). In this case, use constant or loop
2526 invariant op1 directly, without extending it to vector mode
2528 optab_op2_mode = insn_data[icode].operand[2].mode;
2529 if (!VECTOR_MODE_P (optab_op2_mode))
2531 if (vect_print_dump_info (REPORT_DETAILS))
2532 fprintf (vect_dump, "operand 1 using scalar mode.");
2534 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2537 /* Store vec_oprnd1 for every vector stmt to be created
2538 for SLP_NODE. We check during the analysis that all
2539 the shift arguments are the same.
2540 TODO: Allow different constants for different vector
2541 stmts generated for an SLP instance. */
2542 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2543 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2548 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2549 (a special case for certain kinds of vector shifts); otherwise,
2550 operand 1 should be of a vector type (the usual case). */
2552 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2555 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2559 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2561 /* Arguments are ready. Create the new vector stmt. */
2562 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2564 vop1 = VEC_index (tree, vec_oprnds1, i);
2565 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2566 new_temp = make_ssa_name (vec_dest, new_stmt);
2567 gimple_assign_set_lhs (new_stmt, new_temp);
2568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2570 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2580 prev_stmt_info = vinfo_for_stmt (new_stmt);
2583 VEC_free (tree, heap, vec_oprnds0);
2584 VEC_free (tree, heap, vec_oprnds1);
2590 /* Function vectorizable_operation.
2592 Check if STMT performs a binary, unary or ternary operation that can
2594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2595 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2599 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2600 gimple *vec_stmt, slp_tree slp_node)
2604 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2608 enum tree_code code;
2609 enum machine_mode vec_mode;
2616 enum vect_def_type dt[3]
2617 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2618 gimple new_stmt = NULL;
2619 stmt_vec_info prev_stmt_info;
2625 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2626 tree vop0, vop1, vop2;
2627 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2630 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2633 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2636 /* Is STMT a vectorizable binary/unary operation? */
2637 if (!is_gimple_assign (stmt))
2640 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2643 code = gimple_assign_rhs_code (stmt);
2645 /* For pointer addition, we should use the normal plus for
2646 the vector addition. */
2647 if (code == POINTER_PLUS_EXPR)
2650 /* Support only unary or binary operations. */
2651 op_type = TREE_CODE_LENGTH (code);
2652 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2654 if (vect_print_dump_info (REPORT_DETAILS))
2655 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2660 scalar_dest = gimple_assign_lhs (stmt);
2661 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2663 op0 = gimple_assign_rhs1 (stmt);
2664 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2665 &def_stmt, &def, &dt[0], &vectype))
2667 if (vect_print_dump_info (REPORT_DETAILS))
2668 fprintf (vect_dump, "use not simple.");
2671 /* If op0 is an external or constant def use a vector type with
2672 the same size as the output vector type. */
2674 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2676 gcc_assert (vectype);
2679 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "no vectype for scalar type ");
2682 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2688 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2689 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2690 if (nunits_out != nunits_in)
2693 if (op_type == binary_op || op_type == ternary_op)
2695 op1 = gimple_assign_rhs2 (stmt);
2696 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2699 if (vect_print_dump_info (REPORT_DETAILS))
2700 fprintf (vect_dump, "use not simple.");
2704 if (op_type == ternary_op)
2706 op2 = gimple_assign_rhs3 (stmt);
2707 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2710 if (vect_print_dump_info (REPORT_DETAILS))
2711 fprintf (vect_dump, "use not simple.");
2717 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2721 /* Multiple types in SLP are handled by creating the appropriate number of
2722 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2724 if (slp_node || PURE_SLP_STMT (stmt_info))
2727 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2729 gcc_assert (ncopies >= 1);
2731 /* Shifts are handled in vectorizable_shift (). */
2732 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2733 || code == RROTATE_EXPR)
2736 optab = optab_for_tree_code (code, vectype, optab_default);
2738 /* Supportable by target? */
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "no optab.");
2745 vec_mode = TYPE_MODE (vectype);
2746 icode = (int) optab_handler (optab, vec_mode);
2747 if (icode == CODE_FOR_nothing)
2749 if (vect_print_dump_info (REPORT_DETAILS))
2750 fprintf (vect_dump, "op not supported by target.");
2751 /* Check only during analysis. */
2752 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2753 || (vf < vect_min_worthwhile_factor (code)
2756 if (vect_print_dump_info (REPORT_DETAILS))
2757 fprintf (vect_dump, "proceeding using word mode.");
2760 /* Worthwhile without SIMD support? Check only during analysis. */
2761 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2762 && vf < vect_min_worthwhile_factor (code)
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "not worthwhile without SIMD support.");
2770 if (!vec_stmt) /* transformation not required. */
2772 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2773 if (vect_print_dump_info (REPORT_DETAILS))
2774 fprintf (vect_dump, "=== vectorizable_operation ===");
2775 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2781 if (vect_print_dump_info (REPORT_DETAILS))
2782 fprintf (vect_dump, "transform binary/unary operation.");
2785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2787 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2788 created in the previous stages of the recursion, so no allocation is
2789 needed, except for the case of shift with scalar shift argument. In that
2790 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2791 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2792 In case of loop-based vectorization we allocate VECs of size 1. We
2793 allocate VEC_OPRNDS1 only in case of binary operation. */
2796 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2797 if (op_type == binary_op || op_type == ternary_op)
2798 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2799 if (op_type == ternary_op)
2800 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2803 /* In case the vectorization factor (VF) is bigger than the number
2804 of elements that we can fit in a vectype (nunits), we have to generate
2805 more than one vector stmt - i.e - we need to "unroll" the
2806 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2807 from one copy of the vector stmt to the next, in the field
2808 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2809 stages to find the correct vector defs to be used when vectorizing
2810 stmts that use the defs of the current stmt. The example below
2811 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2812 we need to create 4 vectorized stmts):
2814 before vectorization:
2815 RELATED_STMT VEC_STMT
2819 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2821 RELATED_STMT VEC_STMT
2822 VS1_0: vx0 = memref0 VS1_1 -
2823 VS1_1: vx1 = memref1 VS1_2 -
2824 VS1_2: vx2 = memref2 VS1_3 -
2825 VS1_3: vx3 = memref3 - -
2826 S1: x = load - VS1_0
2829 step2: vectorize stmt S2 (done here):
2830 To vectorize stmt S2 we first need to find the relevant vector
2831 def for the first operand 'x'. This is, as usual, obtained from
2832 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2833 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2834 relevant vector def 'vx0'. Having found 'vx0' we can generate
2835 the vector stmt VS2_0, and as usual, record it in the
2836 STMT_VINFO_VEC_STMT of stmt S2.
2837 When creating the second copy (VS2_1), we obtain the relevant vector
2838 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2839 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2840 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2841 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2842 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2843 chain of stmts and pointers:
2844 RELATED_STMT VEC_STMT
2845 VS1_0: vx0 = memref0 VS1_1 -
2846 VS1_1: vx1 = memref1 VS1_2 -
2847 VS1_2: vx2 = memref2 VS1_3 -
2848 VS1_3: vx3 = memref3 - -
2849 S1: x = load - VS1_0
2850 VS2_0: vz0 = vx0 + v1 VS2_1 -
2851 VS2_1: vz1 = vx1 + v1 VS2_2 -
2852 VS2_2: vz2 = vx2 + v1 VS2_3 -
2853 VS2_3: vz3 = vx3 + v1 - -
2854 S2: z = x + 1 - VS2_0 */
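/* (Illustrative.)  A scalar loop giving rise to the S1/S2 example
   above could be

     for (i = 0; i < N; i++)
       {
         x = a[i];        <-- S1
         z = x + 1;       <-- S2
         b[i] = z;
       }

   with VF == 16 and four-element vectors, hence the four copies. */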
2856 prev_stmt_info = NULL;
2857 for (j = 0; j < ncopies; j++)
2862 if (op_type == binary_op || op_type == ternary_op)
2863 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2866 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2868 if (op_type == ternary_op)
2870 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2871 VEC_quick_push (tree, vec_oprnds2,
2872 vect_get_vec_def_for_operand (op2, stmt, NULL));
2877 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2878 if (op_type == ternary_op)
2880 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2881 VEC_quick_push (tree, vec_oprnds2,
2882 vect_get_vec_def_for_stmt_copy (dt[2],
2887 /* Arguments are ready. Create the new vector stmt. */
2888 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2890 vop1 = ((op_type == binary_op || op_type == ternary_op)
2891 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2892 vop2 = ((op_type == ternary_op)
2893 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2894 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2896 new_temp = make_ssa_name (vec_dest, new_stmt);
2897 gimple_assign_set_lhs (new_stmt, new_temp);
2898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2909 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2910 prev_stmt_info = vinfo_for_stmt (new_stmt);
2913 VEC_free (tree, heap, vec_oprnds0);
2915 VEC_free (tree, heap, vec_oprnds1);
2917 VEC_free (tree, heap, vec_oprnds2);
2923 /* Get vectorized definitions for loop-based vectorization. For the first
2924 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2925 scalar operand), and for the rest we get a copy with
2926 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2927 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2928 The vectors are collected into VEC_OPRNDS. */
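/* For example (illustrative): with MULTI_STEP_CVT == 1 this collects
   four defs -- the def of OPRND, then three successive copies -- which
   a two-step demotion later combines pairwise. */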
2931 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2932 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2936 /* Get first vector operand. */
2937 /* All the vector operands except the very first one (that is scalar oprnd)
2939 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2940 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2942 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2944 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2946 /* Get second vector operand. */
2947 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2948 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2952 /* For conversion in multiple steps, continue to get operands
2955 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2959 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2960 For multi-step conversions store the resulting vectors and call the function
2964 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2965 int multi_step_cvt, gimple stmt,
2966 VEC (tree, heap) *vec_dsts,
2967 gimple_stmt_iterator *gsi,
2968 slp_tree slp_node, enum tree_code code,
2969 stmt_vec_info *prev_stmt_info)
2972 tree vop0, vop1, new_tmp, vec_dest;
2974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2976 vec_dest = VEC_pop (tree, vec_dsts);
2978 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2980 /* Create demotion operation. */
2981 vop0 = VEC_index (tree, *vec_oprnds, i);
2982 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2983 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2984 new_tmp = make_ssa_name (vec_dest, new_stmt);
2985 gimple_assign_set_lhs (new_stmt, new_tmp);
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2989 /* Store the resulting vector for next recursive call. */
2990 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2993 /* This is the last step of the conversion sequence. Store the
2994 vectors in SLP_NODE or in vector info of the scalar statement
2995 (or in STMT_VINFO_RELATED_STMT chain). */
2997 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3000 if (!*prev_stmt_info)
3001 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3003 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3005 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3010 /* For multi-step demotion operations we first generate demotion operations
3011 from the source type to the intermediate types, and then combine the
3012 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3016 /* At each level of recursion we have half of the operands we had at the
3018 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3019 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3020 stmt, vec_dsts, gsi, slp_node,
3021 code, prev_stmt_info);
3026 /* Function vectorizable_type_demotion
3028 Check if STMT performs a binary or unary operation that involves
3029 type demotion, and if it can be vectorized.
3030 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3031 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3032 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3035 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3036 gimple *vec_stmt, slp_tree slp_node)
3041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3043 enum tree_code code, code1 = ERROR_MARK;
3046 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3047 stmt_vec_info prev_stmt_info;
3054 int multi_step_cvt = 0;
3055 VEC (tree, heap) *vec_oprnds0 = NULL;
3056 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3057 tree last_oprnd, intermediate_type;
3058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3060 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3063 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3066 /* Is STMT a vectorizable type-demotion operation? */
3067 if (!is_gimple_assign (stmt))
3070 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3073 code = gimple_assign_rhs_code (stmt);
3074 if (!CONVERT_EXPR_CODE_P (code))
3077 scalar_dest = gimple_assign_lhs (stmt);
3078 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3080 /* Check the operands of the operation. */
3081 op0 = gimple_assign_rhs1 (stmt);
3082 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3083 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3084 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3085 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3086 && CONVERT_EXPR_CODE_P (code))))
3088 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3089 &def_stmt, &def, &dt[0], &vectype_in))
3091 if (vect_print_dump_info (REPORT_DETAILS))
3092 fprintf (vect_dump, "use not simple.");
3095 /* If op0 is an external def use a vector type with the
3096 same size as the output vector type if possible. */
3098 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3100 gcc_assert (vectype_in);
3103 if (vect_print_dump_info (REPORT_DETAILS))
3105 fprintf (vect_dump, "no vectype for scalar type ");
3106 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3112 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3113 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3114 if (nunits_in >= nunits_out)
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3120 if (slp_node || PURE_SLP_STMT (stmt_info))
3123 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3124 gcc_assert (ncopies >= 1);
3126 /* Supportable by target? */
3127 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3128 &code1, &multi_step_cvt, &interm_types))
3131 if (!vec_stmt) /* transformation not required. */
3133 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3134 if (vect_print_dump_info (REPORT_DETAILS))
3135 fprintf (vect_dump, "=== vectorizable_demotion ===");
3136 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3141 if (vect_print_dump_info (REPORT_DETAILS))
3142 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3145 /* In case of multi-step demotion, we first generate demotion operations to
3146 the intermediate types, and then from those types to the final one.
3147 We create vector destinations for the intermediate type (TYPES) received
3148 from supportable_narrowing_operation, and store them in the correct order
3149 for future use in vect_create_vectorized_demotion_stmts(). */
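/* Example (hedged, assuming 128-bit vectors): narrowing 'int' to
   'char' goes V4SI -> V8HI -> V16QI, so supportable_narrowing_operation
   reports one intermediate type (V8HI); four V4SI operands are packed
   into two V8HI vectors, which are then packed into one V16QI. */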
3151 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3153 vec_dsts = VEC_alloc (tree, heap, 1);
3155 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3156 VEC_quick_push (tree, vec_dsts, vec_dest);
3160 for (i = VEC_length (tree, interm_types) - 1;
3161 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3163 vec_dest = vect_create_destination_var (scalar_dest,
3165 VEC_quick_push (tree, vec_dsts, vec_dest);
3169 /* In case the vectorization factor (VF) is bigger than the number
3170 of elements that we can fit in a vectype (nunits), we have to generate
3171 more than one vector stmt - i.e - we need to "unroll" the
3172 vector stmt by a factor VF/nunits. */
3174 prev_stmt_info = NULL;
3175 for (j = 0; j < ncopies; j++)
3179 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3182 VEC_free (tree, heap, vec_oprnds0);
3183 vec_oprnds0 = VEC_alloc (tree, heap,
3184 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3185 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3186 vect_pow2 (multi_step_cvt) - 1);
3189 /* Arguments are ready. Create the new vector stmts. */
3190 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3191 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3192 multi_step_cvt, stmt, tmp_vec_dsts,
3193 gsi, slp_node, code1,
3197 VEC_free (tree, heap, vec_oprnds0);
3198 VEC_free (tree, heap, vec_dsts);
3199 VEC_free (tree, heap, tmp_vec_dsts);
3200 VEC_free (tree, heap, interm_types);
3202 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3207 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3208 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3209 the resulting vectors and call the function recursively. */
3212 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3213 VEC (tree, heap) **vec_oprnds1,
3214 int multi_step_cvt, gimple stmt,
3215 VEC (tree, heap) *vec_dsts,
3216 gimple_stmt_iterator *gsi,
3217 slp_tree slp_node, enum tree_code code1,
3218 enum tree_code code2, tree decl1,
3219 tree decl2, int op_type,
3220 stmt_vec_info *prev_stmt_info)
3223 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3224 gimple new_stmt1, new_stmt2;
3225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3226 VEC (tree, heap) *vec_tmp;
3228 vec_dest = VEC_pop (tree, vec_dsts);
3229 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3231 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3233 if (op_type == binary_op)
3234 vop1 = VEC_index (tree, *vec_oprnds1, i);
3238 /* Generate the two halves of promotion operation. */
3239 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3240 op_type, vec_dest, gsi, stmt);
3241 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3242 op_type, vec_dest, gsi, stmt);
3243 if (is_gimple_call (new_stmt1))
3245 new_tmp1 = gimple_call_lhs (new_stmt1);
3246 new_tmp2 = gimple_call_lhs (new_stmt2);
3250 new_tmp1 = gimple_assign_lhs (new_stmt1);
3251 new_tmp2 = gimple_assign_lhs (new_stmt2);
3256 /* Store the results for the recursive call. */
3257 VEC_quick_push (tree, vec_tmp, new_tmp1);
3258 VEC_quick_push (tree, vec_tmp, new_tmp2);
3262 /* Last step of the promotion sequence - store the results. */
3265 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3266 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3270 if (!*prev_stmt_info)
3271 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3273 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3275 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3276 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3277 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3284 /* For a multi-step promotion operation we call the function
3285 recursively for every stage. We start from the input type,
3286 create promotion operations to the intermediate types, and then
3287 create promotions to the output type. */
3288 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3289 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3290 multi_step_cvt - 1, stmt,
3291 vec_dsts, gsi, slp_node, code1,
3292 code2, decl1, decl2, op_type,
3296 VEC_free (tree, heap, vec_tmp);
3300 /* Function vectorizable_type_promotion
3302 Check if STMT performs a binary or unary operation that involves
3303 type promotion, and if it can be vectorized.
3304 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3305 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3306 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3309 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3310 gimple *vec_stmt, slp_tree slp_node)
3314 tree op0, op1 = NULL;
3315 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3318 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3319 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3323 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3324 stmt_vec_info prev_stmt_info;
3331 tree intermediate_type = NULL_TREE;
3332 int multi_step_cvt = 0;
3333 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3334 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3335 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3341 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3344 /* Is STMT a vectorizable type-promotion operation? */
3345 if (!is_gimple_assign (stmt))
3348 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3351 code = gimple_assign_rhs_code (stmt);
3352 if (!CONVERT_EXPR_CODE_P (code)
3353 && code != WIDEN_MULT_EXPR
3354 && code != WIDEN_LSHIFT_EXPR)
3357 scalar_dest = gimple_assign_lhs (stmt);
3358 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3360 /* Check the operands of the operation. */
3361 op0 = gimple_assign_rhs1 (stmt);
3362 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3363 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3364 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3365 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3366 && CONVERT_EXPR_CODE_P (code))))
3368 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3369 &def_stmt, &def, &dt[0], &vectype_in))
3371 if (vect_print_dump_info (REPORT_DETAILS))
3372 fprintf (vect_dump, "use not simple.");
3376 op_type = TREE_CODE_LENGTH (code);
3377 if (op_type == binary_op)
3381 op1 = gimple_assign_rhs2 (stmt);
3382 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3384 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3386 if (CONSTANT_CLASS_P (op0))
3387 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3388 &def_stmt, &def, &dt[1], &vectype_in);
3390 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3395 if (vect_print_dump_info (REPORT_DETAILS))
3396 fprintf (vect_dump, "use not simple.");
3402 /* If op0 is an external or constant def use a vector type with
3403 the same size as the output vector type. */
3405 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3407 gcc_assert (vectype_in);
3410 if (vect_print_dump_info (REPORT_DETAILS))
3412 fprintf (vect_dump, "no vectype for scalar type ");
3413 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3419 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3420 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3421 if (nunits_in <= nunits_out)
3424 /* Multiple types in SLP are handled by creating the appropriate number of
3425 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3427 if (slp_node || PURE_SLP_STMT (stmt_info))
3430 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3432 gcc_assert (ncopies >= 1);
3434 /* Supportable by target? */
3435 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3436 &decl1, &decl2, &code1, &code2,
3437 &multi_step_cvt, &interm_types))
3440 /* Binary widening operation can only be supported directly by the
3442 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3444 if (!vec_stmt) /* transformation not required. */
3446 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3447 if (vect_print_dump_info (REPORT_DETAILS))
3448 fprintf (vect_dump, "=== vectorizable_promotion ===");
3449 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
3455 if (vect_print_dump_info (REPORT_DETAILS))
3456 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3459 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3461 if (CONSTANT_CLASS_P (op0))
3462 op0 = fold_convert (TREE_TYPE (op1), op0);
3463 else if (CONSTANT_CLASS_P (op1))
3464 op1 = fold_convert (TREE_TYPE (op0), op1);
3468 /* In case of multi-step promotion, we first generate promotion operations
3469 to the intermediate types, and then from those types to the final one.
3470 We store the vector destinations in VEC_DSTS in the correct order for
3471 recursive creation of promotion operations in
3472 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3473 according to TYPES received from supportable_widening_operation(). */
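/* Example (hedged, assuming 128-bit vectors): promoting 'char' to
   'int' goes V16QI -> V8HI -> V4SI with one intermediate type (V8HI);
   each vector is unpacked into a low and a high half at every step,
   turning one V16QI into four V4SI results. */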
3475 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3477 vec_dsts = VEC_alloc (tree, heap, 1);
3479 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3480 VEC_quick_push (tree, vec_dsts, vec_dest);
3484 for (i = VEC_length (tree, interm_types) - 1;
3485 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3487 vec_dest = vect_create_destination_var (scalar_dest,
3489 VEC_quick_push (tree, vec_dsts, vec_dest);
3495 vec_oprnds0 = VEC_alloc (tree, heap,
3496 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3497 if (op_type == binary_op)
3498 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3500 else if (code == WIDEN_LSHIFT_EXPR)
3501 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3503 /* In case the vectorization factor (VF) is bigger than the number
3504 of elements that we can fit in a vectype (nunits), we have to generate
3505 more than one vector stmt - i.e - we need to "unroll" the
3506 vector stmt by a factor VF/nunits. */
3508 prev_stmt_info = NULL;
3509 for (j = 0; j < ncopies; j++)
3516 if (code == WIDEN_LSHIFT_EXPR)
3519 /* Store vec_oprnd1 for every vector stmt to be created
3520 for SLP_NODE. We check during the analysis that all
3521 the shift arguments are the same. */
3522 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3523 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3525 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
3529 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3534 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3535 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3536 if (op_type == binary_op)
3538 if (code == WIDEN_LSHIFT_EXPR)
3541 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3542 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3548 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3549 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3550 if (op_type == binary_op)
3552 if (code == WIDEN_LSHIFT_EXPR)
3555 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3556 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3560 /* Arguments are ready. Create the new vector stmts. */
3561 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3562 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3563 multi_step_cvt, stmt,
3565 gsi, slp_node, code1, code2,
3566 decl1, decl2, op_type,
3570 VEC_free (tree, heap, vec_dsts);
3571 VEC_free (tree, heap, tmp_vec_dsts);
3572 VEC_free (tree, heap, interm_types);
3573 VEC_free (tree, heap, vec_oprnds0);
3574 VEC_free (tree, heap, vec_oprnds1);
3576 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3581 /* Function vectorizable_store.
3583 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3585 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3586 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3587 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3590 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3596 tree vec_oprnd = NULL_TREE;
3597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3598 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3599 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3601 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3602 struct loop *loop = NULL;
3603 enum machine_mode vec_mode;
3605 enum dr_alignment_support alignment_support_scheme;
3608 enum vect_def_type dt;
3609 stmt_vec_info prev_stmt_info = NULL;
3610 tree dataref_ptr = NULL_TREE;
3611 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3614 gimple next_stmt, first_stmt = NULL;
3615 bool strided_store = false;
3616 bool store_lanes_p = false;
3617 unsigned int group_size, i;
3618 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3620 VEC(tree,heap) *vec_oprnds = NULL;
3621 bool slp = (slp_node != NULL);
3622 unsigned int vec_num;
3623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3627 loop = LOOP_VINFO_LOOP (loop_vinfo);
3629 /* Multiple types in SLP are handled by creating the appropriate number of
3630 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3632 if (slp || PURE_SLP_STMT (stmt_info))
3635 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3637 gcc_assert (ncopies >= 1);
3639 /* FORNOW. This restriction should be relaxed. */
3640 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3642 if (vect_print_dump_info (REPORT_DETAILS))
3643 fprintf (vect_dump, "multiple types in nested loop.");
3647 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3650 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3653 /* Is vectorizable store? */
3655 if (!is_gimple_assign (stmt))
3658 scalar_dest = gimple_assign_lhs (stmt);
3659 if (TREE_CODE (scalar_dest) != ARRAY_REF
3660 && TREE_CODE (scalar_dest) != INDIRECT_REF
3661 && TREE_CODE (scalar_dest) != COMPONENT_REF
3662 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3663 && TREE_CODE (scalar_dest) != REALPART_EXPR
3664 && TREE_CODE (scalar_dest) != MEM_REF)
3667 gcc_assert (gimple_assign_single_p (stmt));
3668 op = gimple_assign_rhs1 (stmt);
3669 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3671 if (vect_print_dump_info (REPORT_DETAILS))
3672 fprintf (vect_dump, "use not simple.");
3676 /* The scalar rhs type needs to be trivially convertible to the vector
3677 component type. This should always be the case. */
3678 elem_type = TREE_TYPE (vectype);
3679 if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
3681 if (vect_print_dump_info (REPORT_DETAILS))
3682 fprintf (vect_dump, "??? operands of different types");
3686 vec_mode = TYPE_MODE (vectype);
3687 /* FORNOW. In some cases can vectorize even if data-type not supported
3688 (e.g. - array initialization with 0). */
3689 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3692 if (!STMT_VINFO_DATA_REF (stmt_info))
3695 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3697 if (vect_print_dump_info (REPORT_DETAILS))
3698 fprintf (vect_dump, "negative step for store.");
3702 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3704 strided_store = true;
3705 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3706 if (!slp && !PURE_SLP_STMT (stmt_info))
3708 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3709 if (vect_store_lanes_supported (vectype, group_size))
3710 store_lanes_p = true;
3711 else if (!vect_strided_store_supported (vectype, group_size))
3715 if (first_stmt == stmt)
3717 /* STMT is the leader of the group. Check the operands of all the
3718 stmts of the group. */
3719 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3722 gcc_assert (gimple_assign_single_p (next_stmt));
3723 op = gimple_assign_rhs1 (next_stmt);
3724 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3727 if (vect_print_dump_info (REPORT_DETAILS))
3728 fprintf (vect_dump, "use not simple.");
3731 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3736 if (!vec_stmt) /* transformation not required. */
3738 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3739 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3747 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3748 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3750 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3753 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3755 /* We vectorize all the stmts of the interleaving group when we
3756 reach the last stmt in the group. */
3757 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3758 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3767 strided_store = false;
3768 /* VEC_NUM is the number of vect stmts to be created for this
3770 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3771 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3772 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3775 /* VEC_NUM is the number of vect stmts to be created for this
3777 vec_num = group_size;
3783 group_size = vec_num = 1;
3786 if (vect_print_dump_info (REPORT_DETAILS))
3787 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3789 dr_chain = VEC_alloc (tree, heap, group_size);
3790 oprnds = VEC_alloc (tree, heap, group_size);
3792 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3793 gcc_assert (alignment_support_scheme);
3794 /* Targets with store-lane instructions must not require explicit
3796 gcc_assert (!store_lanes_p
3797 || alignment_support_scheme == dr_aligned
3798 || alignment_support_scheme == dr_unaligned_supported);
3801 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3803 aggr_type = vectype;
3805 /* In case the vectorization factor (VF) is bigger than the number
3806 of elements that we can fit in a vectype (nunits), we have to generate
3807 more than one vector stmt - i.e - we need to "unroll" the
3808 vector stmt by a factor VF/nunits. For more details see documentation in
3809 vect_get_vec_def_for_copy_stmt. */
3811 /* In case of interleaving (non-unit strided access):
3818 We create vectorized stores starting from base address (the access of the
3819 first stmt in the chain (S2 in the above example), when the last store stmt
3820 of the chain (S4) is reached:
3823 VS2: &base + vec_size*1 = vx0
3824 VS3: &base + vec_size*2 = vx1
3825 VS4: &base + vec_size*3 = vx3
3827 Then permutation statements are generated:
3829 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3830 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3833 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3834 (the order of the data-refs in the output of vect_permute_store_chain
3835 corresponds to the order of scalar stmts in the interleaving chain - see
3836 the documentation of vect_permute_store_chain()).
3838 In case of both multiple types and interleaving, above vector stores and
3839 permutation stmts are created for every copy. The result vector stmts are
3840 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3841 STMT_VINFO_RELATED_STMT for the next copies.
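/* (Illustrative.)  A scalar source fragment leading to such an
   interleaved store, with an interleaving chain of GROUP_SIZE 2, is

     for (i = 0; i < n; i++)
       {
         p[2*i] = a[i];
         p[2*i+1] = b[i];
       }

   where the two stores are vectorized together when the second one
   is reached. */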
3844 prev_stmt_info = NULL;
3845 for (j = 0; j < ncopies; j++)
3854 /* Get vectorized arguments for SLP_NODE. */
3855 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3858 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3862 /* For interleaved stores we collect vectorized defs for all the
3863 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3864 used as an input to vect_permute_store_chain(), and OPRNDS as
3865 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3867 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3868 OPRNDS are of size 1. */
3869 next_stmt = first_stmt;
3870 for (i = 0; i < group_size; i++)
3872 /* Since gaps are not supported for interleaved stores,
3873 GROUP_SIZE is the exact number of stmts in the chain.
3874 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3875 there is no interleaving, GROUP_SIZE is 1, and only one
3876 iteration of the loop will be executed. */
3877 gcc_assert (next_stmt
3878 && gimple_assign_single_p (next_stmt));
3879 op = gimple_assign_rhs1 (next_stmt);
3881 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3883 VEC_quick_push (tree, dr_chain, vec_oprnd);
3884 VEC_quick_push (tree, oprnds, vec_oprnd);
3885 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3889 /* We should have caught mismatched types earlier. */
3890 gcc_assert (useless_type_conversion_p (vectype,
3891 TREE_TYPE (vec_oprnd)));
3892 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3893 NULL_TREE, &dummy, gsi,
3894 &ptr_incr, false, &inv_p);
3895 gcc_assert (bb_vinfo || !inv_p);
3899 /* For interleaved stores we created vectorized defs for all the
3900 defs stored in OPRNDS in the previous iteration (previous copy).
3901 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3902 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3904 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3905 OPRNDS are of size 1. */
3906 for (i = 0; i < group_size; i++)
3908 op = VEC_index (tree, oprnds, i);
3909 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3911 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3912 VEC_replace (tree, dr_chain, i, vec_oprnd);
3913 VEC_replace (tree, oprnds, i, vec_oprnd);
3915 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3916 TYPE_SIZE_UNIT (aggr_type));
3923 /* Combine all the vectors into an array. */
3924 vec_array = create_vector_array (vectype, vec_num);
3925 for (i = 0; i < vec_num; i++)
3927 vec_oprnd = VEC_index (tree, dr_chain, i);
3928 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3932 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3933 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3934 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3935 gimple_call_set_lhs (new_stmt, data_ref);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3937 mark_symbols_for_renaming (new_stmt);
3944 result_chain = VEC_alloc (tree, heap, group_size);
3946 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3950 next_stmt = first_stmt;
3951 for (i = 0; i < vec_num; i++)
3953 struct ptr_info_def *pi;
3956 /* Bump the vector pointer. */
3957 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3961 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3962 else if (strided_store)
3963 /* For strided stores vectorized defs are interleaved in
3964 vect_permute_store_chain(). */
3965 vec_oprnd = VEC_index (tree, result_chain, i);
3967 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3968 build_int_cst (reference_alias_ptr_type
3969 (DR_REF (first_dr)), 0));
3970 pi = get_ptr_info (dataref_ptr);
3971 pi->align = TYPE_ALIGN_UNIT (vectype);
3972 if (aligned_access_p (first_dr))
3974 else if (DR_MISALIGNMENT (first_dr) == -1)
3976 TREE_TYPE (data_ref)
3977 = build_aligned_type (TREE_TYPE (data_ref),
3978 TYPE_ALIGN (elem_type));
3979 pi->align = TYPE_ALIGN_UNIT (elem_type);
3984 TREE_TYPE (data_ref)
3985 = build_aligned_type (TREE_TYPE (data_ref),
3986 TYPE_ALIGN (elem_type));
3987 pi->misalign = DR_MISALIGNMENT (first_dr);
3990 /* Arguments are ready. Create the new vector stmt. */
3991 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3993 mark_symbols_for_renaming (new_stmt);
3998 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4009 prev_stmt_info = vinfo_for_stmt (new_stmt);
4013 VEC_free (tree, heap, dr_chain);
4014 VEC_free (tree, heap, oprnds);
4016 VEC_free (tree, heap, result_chain);
4018 VEC_free (tree, heap, vec_oprnds);
4023 /* Given a vector type VECTYPE, return the permutation mask that
4024 implements reversal of the vector elements, for use in a
4025 VEC_PERM_EXPR. If that is impossible to do,
4029 perm_mask_for_reverse (tree vectype)
4031 tree mask_element_type, mask_type, mask_vec = NULL;
4034 if (!can_vec_perm_expr_p (vectype, NULL_TREE))
4038 = lang_hooks.types.type_for_size
4039 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4040 mask_type = get_vectype_for_scalar_type (mask_element_type);
4041 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4043 for (i = 0; i < nunits; i++)
4044 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4045 mask_vec = build_vector (mask_type, mask_vec);
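/* Since tree_cons prepends, the vector built above lists the elements
   in descending order; e.g. for V4SI the selector is { 3, 2, 1, 0 },
   i.e. full element reversal. */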
4047 if (!can_vec_perm_expr_p (vectype, mask_vec))
4053 /* Given a vector variable X, that was generated for the scalar LHS of
4054 STMT, generate instructions to reverse the vector elements of X,
4055 insert them at *GSI and return the permuted vector variable. */
4058 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4060 tree vectype = TREE_TYPE (x);
4061 tree mask_vec, perm_dest, data_ref;
4064 mask_vec = perm_mask_for_reverse (vectype);
4066 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4068 /* Generate the permute statement. */
4069 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4071 data_ref = make_ssa_name (perm_dest, perm_stmt);
4072 gimple_set_lhs (perm_stmt, data_ref);
4073 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4078 /* vectorizable_load.
4080 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4083 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4084 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4087 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4088 slp_tree slp_node, slp_instance slp_node_instance)
4091 tree vec_dest = NULL;
4092 tree data_ref = NULL;
4093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4094 stmt_vec_info prev_stmt_info;
4095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4096 struct loop *loop = NULL;
4097 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4098 bool nested_in_vect_loop = false;
4099 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4100 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4103 enum machine_mode mode;
4104 gimple new_stmt = NULL;
4106 enum dr_alignment_support alignment_support_scheme;
4107 tree dataref_ptr = NULL_TREE;
4109 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4111 int i, j, group_size;
4112 tree msq = NULL_TREE, lsq;
4113 tree offset = NULL_TREE;
4114 tree realignment_token = NULL_TREE;
4116 VEC(tree,heap) *dr_chain = NULL;
4117 bool strided_load = false;
4118 bool load_lanes_p = false;
4123 bool compute_in_loop = false;
4124 struct loop *at_loop;
4126 bool slp = (slp_node != NULL);
4127 bool slp_perm = false;
4128 enum tree_code code;
4129 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4135 loop = LOOP_VINFO_LOOP (loop_vinfo);
4136 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4137 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4142 /* Multiple types in SLP are handled by creating the appropriate number of
4143 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4145 if (slp || PURE_SLP_STMT (stmt_info))
4148 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4150 gcc_assert (ncopies >= 1);
4152 /* FORNOW. This restriction should be relaxed. */
4153 if (nested_in_vect_loop && ncopies > 1)
4155 if (vect_print_dump_info (REPORT_DETAILS))
4156 fprintf (vect_dump, "multiple types in nested loop.");
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;
  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types with negative step.");
      return false;
    }
4195 scalar_type = TREE_TYPE (DR_REF (dr));
4196 mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases we can vectorize even if the data-type is not
     supported (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }
  /* The vector component type needs to be trivially convertible to the
     scalar lhs.  This should always be the case.  */
  elem_type = TREE_TYPE (vectype);
  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "??? operands of different types");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_load_lanes_supported (vectype, group_size))
	    load_lanes_p = true;
	  else if (!vect_strided_load_supported (vectype, group_size))
	    return false;
	}
    }

  if (negative)
    {
      gcc_assert (!strided_load);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
	  && alignment_support_scheme != dr_unaligned_supported)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step but alignment required.");
	  return false;
	}

      if (!perm_mask_for_reverse (vectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step and reversing not supported.");
	  return false;
	}
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/
  if (strided_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
	  && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
	  && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
	first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  strided_load = false;
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
	    slp_perm = true;
	}
      else
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:  x = memref         -               -
        S2:  z = x + 1          -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation in vect_get_vec_def_for_stmt_copy for how the
     information we record in the RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit strided access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
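  /* Concretely (an illustrative sketch): for two interleaved scalar streams
     a and b, memory holds {a0,b0,a1,b1, a2,b2,a3,b3}.  With V4SI loads
     vx0 = {a0,b0,a1,b1} and vx1 = {a2,b2,a3,b3}, so
     VEC_EXTRACT_EVEN <vx0,vx1> = {a0,a1,a2,a3} and
     VEC_EXTRACT_ODD <vx0,vx1> = {b0,b1,b2,b3} - one vector per stream.  */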
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
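  /* A worked example (illustrative): with V4SI vectors and p misaligned by
     one element, the two enclosing aligned loads give msq = {x0,x1,x2,x3}
     and lsq = {x4,x5,x6,x7}; realign_load combines them into the desired
     unaligned vector {x1,x2,x3,x4}.  */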
4406 /* If the misalignment remains the same throughout the execution of the
4407 loop, we can create the init_addr and permutation mask at the loop
4408 preheader. Otherwise, it needs to be created inside the loop.
4409 This can only occur when vectorizing memory accesses in the inner-loop
4410 nested within an outer-loop that is being vectorized. */
  if (loop && nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }
  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = SSA_NAME_DEF_STMT (msq);
	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
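  /* A note on the negative-step offset (editorial): the access runs
     backwards, so the offset of -(nunits-1) above makes the vector load
     start at the lowest-addressed of the nunits elements covered by the
     current scalar access; the loaded vector is then element-reversed
     further below.  */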
  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
						offset, &dummy, gsi,
						&ptr_incr, false, &inv_p);
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (strided_load || slp_perm)
	dr_chain = VEC_alloc (tree, heap, vec_num);
      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
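	  /* For example (illustrative): with group_size 2, a LOAD_LANES of
	     the interleaved stream {a0,b0,a1,b1,...} fills vec_array[0]
	     with {a0,a1,...} and vec_array[1] with {b0,b1,...} - one vector
	     per scalar stream, with no separate permute statements.  */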
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      VEC_quick_push (tree, dr_chain, new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_strided_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    struct ptr_info_def *pi;
		    data_ref
		      = build2 (MEM_REF, vectype, dataref_ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    pi = get_ptr_info (dataref_ptr);
		    pi->align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			pi->misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			pi->align = TYPE_ALIGN_UNIT (elem_type);
			pi->misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			pi->misalign = DR_MISALIGNMENT (first_dr);
		      }
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;
		    tree vs_minus_1;

		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs_minus_1,
				       TYPE_SIZE_UNIT (scalar_type));
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr),
				     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_stmt = gimple_build_assign_with_ops
			       (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
				build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
					    new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
4597 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4598 new_stmt = gimple_build_assign (vec_dest, data_ref);
4599 new_temp = make_ssa_name (vec_dest, new_stmt);
4600 gimple_assign_set_lhs (new_stmt, new_temp);
4601 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4602 mark_symbols_for_renaming (new_stmt);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt
		    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
						     vec_dest, msq, lsq,
						     realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gimple_stmt_iterator gsi2 = *gsi;
		  gcc_assert (!strided_load);
		  gsi_next (&gsi2);
		  vec_inv = build_vector_from_val (vectype, scalar_dest);
		  new_temp = vect_init_vector (stmt, vec_inv,
					       vectype, &gsi2);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      if (negative)
		{
		  new_temp = reverse_vec_elements (new_temp, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}
	      /* Collect vector loads and later create their permutation in
		 vect_transform_strided_load ().  */
	      if (strided_load || slp_perm)
		VEC_quick_push (tree, dr_chain, new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				new_stmt);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      VEC_free (tree, heap, dr_chain);
	      return false;
	    }
	}
      else
	{
	  if (strided_load)
	    {
	      if (!load_lanes_p)
		vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      if (dr_chain)
	VEC_free (tree, heap, dr_chain);
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */
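/* For instance (an illustrative sketch, not from the original sources):
   for a scalar condition "x_1 < y_2" in which both operands are SSA names
   defined inside the loop, the condition is simple, and *COMP_VECTYPE
   becomes the vector type of the comparison operands (e.g. V4SI for
   4-byte ints with a vectorization factor of 4).  */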
static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
				 &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
				 &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
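/* A minimal sketch of the transformation (operand names illustrative):
   the scalar

     x = a > b ? c : d;

   becomes, element-wise on whole vectors,

     vec_compare = va > vb;
     vx = VEC_COND_EXPR <vec_compare, vc, vd>;  */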
4766 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4767 gimple *vec_stmt, tree reduc_def, int reduc_index)
4769 tree scalar_dest = NULL_TREE;
4770 tree vec_dest = NULL_TREE;
4771 tree cond_expr, then_clause, else_clause;
4772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4773 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4775 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4776 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4777 tree vec_compare, vec_cond_expr;
4779 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree comp_vectype;
  tree def;
  tree new_temp;
  enum vect_def_type dt, dts[4];
4782 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4783 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4784 enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int j;
4788 /* FORNOW: unsupported in basic block SLP. */
4789 gcc_assert (loop_vinfo);
  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }
  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */
4865 scalar_dest = gimple_assign_lhs (stmt);
4866 vec_dest = vect_create_destination_var (scalar_dest, vectype);
  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
	{
	  gimple gtemp;
	  vec_cond_lhs =
	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					    stmt, NULL);
	  vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
			      NULL, &gtemp, &def, &dts[0]);
	  vec_cond_rhs =
	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					    stmt, NULL);
	  vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
			      NULL, &gtemp, &def, &dts[1]);
	  if (reduc_index == 1)
	    vec_then_clause = reduc_def;
	  else
	    {
	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
							      stmt, NULL);
	      vect_is_simple_use (then_clause, loop_vinfo,
				  NULL, &gtemp, &def, &dts[2]);
	    }
	  if (reduc_index == 2)
	    vec_else_clause = reduc_def;
	  else
	    {
	      vec_else_clause = vect_get_vec_def_for_operand (else_clause,
							      stmt, NULL);
	      vect_is_simple_use (else_clause, loop_vinfo,
				  NULL, &gtemp, &def, &dts[3]);
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_then_clause);
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      vec_compare = build2 (TREE_CODE (cond_expr), vectype,
			    vec_cond_lhs, vec_cond_rhs);
      vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
			      vec_compare, vec_then_clause, vec_else_clause);

      new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
4936 /* Make sure the statement is vectorizable. */
4939 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4942 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4943 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
4946 gimple pattern_stmt, pattern_def_stmt;
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");

      return false;
    }
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  */
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining pattern statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "irrelevant.");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
   }
  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
      && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
          || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
    {
      /* Analyze def stmt of STMT if it's a pattern stmt.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern def statement: ");
          print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
        return false;
   }
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
        break;

      case vect_reduction_def:
      case vect_nested_cycle:
         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                     || relevance == vect_used_in_outer_by_reduction
                     || relevance == vect_unused_in_scope));
         break;

      case vect_induction_def:
      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
        gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));
      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "get vectype for scalar type: ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not SLPed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vectype: ");
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
   }
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
          || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
          || vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
              || vectorizable_type_demotion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_store (stmt, NULL, NULL, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }
  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: live stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  return true;
}
5155 /* Function vect_transform_stmt.
5157 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5160 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5161 bool *strided_store, slp_tree slp_node,
5162 slp_instance slp_node_instance)
5164 bool is_store = false;
5165 gimple vec_stmt = NULL;
5166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
      done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_promotion_vec_info_type:
      done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *strided_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, gsi, &vec_stmt);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "stmt not supported.");
	  gcc_unreachable ();
	}
    }
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                          vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
        (to be used when vectorizing outer-loop stmts that use the DEF of
        STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
       {
         if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
           {
             exit_phi = USE_STMT (use_p);
             STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
           }
       }
    }
  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
5333 /* Function new_stmt_vec_info.
5335 Create and initialize a new stmt_vec_info struct for STMT. */
stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;

  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5344 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5345 STMT_VINFO_STMT (res) = stmt;
5346 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5347 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5348 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5349 STMT_VINFO_LIVE_P (res) = false;
5350 STMT_VINFO_VECTYPE (res) = NULL;
5351 STMT_VINFO_VEC_STMT (res) = NULL;
5352 STMT_VINFO_VECTORIZABLE (res) = true;
5353 STMT_VINFO_IN_PATTERN_P (res) = false;
5354 STMT_VINFO_RELATED_STMT (res) = NULL;
5355 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5356 STMT_VINFO_DATA_REF (res) = NULL;
5358 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5359 STMT_VINFO_DR_OFFSET (res) = NULL;
5360 STMT_VINFO_DR_INIT (res) = NULL;
5361 STMT_VINFO_DR_STEP (res) = NULL;
5362 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5364 if (gimple_code (stmt) == GIMPLE_PHI
5365 && is_loop_header_bb_p (gimple_bb (stmt)))
5366 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5368 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5370 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5371 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5372 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5373 STMT_SLP_TYPE (res) = loop_vect;
5374 GROUP_FIRST_ELEMENT (res) = NULL;
5375 GROUP_NEXT_ELEMENT (res) = NULL;
5376 GROUP_SIZE (res) = 0;
5377 GROUP_STORE_COUNT (res) = 0;
5378 GROUP_GAP (res) = 0;
5379 GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
5386 /* Create a hash table for stmt_vec_info. */
void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}
5396 /* Free hash table for stmt_vec_info. */
void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}
5406 /* Free stmt vectorization related info. */
void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;
  /* If we'd build a vector type of elements whose mode precision doesn't
     match their type's precision we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;
  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;
5456 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5457 When the component mode passes the above test simply use a type
5458 corresponding to that mode. The theory is that any use that
5459 would cause problems with this will disable vectorization anyway. */
5460 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5461 && !INTEGRAL_TYPE_P (scalar_type)
5462 && !POINTER_TYPE_P (scalar_type))
5463 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;
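  /* For example, a 16-byte SIMD mode over 4-byte ints yields nunits = 4,
     i.e. a V4SI vector type.  */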
5475 vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
5502 unsigned int current_vector_size;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of the same size
   as VECTOR_TYPE, if supported by the target.  */
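/* For example (illustrative): given a 4-byte int SCALAR_TYPE and a 16-byte
   V4SF VECTOR_TYPE, this returns V4SI.  */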
tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.
5541 Returns whether a stmt with OPERAND can be vectorized.
5542 For loops, supportable operands are constants, loop invariants, and operands
5543 that are defined by the current iteration of the loop. Unsupportable
5544 operands are those that are defined by a previous iteration of the loop (as
5545 is the case in reduction/induction computations).
5546 For basic blocks, supportable operands are constants and bb invariants.
5547 For now, operands defined outside the basic block are not supported. */
5550 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5551 bb_vec_info bb_vinfo, gimple *def_stmt,
5552 tree *def, enum vect_def_type *dt)
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }
  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }
  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }
  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }
  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }
  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }
5639 if (vect_print_dump_info (REPORT_DETAILS))
5640 fprintf (vect_dump, "type of def: %d.",*dt);
  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
5677 bb_vec_info bb_vinfo, gimple *def_stmt,
5678 tree *def, enum vect_def_type *dt, tree *vectype)
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;
5683 /* Now get a vector type if the def is internal, otherwise supply
5684 NULL_TREE and leave it up to the caller to figure out a proper
5685 type for the use stmt. */
5686 if (*dt == vect_internal_def
5687 || *dt == vect_induction_def
5688 || *dt == vect_reduction_def
5689 || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
5713 /* Function supportable_widening_operation
5715 Check whether an operation represented by the code CODE is a
5716 widening operation that is supported by the target platform in
5717 vector form (i.e., when operating on arguments of type VECTYPE_IN
5718 producing a result of type VECTYPE_OUT).
5720 Widening operations we currently support are NOP (CONVERT), FLOAT
5721 and WIDEN_MULT. This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
5726 - CODE1 and CODE2 are codes of vector operations to be used when
5727 vectorizing the operation, if available.
5728 - DECL1 and DECL2 are decls of target builtin functions to be used
5729 when vectorizing the operation, if available. In this case,
5730 CODE1 and CODE2 are CALL_EXPR.
5731 - MULTI_STEP_CVT determines the number of required intermediate steps in
5732 case of multi-step conversion (like char->short->int - in that case
5733 MULTI_STEP_CVT will be 1).
5734 - INTERM_TYPES contains the intermediate type required to perform the
5735 widening operation (short in the above example). */
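/* For example (an illustrative sketch): widening a V16QI (char) input to
   int results cannot be done in one step; it goes through V8HI (short),
   so on success *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the V8HI
   vector type - matching the char->short->int example above.  */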
5738 supportable_widening_operation (enum tree_code code, gimple stmt,
5739 tree vectype_out, tree vectype_in,
5740 tree *decl1, tree *decl2,
5741 enum tree_code *code1, enum tree_code *code2,
5742 int *multi_step_cvt,
5743 VEC (tree, heap) **interm_types)
5745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5746 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool ordered_p;
  enum machine_mode vec_mode;
5750 enum insn_code icode1, icode2;
5751 optab optab1, optab2;
5752 tree vectype = vectype_in;
5753 tree wide_vectype = vectype_out;
5754 enum tree_code c1, c2;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);
5759 /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit in one vector).  The generated
5761 vector results would normally be expected to be generated in the same
5762 order as in the original scalar computation, i.e. if 8 results are
5763 generated in each vector iteration, they are to be organized as follows:
5764 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5766 However, in the special case that the result of the widening operation is
5767 used in a reduction computation only, the order doesn't matter (because
5768 when vectorizing a reduction we change the order of the computation).
5769 Some targets can take advantage of this and generate more efficient code.
5770 For example, targets like Altivec, that support widen_mult using a sequence
5771 of {mult_even,mult_odd} generate the following vectors:
5772 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5774 When vectorizing outer-loops, we execute the inner-loop sequentially
5775 (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */
  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    case WIDEN_LSHIFT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
          c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
          c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }
  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the promotion sequence.  We try
         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
         not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || ((icode1 = optab_handler (optab1, prev_mode))
		  == CODE_FOR_nothing)
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || ((icode2 = optab_handler (optab2, prev_mode))
		  == CODE_FOR_nothing)
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || ((icode1 = optab_handler (optab3, intermediate_mode))
		  == CODE_FOR_nothing)
              || ((icode2 = optab_handler (optab4, intermediate_mode))
		  == CODE_FOR_nothing))
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}
5948 /* Function supportable_narrowing_operation
5950 Check whether an operation represented by the code CODE is a
5951 narrowing operation that is supported by the target platform in
5952 vector form (i.e., when operating on arguments of type VECTYPE_IN
5953 and producing a result of type VECTYPE_OUT).
5955 Narrowing operations we currently support are NOP (CONVERT) and
5956 FIX_TRUNC. This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
5960 - CODE1 is the code of a vector operation to be used when
5961 vectorizing the operation, if available.
5962 - MULTI_STEP_CVT determines the number of required intermediate steps in
5963 case of multi-step conversion (like int->short->char - in that case
5964 MULTI_STEP_CVT will be 1).
5965 - INTERM_TYPES contains the intermediate type required to perform the
5966 narrowing operation (short in the above example). */
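/* For example (an illustrative sketch): narrowing V4SI (int) input to
   char results goes through V8HI (short) via VEC_PACK_TRUNC_EXPR, so on
   success *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the V8HI vector
   type - matching the int->short->char example above.  */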
5969 supportable_narrowing_operation (enum tree_code code,
5970 tree vectype_out, tree vectype_in,
5971 enum tree_code *code1, int *multi_step_cvt,
5972 VEC (tree, heap) **interm_types)
5974 enum machine_mode vec_mode;
5975 enum insn_code icode1;
5976 optab optab1, interm_optab;
5977 tree vectype = vectype_in;
5978 tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the narrowing sequence.  We try
         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
         not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
          if (!interm_optab
              || ((icode1 = optab_handler (optab1, prev_mode))
		  == CODE_FOR_nothing)
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || ((icode1 = optab_handler (interm_optab, intermediate_mode))
		  == CODE_FOR_nothing))
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;