/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}

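/* As an illustrative sketch (the SSA name below is invented for the
   example): for an array temporary holding two V4SF vectors, reading
   index 1 emits

       vectx.5_23 = vect_array[1];

   and returns the SSA name vectx.5_23 for use as a vector operand.  */
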
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}

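/* Illustrative note: for TYPE a two-vector array type such as V4SF[2]
   and PTR the address of the first element of an interleaved group,
   the MEM_REF built above is the gimple equivalent of the C expression
   *(V4SF (*)[2]) ptr, carrying the alias information of FIRST_DR and
   the alignment of TYPE.  */
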
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);

              if (vinfo_for_stmt (use_stmt)
                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                {
                  found = true;
                  break;
                }
            }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

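/* An illustrative loop (names invented for the example):

     s = 0;
     for (i = 0; i < n; i++)
       {
         t = a[i] + b[i];
         c[i] = t;              <-- has a vdef: relevant
         s = s + a[i];
       }
     ... = s;                   <-- s is used after the loop: live

   The store to c[i] is relevant because it alters memory, and the
   update of s is live because its final value is used outside the
   loop.  */
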
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

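/* An illustrative instance of case 1 (SSA names invented for the
   example): in the stmt

       x_3 = a[i_7];

   the only use of i_7 is in the address computation of the array
   reference, so process_use leaves the liveness/relevance of the stmt
   that defines i_7 unchanged.  */
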
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              for (i = 1; i < gimple_num_ops (stmt); i++)
                {
                  tree op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}

/* Get cost by calling the target's cost builtin.  */

static inline int
vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

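/* A worked example of the model above: for ncopies = 4 and
   dt = {vect_constant_def, vect_internal_def}, inside_cost is
   4 * vect_get_stmt_cost (vector_stmt), and outside_cost is one more
   vector_stmt for materializing the constant operand before the loop.  */
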
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

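/* For example, in a group of four interleaved stores the first store
   reports a group size of 4 and the other three report 1, so any cost
   that is proportional to the group size is charged only once per
   group.  */
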
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);
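
      /* For example, with ncopies = 2 and group_size = 4, each copy
         needs exact_log2 (4) * 4 = 8 high/low interleaves, i.e.
         2 * 8 = 16 vector_stmt costs for the permutation alone.  */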

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);
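
      /* E.g. for ncopies = 1 and group_size = 2, separating the even
         and odd elements of the two loaded vectors costs
         exact_log2 (2) * 2 = 2 extra vector_stmt operations.  */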

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

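/* For instance (an illustrative sketch): when vectorizing

       a[i] = x + 3;

   with a V4SI vector type, the constant 3 hits case 1 below and is
   widened to {3,3,3,3} via vect_init_vector, whereas x, if defined by a
   stmt inside the loop, hits case 3 and simply reuses the lhs of the
   vectorized stmt recorded for its definition.  */
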
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type, op);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:           STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0      VS1.1
                        VS1.1: vx.1 = memref1      VS1.2
                        VS1.2: vx.2 = memref2      VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1, or from SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree lhs;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
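
  /* An illustrative instance of the classification above: a call taking
     V8HI arguments (nunits_in = 8) and producing V4SI results
     (nunits_out = 4) satisfies nunits_out == nunits_in / 2, so the
     modifier is WIDEN; swapping the types gives NARROW, and equal
     subpart counts give NONE.  */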

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree rhs_type;
  tree builtin_decl;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  VEC(tree,heap) *dummy = NULL;
  int dummy_int;

  /* Is STMT a vectorizable conversion?  */

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code,
                                                  vectype_out, vectype_in);
          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}

2075 /* Function vectorizable_assignment.
2077 Check if STMT performs an assignment (copy) that can be vectorized.
2078 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2079 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2080 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2083 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2084 gimple *vec_stmt, slp_tree slp_node)
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2091 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2095 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2096 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2099 VEC(tree,heap) *vec_oprnds = NULL;
2101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2102 gimple new_stmt = NULL;
2103 stmt_vec_info prev_stmt_info = NULL;
2104 enum tree_code code;
2107 /* Multiple types in SLP are handled by creating the appropriate number of
2108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2110 if (slp_node || PURE_SLP_STMT (stmt_info))
2113 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2115 gcc_assert (ncopies >= 1);
2117 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2120 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2123 /* Is vectorizable assignment? */
2124 if (!is_gimple_assign (stmt))
2127 scalar_dest = gimple_assign_lhs (stmt);
2128 if (TREE_CODE (scalar_dest) != SSA_NAME)
2131 code = gimple_assign_rhs_code (stmt);
2132 if (gimple_assign_single_p (stmt)
2133 || code == PAREN_EXPR
2134 || CONVERT_EXPR_CODE_P (code))
2135 op = gimple_assign_rhs1 (stmt);
2139 if (code == VIEW_CONVERT_EXPR)
2140 op = TREE_OPERAND (op, 0);
2142 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2143 &def_stmt, &def, &dt[0], &vectype_in))
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "use not simple.");
2150 /* We can handle NOP_EXPR conversions that do not change the number
2151 of elements or the vector size. */
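/* For instance, an int <-> unsigned int conversion keeps both the
   element count and the vector size and is vectorized here as a
   plain copy (via VIEW_CONVERT_EXPR below), whereas a width-changing
   conversion such as int -> short is rejected here and left to the
   type demotion/promotion code. */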
2152 if ((CONVERT_EXPR_CODE_P (code)
2153 || code == VIEW_CONVERT_EXPR)
2155 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2156 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2157 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2160 if (!vec_stmt) /* transformation not required. */
2162 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "=== vectorizable_assignment ===");
2165 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2170 if (vect_print_dump_info (REPORT_DETAILS))
2171 fprintf (vect_dump, "transform assignment.");
2174 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2177 for (j = 0; j < ncopies; j++)
2181 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2183 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2185 /* Arguments are ready. Create the new vector stmt. */
2186 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2188 if (CONVERT_EXPR_CODE_P (code)
2189 || code == VIEW_CONVERT_EXPR)
2190 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2191 new_stmt = gimple_build_assign (vec_dest, vop);
2192 new_temp = make_ssa_name (vec_dest, new_stmt);
2193 gimple_assign_set_lhs (new_stmt, new_temp);
2194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2196 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2203 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2205 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2207 prev_stmt_info = vinfo_for_stmt (new_stmt);
2210 VEC_free (tree, heap, vec_oprnds);
2215 /* Function vectorizable_shift.
2217 Check if STMT performs a shift operation that can be vectorized.
2218 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2219 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2220 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2223 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2224 gimple *vec_stmt, slp_tree slp_node)
2228 tree op0, op1 = NULL;
2229 tree vec_oprnd1 = NULL_TREE;
2230 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2233 enum tree_code code;
2234 enum machine_mode vec_mode;
2238 enum machine_mode optab_op2_mode;
2241 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2242 gimple new_stmt = NULL;
2243 stmt_vec_info prev_stmt_info;
2249 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2252 bool scalar_shift_arg = true;
2253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2256 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2259 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2262 /* Is STMT a vectorizable binary/unary operation? */
2263 if (!is_gimple_assign (stmt))
2266 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2269 code = gimple_assign_rhs_code (stmt);
2271 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2272 || code == RROTATE_EXPR))
2275 scalar_dest = gimple_assign_lhs (stmt);
2276 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2278 op0 = gimple_assign_rhs1 (stmt);
2279 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2280 &def_stmt, &def, &dt[0], &vectype))
2282 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "use not simple.");
2286 /* If op0 is an external or constant def use a vector type with
2287 the same size as the output vector type. */
2289 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2291 gcc_assert (vectype);
2294 if (vect_print_dump_info (REPORT_DETAILS))
2296 fprintf (vect_dump, "no vectype for scalar type ");
2297 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2303 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2304 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2305 if (nunits_out != nunits_in)
2308 op1 = gimple_assign_rhs2 (stmt);
2309 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2311 if (vect_print_dump_info (REPORT_DETAILS))
2312 fprintf (vect_dump, "use not simple.");
2317 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2321 /* Multiple types in SLP are handled by creating the appropriate number of
2322 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2324 if (slp_node || PURE_SLP_STMT (stmt_info))
2327 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2329 gcc_assert (ncopies >= 1);
2331 /* Determine whether the shift amount is a vector or a scalar. If the
2332 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2334 if (dt[1] == vect_internal_def && !slp_node)
2335 scalar_shift_arg = false;
2336 else if (dt[1] == vect_constant_def
2337 || dt[1] == vect_external_def
2338 || dt[1] == vect_internal_def)
2340 /* In SLP, we need to check whether the shift count is the same;
2341 in loops, if it is a constant or invariant, it is always
2345 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2348 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2349 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2350 scalar_shift_arg = false;
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "operand mode requires invariant argument.");
2360 /* Vector shifted by vector. */
2361 if (!scalar_shift_arg)
2363 optab = optab_for_tree_code (code, vectype, optab_vector);
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "vector/vector shift/rotate found.");
2367 /* See if the machine has a vector shifted by scalar insn and if not
2368 then see if it has a vector shifted by vector insn. */
2371 optab = optab_for_tree_code (code, vectype, optab_scalar);
2373 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2375 if (vect_print_dump_info (REPORT_DETAILS))
2376 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2380 optab = optab_for_tree_code (code, vectype, optab_vector);
2382 && (optab_handler (optab, TYPE_MODE (vectype))
2383 != CODE_FOR_nothing))
2385 scalar_shift_arg = false;
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "vector/vector shift/rotate found.");
2390 /* Unlike the other binary operators, shifts/rotates take an
2391 rhs of type int rather than of the same type as the lhs,
2392 so make sure the scalar is of the right type if we are
2393 dealing with vectors of short/char. */
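/* E.g., when shifting a V8HI vector by the constant 3, the int
   constant is converted here to a short so that it matches the
   element type of the vector (illustrative example). */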
2394 if (dt[1] == vect_constant_def)
2395 op1 = fold_convert (TREE_TYPE (vectype), op1);
2400 /* Supportable by target? */
2403 if (vect_print_dump_info (REPORT_DETAILS))
2404 fprintf (vect_dump, "no optab.");
2407 vec_mode = TYPE_MODE (vectype);
2408 icode = (int) optab_handler (optab, vec_mode);
2409 if (icode == CODE_FOR_nothing)
2411 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "op not supported by target.");
2413 /* Check only during analysis. */
2414 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2415 || (vf < vect_min_worthwhile_factor (code)
2418 if (vect_print_dump_info (REPORT_DETAILS))
2419 fprintf (vect_dump, "proceeding using word mode.");
2422 /* Worthwhile without SIMD support? Check only during analysis. */
2423 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2424 && vf < vect_min_worthwhile_factor (code)
2427 if (vect_print_dump_info (REPORT_DETAILS))
2428 fprintf (vect_dump, "not worthwhile without SIMD support.");
2432 if (!vec_stmt) /* transformation not required. */
2434 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2435 if (vect_print_dump_info (REPORT_DETAILS))
2436 fprintf (vect_dump, "=== vectorizable_shift ===");
2437 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2443 if (vect_print_dump_info (REPORT_DETAILS))
2444 fprintf (vect_dump, "transform binary/unary operation.");
2447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2449 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2450 created in the previous stages of the recursion, so no allocation is
2451 needed, except for the case of shift with scalar shift argument. In that
2452 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2453 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2454 In case of loop-based vectorization we allocate VECs of size 1. We
2455 allocate VEC_OPRNDS1 only in case of binary operation. */
2458 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2459 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2461 else if (scalar_shift_arg)
2462 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2464 prev_stmt_info = NULL;
2465 for (j = 0; j < ncopies; j++)
2470 if (scalar_shift_arg)
2472 /* Vector shl and shr insn patterns can be defined with scalar
2473 operand 2 (shift operand). In this case, use constant or loop
2474 invariant op1 directly, without extending it to vector mode
2476 optab_op2_mode = insn_data[icode].operand[2].mode;
2477 if (!VECTOR_MODE_P (optab_op2_mode))
2479 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "operand 1 using scalar mode.");
2482 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2485 /* Store vec_oprnd1 for every vector stmt to be created
2486 for SLP_NODE. We check during the analysis that all
2487 the shift arguments are the same.
2488 TODO: Allow different constants for different vector
2489 stmts generated for an SLP instance. */
2490 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2491 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2496 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2497 (a special case for certain kinds of vector shifts); otherwise,
2498 operand 1 should be of a vector type (the usual case). */
2500 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2503 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2507 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2509 /* Arguments are ready. Create the new vector stmt. */
2510 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2512 vop1 = VEC_index (tree, vec_oprnds1, i);
2513 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2514 new_temp = make_ssa_name (vec_dest, new_stmt);
2515 gimple_assign_set_lhs (new_stmt, new_temp);
2516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2518 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2525 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2527 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2528 prev_stmt_info = vinfo_for_stmt (new_stmt);
2531 VEC_free (tree, heap, vec_oprnds0);
2532 VEC_free (tree, heap, vec_oprnds1);
2538 /* Function vectorizable_operation.
2540 Check if STMT performs a binary, unary or ternary operation that can
2542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2547 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2548 gimple *vec_stmt, slp_tree slp_node)
2552 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2553 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2555 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2556 enum tree_code code;
2557 enum machine_mode vec_mode;
2564 enum vect_def_type dt[3]
2565 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2566 gimple new_stmt = NULL;
2567 stmt_vec_info prev_stmt_info;
2573 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2574 tree vop0, vop1, vop2;
2575 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2578 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2581 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2584 /* Is STMT a vectorizable binary/unary operation? */
2585 if (!is_gimple_assign (stmt))
2588 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2591 code = gimple_assign_rhs_code (stmt);
2593 /* For pointer addition, we should use the normal plus for
2594 the vector addition. */
2595 if (code == POINTER_PLUS_EXPR)
2598 /* Support only unary, binary or ternary operations. */
2599 op_type = TREE_CODE_LENGTH (code);
2600 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2602 if (vect_print_dump_info (REPORT_DETAILS))
2603 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2608 scalar_dest = gimple_assign_lhs (stmt);
2609 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2611 op0 = gimple_assign_rhs1 (stmt);
2612 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2613 &def_stmt, &def, &dt[0], &vectype))
2615 if (vect_print_dump_info (REPORT_DETAILS))
2616 fprintf (vect_dump, "use not simple.");
2619 /* If op0 is an external or constant def use a vector type with
2620 the same size as the output vector type. */
2622 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2624 gcc_assert (vectype);
2627 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "no vectype for scalar type ");
2630 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2636 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2637 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2638 if (nunits_out != nunits_in)
2641 if (op_type == binary_op || op_type == ternary_op)
2643 op1 = gimple_assign_rhs2 (stmt);
2644 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2647 if (vect_print_dump_info (REPORT_DETAILS))
2648 fprintf (vect_dump, "use not simple.");
2652 if (op_type == ternary_op)
2654 op2 = gimple_assign_rhs3 (stmt);
2655 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2658 if (vect_print_dump_info (REPORT_DETAILS))
2659 fprintf (vect_dump, "use not simple.");
2665 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2669 /* Multiple types in SLP are handled by creating the appropriate number of
2670 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2672 if (slp_node || PURE_SLP_STMT (stmt_info))
2675 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2677 gcc_assert (ncopies >= 1);
2679 /* Shifts are handled in vectorizable_shift (). */
2680 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2681 || code == RROTATE_EXPR)
2684 optab = optab_for_tree_code (code, vectype, optab_default);
2686 /* Supportable by target? */
2689 if (vect_print_dump_info (REPORT_DETAILS))
2690 fprintf (vect_dump, "no optab.");
2693 vec_mode = TYPE_MODE (vectype);
2694 icode = (int) optab_handler (optab, vec_mode);
2695 if (icode == CODE_FOR_nothing)
2697 if (vect_print_dump_info (REPORT_DETAILS))
2698 fprintf (vect_dump, "op not supported by target.");
2699 /* Check only during analysis. */
2700 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2701 || (vf < vect_min_worthwhile_factor (code)
2704 if (vect_print_dump_info (REPORT_DETAILS))
2705 fprintf (vect_dump, "proceeding using word mode.");
2708 /* Worthwhile without SIMD support? Check only during analysis. */
2709 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2710 && vf < vect_min_worthwhile_factor (code)
2713 if (vect_print_dump_info (REPORT_DETAILS))
2714 fprintf (vect_dump, "not worthwhile without SIMD support.");
2718 if (!vec_stmt) /* transformation not required. */
2720 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "=== vectorizable_operation ===");
2723 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2729 if (vect_print_dump_info (REPORT_DETAILS))
2730 fprintf (vect_dump, "transform binary/unary operation.");
2733 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2735 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2736 created in the previous stages of the recursion, so no allocation is
2737 needed, except for the case of shift with scalar shift argument. In that
2738 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2739 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2740 In case of loop-based vectorization we allocate VECs of size 1. We
2741 allocate VEC_OPRNDS1 only in case of binary operation. */
2744 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2745 if (op_type == binary_op || op_type == ternary_op)
2746 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2747 if (op_type == ternary_op)
2748 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2751 /* In case the vectorization factor (VF) is bigger than the number
2752 of elements that we can fit in a vectype (nunits), we have to generate
2753 more than one vector stmt - i.e - we need to "unroll" the
2754 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2755 from one copy of the vector stmt to the next, in the field
2756 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2757 stages to find the correct vector defs to be used when vectorizing
2758 stmts that use the defs of the current stmt. The example below
2759 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2760 we need to create 4 vectorized stmts):
2762 before vectorization:
2763 RELATED_STMT VEC_STMT
2767 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2769 RELATED_STMT VEC_STMT
2770 VS1_0: vx0 = memref0 VS1_1 -
2771 VS1_1: vx1 = memref1 VS1_2 -
2772 VS1_2: vx2 = memref2 VS1_3 -
2773 VS1_3: vx3 = memref3 - -
2774 S1: x = load - VS1_0
2777 step2: vectorize stmt S2 (done here):
2778 To vectorize stmt S2 we first need to find the relevant vector
2779 def for the first operand 'x'. This is, as usual, obtained from
2780 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2781 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2782 relevant vector def 'vx0'. Having found 'vx0' we can generate
2783 the vector stmt VS2_0, and as usual, record it in the
2784 STMT_VINFO_VEC_STMT of stmt S2.
2785 When creating the second copy (VS2_1), we obtain the relevant vector
2786 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2787 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2788 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2789 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2790 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2791 chain of stmts and pointers:
2792 RELATED_STMT VEC_STMT
2793 VS1_0: vx0 = memref0 VS1_1 -
2794 VS1_1: vx1 = memref1 VS1_2 -
2795 VS1_2: vx2 = memref2 VS1_3 -
2796 VS1_3: vx3 = memref3 - -
2797 S1: x = load - VS1_0
2798 VS2_0: vz0 = vx0 + v1 VS2_1 -
2799 VS2_1: vz1 = vx1 + v1 VS2_2 -
2800 VS2_2: vz2 = vx2 + v1 VS2_3 -
2801 VS2_3: vz3 = vx3 + v1 - -
2802 S2: z = x + 1 - VS2_0 */
2804 prev_stmt_info = NULL;
2805 for (j = 0; j < ncopies; j++)
2810 if (op_type == binary_op || op_type == ternary_op)
2811 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2814 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2816 if (op_type == ternary_op)
2818 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2819 VEC_quick_push (tree, vec_oprnds2,
2820 vect_get_vec_def_for_operand (op2, stmt, NULL));
2825 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2826 if (op_type == ternary_op)
2828 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2829 VEC_quick_push (tree, vec_oprnds2,
2830 vect_get_vec_def_for_stmt_copy (dt[2],
2835 /* Arguments are ready. Create the new vector stmt. */
2836 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2838 vop1 = ((op_type == binary_op || op_type == ternary_op)
2839 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2840 vop2 = ((op_type == ternary_op)
2841 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2842 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2844 new_temp = make_ssa_name (vec_dest, new_stmt);
2845 gimple_assign_set_lhs (new_stmt, new_temp);
2846 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2848 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2855 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2857 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2858 prev_stmt_info = vinfo_for_stmt (new_stmt);
2861 VEC_free (tree, heap, vec_oprnds0);
2863 VEC_free (tree, heap, vec_oprnds1);
2865 VEC_free (tree, heap, vec_oprnds2);
2871 /* Get vectorized definitions for loop-based vectorization. For the first
2872 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2873 scalar operand), and for the rest we get a copy with
2874 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2875 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2876 The vectors are collected into VEC_OPRNDS. */
2879 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2880 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2884 /* Get first vector operand. */
2885 /* All the vector operands except the very first one (that is scalar oprnd)
2887 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2888 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2890 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2892 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2894 /* Get second vector operand. */
2895 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2896 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2900 /* For conversion in multiple steps, continue to get operands
2903 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
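/* Note that each invocation pushes two defs, so the call in
   vectorizable_type_demotion with MULTI_STEP_CVT == vect_pow2 (k) - 1
   collects 2^(k+1) vector defs in total, which is the number of input
   vectors the whole demotion sequence consumes per final result. */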
2907 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2908 For multi-step conversions store the resulting vectors and call the function
2912 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2913 int multi_step_cvt, gimple stmt,
2914 VEC (tree, heap) *vec_dsts,
2915 gimple_stmt_iterator *gsi,
2916 slp_tree slp_node, enum tree_code code,
2917 stmt_vec_info *prev_stmt_info)
2920 tree vop0, vop1, new_tmp, vec_dest;
2922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2924 vec_dest = VEC_pop (tree, vec_dsts);
2926 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2928 /* Create demotion operation. */
2929 vop0 = VEC_index (tree, *vec_oprnds, i);
2930 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2931 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2932 new_tmp = make_ssa_name (vec_dest, new_stmt);
2933 gimple_assign_set_lhs (new_stmt, new_tmp);
2934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2937 /* Store the resulting vector for next recursive call. */
2938 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2941 /* This is the last step of the conversion sequence. Store the
2942 vectors in SLP_NODE or in vector info of the scalar statement
2943 (or in STMT_VINFO_RELATED_STMT chain). */
2945 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2948 if (!*prev_stmt_info)
2949 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2951 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2953 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2958 /* For multi-step demotion operations we first generate demotion operations
2959 from the source type to the intermediate types, and then combine the
2960 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2964 /* At each level of recursion we have half of the operands we had at the previous level. */
2966 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2967 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2968 stmt, vec_dsts, gsi, slp_node,
2969 code, prev_stmt_info);
2974 /* Function vectorizable_type_demotion
2976 Check if STMT performs a binary or unary operation that involves
2977 type demotion, and if it can be vectorized.
2978 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2979 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2980 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2983 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2984 gimple *vec_stmt, slp_tree slp_node)
2989 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2990 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2991 enum tree_code code, code1 = ERROR_MARK;
2994 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2995 stmt_vec_info prev_stmt_info;
3002 int multi_step_cvt = 0;
3003 VEC (tree, heap) *vec_oprnds0 = NULL;
3004 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3005 tree last_oprnd, intermediate_type;
3007 /* FORNOW: not supported by basic block SLP vectorization. */
3008 gcc_assert (loop_vinfo);
3010 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3013 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3016 /* Is STMT a vectorizable type-demotion operation? */
3017 if (!is_gimple_assign (stmt))
3020 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3023 code = gimple_assign_rhs_code (stmt);
3024 if (!CONVERT_EXPR_CODE_P (code))
3027 scalar_dest = gimple_assign_lhs (stmt);
3028 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3030 /* Check the operands of the operation. */
3031 op0 = gimple_assign_rhs1 (stmt);
3032 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3033 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3034 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3035 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3036 && CONVERT_EXPR_CODE_P (code))))
3038 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3039 &def_stmt, &def, &dt[0], &vectype_in))
3041 if (vect_print_dump_info (REPORT_DETAILS))
3042 fprintf (vect_dump, "use not simple.");
3045 /* If op0 is an external def use a vector type with the
3046 same size as the output vector type if possible. */
3048 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3050 gcc_assert (vectype_in);
3053 if (vect_print_dump_info (REPORT_DETAILS))
3055 fprintf (vect_dump, "no vectype for scalar type ");
3056 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3062 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3063 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3064 if (nunits_in >= nunits_out)
3067 /* Multiple types in SLP are handled by creating the appropriate number of
3068 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3070 if (slp_node || PURE_SLP_STMT (stmt_info))
3073 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3074 gcc_assert (ncopies >= 1);
3076 /* Supportable by target? */
3077 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3078 &code1, &multi_step_cvt, &interm_types))
3081 if (!vec_stmt) /* transformation not required. */
3083 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3084 if (vect_print_dump_info (REPORT_DETAILS))
3085 fprintf (vect_dump, "=== vectorizable_demotion ===");
3086 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3091 if (vect_print_dump_info (REPORT_DETAILS))
3092 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3095 /* In case of multi-step demotion, we first generate demotion operations to
3096 the intermediate types, and then from those types to the final one.
3097 We create vector destinations for the intermediate type (TYPES) received
3098 from supportable_narrowing_operation, and store them in the correct order
3099 for future use in vect_create_vectorized_demotion_stmts(). */
3101 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3103 vec_dsts = VEC_alloc (tree, heap, 1);
3105 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3106 VEC_quick_push (tree, vec_dsts, vec_dest);
3110 for (i = VEC_length (tree, interm_types) - 1;
3111 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3113 vec_dest = vect_create_destination_var (scalar_dest,
3115 VEC_quick_push (tree, vec_dsts, vec_dest);
3119 /* In case the vectorization factor (VF) is bigger than the number
3120 of elements that we can fit in a vectype (nunits), we have to generate
3121 more than one vector stmt - i.e - we need to "unroll" the
3122 vector stmt by a factor VF/nunits. */
3124 prev_stmt_info = NULL;
3125 for (j = 0; j < ncopies; j++)
3129 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3132 VEC_free (tree, heap, vec_oprnds0);
3133 vec_oprnds0 = VEC_alloc (tree, heap,
3134 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3135 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3136 vect_pow2 (multi_step_cvt) - 1);
3139 /* Arguments are ready. Create the new vector stmts. */
3140 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3141 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3142 multi_step_cvt, stmt, tmp_vec_dsts,
3143 gsi, slp_node, code1,
3147 VEC_free (tree, heap, vec_oprnds0);
3148 VEC_free (tree, heap, vec_dsts);
3149 VEC_free (tree, heap, tmp_vec_dsts);
3150 VEC_free (tree, heap, interm_types);
3152 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3157 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3158 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3159 the resulting vectors and call the function recursively. */
3162 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3163 VEC (tree, heap) **vec_oprnds1,
3164 int multi_step_cvt, gimple stmt,
3165 VEC (tree, heap) *vec_dsts,
3166 gimple_stmt_iterator *gsi,
3167 slp_tree slp_node, enum tree_code code1,
3168 enum tree_code code2, tree decl1,
3169 tree decl2, int op_type,
3170 stmt_vec_info *prev_stmt_info)
3173 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3174 gimple new_stmt1, new_stmt2;
3175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3176 VEC (tree, heap) *vec_tmp;
3178 vec_dest = VEC_pop (tree, vec_dsts);
3179 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3181 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3183 if (op_type == binary_op)
3184 vop1 = VEC_index (tree, *vec_oprnds1, i);
3188 /* Generate the two halves of the promotion operation. */
3189 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3190 op_type, vec_dest, gsi, stmt);
3191 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3192 op_type, vec_dest, gsi, stmt);
3193 if (is_gimple_call (new_stmt1))
3195 new_tmp1 = gimple_call_lhs (new_stmt1);
3196 new_tmp2 = gimple_call_lhs (new_stmt2);
3200 new_tmp1 = gimple_assign_lhs (new_stmt1);
3201 new_tmp2 = gimple_assign_lhs (new_stmt2);
3206 /* Store the results for the recursive call. */
3207 VEC_quick_push (tree, vec_tmp, new_tmp1);
3208 VEC_quick_push (tree, vec_tmp, new_tmp2);
3212 /* Last step of the promotion sequence - store the results. */
3215 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3216 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3220 if (!*prev_stmt_info)
3221 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3223 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3225 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3226 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3227 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3234 /* For a multi-step promotion operation we call the function
3235 recursively for every stage. We start from the input type,
3236 create promotion operations to the intermediate types, and then
3237 create promotions to the output type. */
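/* Illustrative example: promoting chars to ints in two steps goes
   char -> short -> int; each input vector is unpacked into two short
   vectors and each of those into two int vectors, so every stage
   doubles the number of result vectors. */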
3238 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3239 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3240 multi_step_cvt - 1, stmt,
3241 vec_dsts, gsi, slp_node, code1,
3242 code2, decl1, decl2, op_type,
3246 VEC_free (tree, heap, vec_tmp);
3250 /* Function vectorizable_type_promotion
3252 Check if STMT performs a binary or unary operation that involves
3253 type promotion, and if it can be vectorized.
3254 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3255 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3259 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3260 gimple *vec_stmt, slp_tree slp_node)
3264 tree op0, op1 = NULL;
3265 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3267 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3268 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3269 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3273 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3274 stmt_vec_info prev_stmt_info;
3281 tree intermediate_type = NULL_TREE;
3282 int multi_step_cvt = 0;
3283 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3284 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3286 /* FORNOW: not supported by basic block SLP vectorization. */
3287 gcc_assert (loop_vinfo);
3289 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3292 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3295 /* Is STMT a vectorizable type-promotion operation? */
3296 if (!is_gimple_assign (stmt))
3299 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3302 code = gimple_assign_rhs_code (stmt);
3303 if (!CONVERT_EXPR_CODE_P (code)
3304 && code != WIDEN_MULT_EXPR)
3307 scalar_dest = gimple_assign_lhs (stmt);
3308 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3310 /* Check the operands of the operation. */
3311 op0 = gimple_assign_rhs1 (stmt);
3312 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3313 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3314 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3315 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3316 && CONVERT_EXPR_CODE_P (code))))
3318 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3319 &def_stmt, &def, &dt[0], &vectype_in))
3321 if (vect_print_dump_info (REPORT_DETAILS))
3322 fprintf (vect_dump, "use not simple.");
3326 op_type = TREE_CODE_LENGTH (code);
3327 if (op_type == binary_op)
3331 op1 = gimple_assign_rhs2 (stmt);
3332 if (code == WIDEN_MULT_EXPR)
3334 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3336 if (CONSTANT_CLASS_P (op0))
3337 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3338 &def_stmt, &def, &dt[1], &vectype_in);
3340 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3345 if (vect_print_dump_info (REPORT_DETAILS))
3346 fprintf (vect_dump, "use not simple.");
3352 /* If op0 is an external or constant def use a vector type with
3353 the same size as the output vector type. */
3355 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3357 gcc_assert (vectype_in);
3360 if (vect_print_dump_info (REPORT_DETAILS))
3362 fprintf (vect_dump, "no vectype for scalar type ");
3363 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3369 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3370 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3371 if (nunits_in <= nunits_out)
3374 /* Multiple types in SLP are handled by creating the appropriate number of
3375 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3377 if (slp_node || PURE_SLP_STMT (stmt_info))
3380 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3382 gcc_assert (ncopies >= 1);
3384 /* Supportable by target? */
3385 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3386 &decl1, &decl2, &code1, &code2,
3387 &multi_step_cvt, &interm_types))
3390 /* Binary widening operation can only be supported directly by the
3392 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3394 if (!vec_stmt) /* transformation not required. */
3396 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3397 if (vect_print_dump_info (REPORT_DETAILS))
3398 fprintf (vect_dump, "=== vectorizable_promotion ===");
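/* A promotion emits two vector stmts per copy (the two halves of the
   widened result), hence the factor of 2 in the cost below. */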
3399 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3405 if (vect_print_dump_info (REPORT_DETAILS))
3406 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3409 if (code == WIDEN_MULT_EXPR)
3411 if (CONSTANT_CLASS_P (op0))
3412 op0 = fold_convert (TREE_TYPE (op1), op0);
3413 else if (CONSTANT_CLASS_P (op1))
3414 op1 = fold_convert (TREE_TYPE (op0), op1);
3418 /* In case of multi-step promotion, we first generate promotion operations
3419 to the intermediate types, and then from those types to the final one.
3420 We store vector destination in VEC_DSTS in the correct order for
3421 recursive creation of promotion operations in
3422 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3423 according to TYPES received from supportable_widening_operation(). */
3425 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3427 vec_dsts = VEC_alloc (tree, heap, 1);
3429 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3430 VEC_quick_push (tree, vec_dsts, vec_dest);
3434 for (i = VEC_length (tree, interm_types) - 1;
3435 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3437 vec_dest = vect_create_destination_var (scalar_dest,
3439 VEC_quick_push (tree, vec_dsts, vec_dest);
3445 vec_oprnds0 = VEC_alloc (tree, heap,
3446 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3447 if (op_type == binary_op)
3448 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3451 /* In case the vectorization factor (VF) is bigger than the number
3452 of elements that we can fit in a vectype (nunits), we have to generate
3453 more than one vector stmt - i.e - we need to "unroll" the
3454 vector stmt by a factor VF/nunits. */
3456 prev_stmt_info = NULL;
3457 for (j = 0; j < ncopies; j++)
3463 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3467 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3468 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3469 if (op_type == binary_op)
3471 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3472 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3478 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3479 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3480 if (op_type == binary_op)
3482 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3483 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3487 /* Arguments are ready. Create the new vector stmts. */
3488 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3489 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3490 multi_step_cvt, stmt,
3492 gsi, slp_node, code1, code2,
3493 decl1, decl2, op_type,
3497 VEC_free (tree, heap, vec_dsts);
3498 VEC_free (tree, heap, tmp_vec_dsts);
3499 VEC_free (tree, heap, interm_types);
3500 VEC_free (tree, heap, vec_oprnds0);
3501 VEC_free (tree, heap, vec_oprnds1);
3503 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3508 /* Function vectorizable_store.
3510 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3512 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3513 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3514 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3517 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3523 tree vec_oprnd = NULL_TREE;
3524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3525 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3526 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3528 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3529 struct loop *loop = NULL;
3530 enum machine_mode vec_mode;
3532 enum dr_alignment_support alignment_support_scheme;
3535 enum vect_def_type dt;
3536 stmt_vec_info prev_stmt_info = NULL;
3537 tree dataref_ptr = NULL_TREE;
3538 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3541 gimple next_stmt, first_stmt = NULL;
3542 bool strided_store = false;
3543 bool store_lanes_p = false;
3544 unsigned int group_size, i;
3545 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3547 VEC(tree,heap) *vec_oprnds = NULL;
3548 bool slp = (slp_node != NULL);
3549 unsigned int vec_num;
3550 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3554 loop = LOOP_VINFO_LOOP (loop_vinfo);
3556 /* Multiple types in SLP are handled by creating the appropriate number of
3557 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3559 if (slp || PURE_SLP_STMT (stmt_info))
3562 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3564 gcc_assert (ncopies >= 1);
3566 /* FORNOW. This restriction should be relaxed. */
3567 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3569 if (vect_print_dump_info (REPORT_DETAILS))
3570 fprintf (vect_dump, "multiple types in nested loop.");
3574 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3577 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3580 /* Is vectorizable store? */
3582 if (!is_gimple_assign (stmt))
3585 scalar_dest = gimple_assign_lhs (stmt);
3586 if (TREE_CODE (scalar_dest) != ARRAY_REF
3587 && TREE_CODE (scalar_dest) != INDIRECT_REF
3588 && TREE_CODE (scalar_dest) != COMPONENT_REF
3589 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3590 && TREE_CODE (scalar_dest) != REALPART_EXPR
3591 && TREE_CODE (scalar_dest) != MEM_REF)
3594 gcc_assert (gimple_assign_single_p (stmt));
3595 op = gimple_assign_rhs1 (stmt);
3596 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3598 if (vect_print_dump_info (REPORT_DETAILS))
3599 fprintf (vect_dump, "use not simple.");
3603 /* The scalar rhs type needs to be trivially convertible to the vector
3604 component type. This should always be the case. */
3605 elem_type = TREE_TYPE (vectype);
3606 if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
3608 if (vect_print_dump_info (REPORT_DETAILS))
3609 fprintf (vect_dump, "??? operands of different types");
3613 vec_mode = TYPE_MODE (vectype);
3614 /* FORNOW. In some cases can vectorize even if data-type not supported
3615 (e.g. - array initialization with 0). */
3616 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3619 if (!STMT_VINFO_DATA_REF (stmt_info))
3622 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3624 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "negative step for store.");
3629 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3631 strided_store = true;
3632 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3633 if (!slp && !PURE_SLP_STMT (stmt_info))
3635 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3636 if (vect_store_lanes_supported (vectype, group_size))
3637 store_lanes_p = true;
3638 else if (!vect_strided_store_supported (vectype, group_size))
3642 if (first_stmt == stmt)
3644 /* STMT is the leader of the group. Check the operands of all the
3645 stmts of the group. */
3646 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3649 gcc_assert (gimple_assign_single_p (next_stmt));
3650 op = gimple_assign_rhs1 (next_stmt);
3651 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3654 if (vect_print_dump_info (REPORT_DETAILS))
3655 fprintf (vect_dump, "use not simple.");
3658 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3663 if (!vec_stmt) /* transformation not required. */
3665 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3666 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3674 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3675 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3677 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3680 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3682 /* We vectorize all the stmts of the interleaving group when we
3683 reach the last stmt in the group. */
3684 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3685 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3694 strided_store = false;
3695 /* VEC_NUM is the number of vect stmts to be created for this
3697 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3698 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3699 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3702 /* VEC_NUM is the number of vect stmts to be created for this
3704 vec_num = group_size;
3710 group_size = vec_num = 1;
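/* To summarize: for SLP, VEC_NUM is the number of vector stmts of the
   SLP node; for an interleaved store it is the group size; for a
   plain store both GROUP_SIZE and VEC_NUM are 1. */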
3713 if (vect_print_dump_info (REPORT_DETAILS))
3714 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3716 dr_chain = VEC_alloc (tree, heap, group_size);
3717 oprnds = VEC_alloc (tree, heap, group_size);
3719 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3720 gcc_assert (alignment_support_scheme);
3721 /* Targets with store-lane instructions must not require explicit
3723 gcc_assert (!store_lanes_p
3724 || alignment_support_scheme == dr_aligned
3725 || alignment_support_scheme == dr_unaligned_supported);
3728 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3730 aggr_type = vectype;
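/* The per-copy pointer bump uses TYPE_SIZE_UNIT (aggr_type), so with
   store-lanes the data pointer advances over the whole group
   (VEC_NUM * NUNITS elements) at once; otherwise it advances one
   vector at a time. */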
3732 /* In case the vectorization factor (VF) is bigger than the number
3733 of elements that we can fit in a vectype (nunits), we have to generate
3734 more than one vector stmt - i.e - we need to "unroll" the
3735 vector stmt by a factor VF/nunits. For more details see documentation in
3736 vect_get_vec_def_for_copy_stmt. */
3738 /* In case of interleaving (non-unit strided access):
3745 We create vectorized stores starting from base address (the access of the
3746 first stmt in the chain (S2 in the above example), when the last store stmt
3747 of the chain (S4) is reached:
3750 VS2: &base + vec_size*1 = vx0
3751 VS3: &base + vec_size*2 = vx1
3752 VS4: &base + vec_size*3 = vx3
3754 Then permutation statements are generated:
3756 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3757 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3760 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3761 (the order of the data-refs in the output of vect_permute_store_chain
3762 corresponds to the order of scalar stmts in the interleaving chain - see
3763 the documentation of vect_permute_store_chain()).
3765 In case of both multiple types and interleaving, the above vector stores and
3766 permutation stmts are created for every copy. The result vector stmts are
3767 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3768 STMT_VINFO_RELATED_STMT for the next copies.
3771 prev_stmt_info = NULL;
3772 for (j = 0; j < ncopies; j++)
3781 /* Get vectorized arguments for SLP_NODE. */
3782 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3785 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3789 /* For interleaved stores we collect vectorized defs for all the
3790 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3791 used as an input to vect_permute_store_chain(), and OPRNDS as
3792 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3794 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3795 OPRNDS are of size 1. */
3796 next_stmt = first_stmt;
3797 for (i = 0; i < group_size; i++)
3799 /* Since gaps are not supported for interleaved stores,
3800 GROUP_SIZE is the exact number of stmts in the chain.
3801 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3802 there is no interleaving, GROUP_SIZE is 1, and only one
3803 iteration of the loop will be executed. */
3804 gcc_assert (next_stmt
3805 && gimple_assign_single_p (next_stmt));
3806 op = gimple_assign_rhs1 (next_stmt);
3808 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3810 VEC_quick_push (tree, dr_chain, vec_oprnd);
3811 VEC_quick_push (tree, oprnds, vec_oprnd);
3812 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3816 /* We should have caught mismatched types earlier. */
3817 gcc_assert (useless_type_conversion_p (vectype,
3818 TREE_TYPE (vec_oprnd)));
3819 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3820 NULL_TREE, &dummy, gsi,
3821 &ptr_incr, false, &inv_p);
3822 gcc_assert (bb_vinfo || !inv_p);
3826 /* For interleaved stores we created vectorized defs for all the
3827 defs stored in OPRNDS in the previous iteration (previous copy).
3828 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3829 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3831 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3832 OPRNDS are of size 1. */
3833 for (i = 0; i < group_size; i++)
3835 op = VEC_index (tree, oprnds, i);
3836 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3838 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3839 VEC_replace (tree, dr_chain, i, vec_oprnd);
3840 VEC_replace (tree, oprnds, i, vec_oprnd);
3842 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3843 TYPE_SIZE_UNIT (aggr_type));
3850 /* Combine all the vectors into an array. */
3851 vec_array = create_vector_array (vectype, vec_num);
3852 for (i = 0; i < vec_num; i++)
3854 vec_oprnd = VEC_index (tree, dr_chain, i);
3855 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3859 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3860 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3861 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3862 gimple_call_set_lhs (new_stmt, data_ref);
3863 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3864 mark_symbols_for_renaming (new_stmt);
3871 result_chain = VEC_alloc (tree, heap, group_size);
3873 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3877 next_stmt = first_stmt;
3878 for (i = 0; i < vec_num; i++)
3880 struct ptr_info_def *pi;
3883 /* Bump the vector pointer. */
3884 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3888 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3889 else if (strided_store)
3890 /* For strided stores vectorized defs are interleaved in
3891 vect_permute_store_chain(). */
3892 vec_oprnd = VEC_index (tree, result_chain, i);
3894 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3895 build_int_cst (reference_alias_ptr_type
3896 (DR_REF (first_dr)), 0));
3897 pi = get_ptr_info (dataref_ptr);
3898 pi->align = TYPE_ALIGN_UNIT (vectype);
3899 if (aligned_access_p (first_dr))
3901 else if (DR_MISALIGNMENT (first_dr) == -1)
3903 TREE_TYPE (data_ref)
3904 = build_aligned_type (TREE_TYPE (data_ref),
3905 TYPE_ALIGN (elem_type));
3906 pi->align = TYPE_ALIGN_UNIT (elem_type);
3911 TREE_TYPE (data_ref)
3912 = build_aligned_type (TREE_TYPE (data_ref),
3913 TYPE_ALIGN (elem_type));
3914 pi->misalign = DR_MISALIGNMENT (first_dr);
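/* In summary: an aligned access keeps the natural vector alignment on
   the pointer; with unknown misalignment (DR_MISALIGNMENT == -1) the
   reference is weakened to element alignment; with known misalignment
   element alignment is used as well, but the byte offset is recorded
   in pi->misalign for the target's benefit. */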
3917 /* Arguments are ready. Create the new vector stmt. */
3918 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3919 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3920 mark_symbols_for_renaming (new_stmt);
3925 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3933 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3935 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3936 prev_stmt_info = vinfo_for_stmt (new_stmt);
3940 VEC_free (tree, heap, dr_chain);
3941 VEC_free (tree, heap, oprnds);
3943 VEC_free (tree, heap, result_chain);
3945 VEC_free (tree, heap, vec_oprnds);
3950 /* Given a vector type VECTYPE, return a builtin DECL to be used
3951 for vector permutation and store a mask into *MASK that implements
3952 reversal of the vector elements. If that is impossible to do,
3953 return NULL (and leave *MASK unchanged). */
3956 perm_mask_for_reverse (tree vectype, tree *mask)
3959 tree mask_element_type, mask_type;
3960 tree mask_vec = NULL;
3963 if (!targetm.vectorize.builtin_vec_perm)
3966 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3967 &mask_element_type);
3968 if (!builtin_decl || !mask_element_type)
3971 mask_type = get_vectype_for_scalar_type (mask_element_type);
3972 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3974 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
3977 for (i = 0; i < nunits; i++)
3978 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
3979 mask_vec = build_vector (mask_type, mask_vec);
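/* Note that tree_cons prepends, so the list built above is
   { nunits-1, ..., 1, 0 }: the mask selects the input elements in
   reverse order. */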
3981 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
3985 return builtin_decl;
3988 /* Given a vector variable X that was generated for the scalar LHS of
3989 STMT, generate instructions to reverse the vector elements of X,
3990 insert them at *GSI and return the permuted vector variable. */
3993 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
3995 tree vectype = TREE_TYPE (x);
3996 tree mask_vec, builtin_decl;
3997 tree perm_dest, data_ref;
4000 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
4002 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4004 /* Generate the permute statement. */
4005 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
4006 if (!useless_type_conversion_p (vectype,
4007 TREE_TYPE (TREE_TYPE (builtin_decl))))
4009 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
4010 tem = make_ssa_name (tem, perm_stmt);
4011 gimple_call_set_lhs (perm_stmt, tem);
4012 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4013 perm_stmt = gimple_build_assign (NULL_TREE,
4014 build1 (VIEW_CONVERT_EXPR,
4017 data_ref = make_ssa_name (perm_dest, perm_stmt);
4018 gimple_set_lhs (perm_stmt, data_ref);
4019 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4024 /* vectorizable_load.
4026 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4028 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4029 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4030 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4033 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4034 slp_tree slp_node, slp_instance slp_node_instance)
4037 tree vec_dest = NULL;
4038 tree data_ref = NULL;
4039 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4040 stmt_vec_info prev_stmt_info;
4041 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4042 struct loop *loop = NULL;
4043 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4044 bool nested_in_vect_loop = false;
4045 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4046 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4049 enum machine_mode mode;
4050 gimple new_stmt = NULL;
4052 enum dr_alignment_support alignment_support_scheme;
4053 tree dataref_ptr = NULL_TREE;
4055 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4057 int i, j, group_size;
4058 tree msq = NULL_TREE, lsq;
4059 tree offset = NULL_TREE;
4060 tree realignment_token = NULL_TREE;
4062 VEC(tree,heap) *dr_chain = NULL;
4063 bool strided_load = false;
4064 bool load_lanes_p = false;
4069 bool compute_in_loop = false;
4070 struct loop *at_loop;
4072 bool slp = (slp_node != NULL);
4073 bool slp_perm = false;
4074 enum tree_code code;
4075 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4081 loop = LOOP_VINFO_LOOP (loop_vinfo);
4082 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4083 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4088 /* Multiple types in SLP are handled by creating the appropriate number of
4089 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4091 if (slp || PURE_SLP_STMT (stmt_info))
4094 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4096 gcc_assert (ncopies >= 1);
4098 /* FORNOW. This restriction should be relaxed. */
4099 if (nested_in_vect_loop && ncopies > 1)
4101 if (vect_print_dump_info (REPORT_DETAILS))
4102 fprintf (vect_dump, "multiple types in nested loop.");
4106 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4109 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4112 /* Is vectorizable load? */
4113 if (!is_gimple_assign (stmt))
4116 scalar_dest = gimple_assign_lhs (stmt);
4117 if (TREE_CODE (scalar_dest) != SSA_NAME)
4120 code = gimple_assign_rhs_code (stmt);
4121 if (code != ARRAY_REF
4122 && code != INDIRECT_REF
4123 && code != COMPONENT_REF
4124 && code != IMAGPART_EXPR
4125 && code != REALPART_EXPR
4127 && TREE_CODE_CLASS (code) != tcc_declaration)
4130 if (!STMT_VINFO_DATA_REF (stmt_info))
4133 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
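/* A load with a negative step is handled by reading the elements in
   reverse order (see reverse_vec_elements above), so the checks below
   reject the cases where more than one vector copy would be needed or
   where no element-reversing permutation is available. */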
4134 if (negative && ncopies > 1)
4136 if (vect_print_dump_info (REPORT_DETAILS))
4137 fprintf (vect_dump, "multiple types with negative step.");
4141 scalar_type = TREE_TYPE (DR_REF (dr));
4142 mode = TYPE_MODE (vectype);
4144 /* FORNOW. In some cases can vectorize even if data-type not supported
4145 (e.g. - data copies). */
4146 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4148 if (vect_print_dump_info (REPORT_DETAILS))
4149 fprintf (vect_dump, "Aligned load, but unsupported type.");
4153 /* The vector component type needs to be trivially convertible to the
4154 scalar lhs. This should always be the case. */
4155 elem_type = TREE_TYPE (vectype);
4156 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
4158 if (vect_print_dump_info (REPORT_DETAILS))
4159 fprintf (vect_dump, "??? operands of different types");
4163 /* Check if the load is a part of an interleaving chain. */
4164 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4166 strided_load = true;
4168 gcc_assert (! nested_in_vect_loop);
4170 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4171 if (!slp && !PURE_SLP_STMT (stmt_info))
4173 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4174 if (vect_load_lanes_supported (vectype, group_size))
4175 load_lanes_p = true;
4176 else if (!vect_strided_load_supported (vectype, group_size))
4183 gcc_assert (!strided_load);
4184 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4185 if (alignment_support_scheme != dr_aligned
4186 && alignment_support_scheme != dr_unaligned_supported)
4188 if (vect_print_dump_info (REPORT_DETAILS))
4189 fprintf (vect_dump, "negative step but alignment required.");
4192 if (!perm_mask_for_reverse (vectype, NULL))
4194 if (vect_print_dump_info (REPORT_DETAILS))
4195 fprintf (vect_dump, "negative step and reversing not supported.");
4200 if (!vec_stmt) /* transformation not required. */
4202 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4203 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4207 if (vect_print_dump_info (REPORT_DETAILS))
4208 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4214 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4215 /* Check if the chain of loads is already vectorized. */
4216 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4218 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4221 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4222 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4224 /* VEC_NUM is the number of vect stmts to be created for this group. */
4227 strided_load = false;
4228 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4229 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4233 vec_num = group_size;
4239 group_size = vec_num = 1;
4242 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4243 gcc_assert (alignment_support_scheme);
4244 /* Targets with load-lane instructions must not require explicit
4245 realignment. */
4246 gcc_assert (!load_lanes_p
4247 || alignment_support_scheme == dr_aligned
4248 || alignment_support_scheme == dr_unaligned_supported);
4250 /* In case the vectorization factor (VF) is bigger than the number
4251 of elements that we can fit in a vectype (nunits), we have to generate
4252 more than one vector stmt - i.e., we need to "unroll" the
4253 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4254 from one copy of the vector stmt to the next, in the field
4255 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4256 stages to find the correct vector defs to be used when vectorizing
4257 stmts that use the defs of the current stmt. The example below
4258 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4259 need to create 4 vectorized stmts):
4261 before vectorization:
4262 RELATED_STMT VEC_STMT
4266 step 1: vectorize stmt S1:
4267 We first create the vector stmt VS1_0, and, as usual, record a
4268 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4269 Next, we create the vector stmt VS1_1, and record a pointer to
4270 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4271 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4273 RELATED_STMT VEC_STMT
4274 VS1_0: vx0 = memref0 VS1_1 -
4275 VS1_1: vx1 = memref1 VS1_2 -
4276 VS1_2: vx2 = memref2 VS1_3 -
4277 VS1_3: vx3 = memref3 - -
4278 S1: x = load - VS1_0
4281 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4282 information we recorded in the RELATED_STMT field is used to vectorize
4283 stmts that use the defs of the current stmt. */
4285 /* In case of interleaving (non-unit strided access):
4292 Vectorized loads are created in the order of memory accesses
4293 starting from the access of the first stmt of the chain:
4296 VS2: vx1 = &base + vec_size*1
4297 VS3: vx3 = &base + vec_size*2
4298 VS4: vx4 = &base + vec_size*3
4300 Then permutation statements are generated:
4302 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4303 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4306 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4307 (the order of the data-refs in the output of vect_permute_load_chain
4308 corresponds to the order of scalar stmts in the interleaving chain - see
4309 the documentation of vect_permute_load_chain()).
4310 The generation of permutation stmts and recording them in
4311 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4313 In case of both multiple types and interleaving, the vector loads and
4314 permutation stmts above are created for every copy. The result vector
4315 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4316 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
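/* Concrete illustration with assumed data (not from the original
   sources): for two interleaved loads a[i] and b[i] and V4SI, the
   loaded chain

     vx0 = {a0,b0,a1,b1}   vx1 = {a2,b2,a3,b3}

   is permuted into

     vx5 = VEC_EXTRACT_EVEN_EXPR <vx0, vx1> = {a0,a1,a2,a3}
     vx6 = VEC_EXTRACT_ODD_EXPR  <vx0, vx1> = {b0,b1,b2,b3}  */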
4318 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4319 on a target that supports unaligned accesses (dr_unaligned_supported)
4320 we generate the following code:
4324 p = p + indx * vectype_size;
4329 Otherwise, the data reference is potentially unaligned on a target that
4330 does not support unaligned accesses (dr_explicit_realign_optimized) -
4331 then generate the following code, in which the data in each iteration is
4332 obtained by two vector loads, one from the previous iteration, and one
4333 from the current iteration:
4335 msq_init = *(floor(p1))
4336 p2 = initial_addr + VS - 1;
4337 realignment_token = call target_builtin;
4340 p2 = p2 + indx * vectype_size
4342 vec_dest = realign_load (msq, lsq, realignment_token)
4347 /* If the misalignment remains the same throughout the execution of the
4348 loop, we can create the init_addr and permutation mask at the loop
4349 preheader. Otherwise, it needs to be created inside the loop.
4350 This can only occur when vectorizing memory accesses in the inner-loop
4351 nested within an outer-loop that is being vectorized. */
4353 if (loop && nested_in_vect_loop_p (loop, stmt)
4354 && (TREE_INT_CST_LOW (DR_STEP (dr))
4355 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4357 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4358 compute_in_loop = true;
4361 if ((alignment_support_scheme == dr_explicit_realign_optimized
4362 || alignment_support_scheme == dr_explicit_realign)
4363 && !compute_in_loop)
4365 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4366 alignment_support_scheme, NULL_TREE,
4368 if (alignment_support_scheme == dr_explicit_realign_optimized)
4370 phi = SSA_NAME_DEF_STMT (msq);
4371 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4378 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4381 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4383 aggr_type = vectype;
4385 prev_stmt_info = NULL;
4386 for (j = 0; j < ncopies; j++)
4388 /* 1. Create the vector or array pointer update chain. */
4390 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4391 offset, &dummy, gsi,
4392 &ptr_incr, false, &inv_p);
4394 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4395 TYPE_SIZE_UNIT (aggr_type));
4397 if (strided_load || slp_perm)
4398 dr_chain = VEC_alloc (tree, heap, vec_num);
4404 vec_array = create_vector_array (vectype, vec_num);
4407 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4408 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4409 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4410 gimple_call_set_lhs (new_stmt, vec_array);
4411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4412 mark_symbols_for_renaming (new_stmt);
4414 /* Extract each vector into an SSA_NAME. */
4415 for (i = 0; i < vec_num; i++)
4417 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4419 VEC_quick_push (tree, dr_chain, new_temp);
4422 /* Record the mapping between SSA_NAMEs and statements. */
4423 vect_record_strided_load_vectors (stmt, dr_chain);
4427 for (i = 0; i < vec_num; i++)
4430 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4433 /* 2. Create the vector-load in the loop. */
4434 switch (alignment_support_scheme)
4437 case dr_unaligned_supported:
4439 struct ptr_info_def *pi;
4441 = build2 (MEM_REF, vectype, dataref_ptr,
4442 build_int_cst (reference_alias_ptr_type
4443 (DR_REF (first_dr)), 0));
4444 pi = get_ptr_info (dataref_ptr);
4445 pi->align = TYPE_ALIGN_UNIT (vectype);
4446 if (alignment_support_scheme == dr_aligned)
4448 gcc_assert (aligned_access_p (first_dr));
4451 else if (DR_MISALIGNMENT (first_dr) == -1)
4453 TREE_TYPE (data_ref)
4454 = build_aligned_type (TREE_TYPE (data_ref),
4455 TYPE_ALIGN (elem_type));
4456 pi->align = TYPE_ALIGN_UNIT (elem_type);
4461 TREE_TYPE (data_ref)
4462 = build_aligned_type (TREE_TYPE (data_ref),
4463 TYPE_ALIGN (elem_type));
4464 pi->misalign = DR_MISALIGNMENT (first_dr);
4468 case dr_explicit_realign:
4473 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4475 if (compute_in_loop)
4476 msq = vect_setup_realignment (first_stmt, gsi,
4478 dr_explicit_realign,
4481 new_stmt = gimple_build_assign_with_ops
4482 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4484 (TREE_TYPE (dataref_ptr),
4485 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4486 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4487 gimple_assign_set_lhs (new_stmt, ptr);
4488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4490 = build2 (MEM_REF, vectype, ptr,
4491 build_int_cst (reference_alias_ptr_type
4492 (DR_REF (first_dr)), 0));
4493 vec_dest = vect_create_destination_var (scalar_dest,
4495 new_stmt = gimple_build_assign (vec_dest, data_ref);
4496 new_temp = make_ssa_name (vec_dest, new_stmt);
4497 gimple_assign_set_lhs (new_stmt, new_temp);
4498 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4499 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4500 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4503 bump = size_binop (MULT_EXPR, vs_minus_1,
4504 TYPE_SIZE_UNIT (scalar_type));
4505 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4506 new_stmt = gimple_build_assign_with_ops
4507 (BIT_AND_EXPR, NULL_TREE, ptr,
4510 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4511 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4512 gimple_assign_set_lhs (new_stmt, ptr);
4513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4515 = build2 (MEM_REF, vectype, ptr,
4516 build_int_cst (reference_alias_ptr_type
4517 (DR_REF (first_dr)), 0));
4520 case dr_explicit_realign_optimized:
4521 new_stmt = gimple_build_assign_with_ops
4522 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4524 (TREE_TYPE (dataref_ptr),
4525 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4526 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4528 gimple_assign_set_lhs (new_stmt, new_temp);
4529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4531 = build2 (MEM_REF, vectype, new_temp,
4532 build_int_cst (reference_alias_ptr_type
4533 (DR_REF (first_dr)), 0));
4538 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4539 new_stmt = gimple_build_assign (vec_dest, data_ref);
4540 new_temp = make_ssa_name (vec_dest, new_stmt);
4541 gimple_assign_set_lhs (new_stmt, new_temp);
4542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4543 mark_symbols_for_renaming (new_stmt);
4545 /* 3. Handle explicit realignment if necessary/supported.
4547 vec_dest = realign_load (msq, lsq, realignment_token) */
4548 if (alignment_support_scheme == dr_explicit_realign_optimized
4549 || alignment_support_scheme == dr_explicit_realign)
4551 lsq = gimple_assign_lhs (new_stmt);
4552 if (!realignment_token)
4553 realignment_token = dataref_ptr;
4554 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4556 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4559 new_temp = make_ssa_name (vec_dest, new_stmt);
4560 gimple_assign_set_lhs (new_stmt, new_temp);
4561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 if (alignment_support_scheme == dr_explicit_realign_optimized)
4566 if (i == vec_num - 1 && j == ncopies - 1)
4567 add_phi_arg (phi, lsq,
4568 loop_latch_edge (containing_loop),
4574 /* 4. Handle invariant-load. */
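/* Illustration (assumed values, not from the original sources): a
   loop-invariant load of "t" with nunits == 4 is vectorized by
   splatting, vec_inv = {t, t, t, t}, via build_vector_from_val
   below.  */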
4575 if (inv_p && !bb_vinfo)
4578 gimple_stmt_iterator gsi2 = *gsi;
4579 gcc_assert (!strided_load);
4581 vec_inv = build_vector_from_val (vectype, scalar_dest);
4582 new_temp = vect_init_vector (stmt, vec_inv,
4584 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4589 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4590 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4593 /* Collect vector loads and later create their permutation in
4594 vect_transform_strided_load (). */
4595 if (strided_load || slp_perm)
4596 VEC_quick_push (tree, dr_chain, new_temp);
4598 /* Store vector loads in the corresponding SLP_NODE. */
4599 if (slp && !slp_perm)
4600 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4605 if (slp && !slp_perm)
4610 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4611 slp_node_instance, false))
4613 VEC_free (tree, heap, dr_chain);
4622 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4623 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4628 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4630 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4631 prev_stmt_info = vinfo_for_stmt (new_stmt);
4635 VEC_free (tree, heap, dr_chain);
4641 /* Function vect_is_simple_cond.
4644 LOOP - the loop that is being vectorized.
4645 COND - Condition that is checked for simple use.
4647 Returns whether a COND can be vectorized. Checks whether the
4648 condition operands are supportable using vect_is_simple_use. */
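/* For example (illustration only): a condition such as "a_1 < b_2" is
   simple if each of a_1 and b_2 is a constant or is defined by a
   supportable stmt; an operand defined by a previous loop iteration
   would make it unsupportable.  */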
4651 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4655 enum vect_def_type dt;
4657 if (!COMPARISON_CLASS_P (cond))
4660 lhs = TREE_OPERAND (cond, 0);
4661 rhs = TREE_OPERAND (cond, 1);
4663 if (TREE_CODE (lhs) == SSA_NAME)
4665 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4666 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4670 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4671 && TREE_CODE (lhs) != FIXED_CST)
4674 if (TREE_CODE (rhs) == SSA_NAME)
4676 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4677 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4681 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4682 && TREE_CODE (rhs) != FIXED_CST)
4688 /* vectorizable_condition.
4690 Check if STMT is a conditional modify expression that can be vectorized.
4691 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4692 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4695 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4696 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4697 the else clause if it is 2).
4699 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
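/* Illustrative example (not from the original sources): the scalar stmt

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is vectorized as

     vec_compare = va < vb;
     vx = VEC_COND_EXPR <vec_compare, vc, vd>;  */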
4702 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4703 gimple *vec_stmt, tree reduc_def, int reduc_index)
4705 tree scalar_dest = NULL_TREE;
4706 tree vec_dest = NULL_TREE;
4707 tree op = NULL_TREE;
4708 tree cond_expr, then_clause, else_clause;
4709 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4710 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4711 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4712 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4713 tree vec_compare, vec_cond_expr;
4715 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4716 enum machine_mode vec_mode;
4718 enum vect_def_type dt, dts[4];
4719 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4720 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4721 enum tree_code code;
4722 stmt_vec_info prev_stmt_info = NULL;
4725 /* FORNOW: unsupported in basic block SLP. */
4726 gcc_assert (loop_vinfo);
4728 /* FORNOW: SLP not supported. */
4729 if (STMT_SLP_TYPE (stmt_info))
4732 gcc_assert (ncopies >= 1);
4733 if (reduc_index && ncopies > 1)
4734 return false; /* FORNOW */
4736 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4739 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4740 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4744 /* FORNOW: not yet supported. */
4745 if (STMT_VINFO_LIVE_P (stmt_info))
4747 if (vect_print_dump_info (REPORT_DETAILS))
4748 fprintf (vect_dump, "value used after loop.");
4752 /* Is vectorizable conditional operation? */
4753 if (!is_gimple_assign (stmt))
4756 code = gimple_assign_rhs_code (stmt);
4758 if (code != COND_EXPR)
4761 gcc_assert (gimple_assign_single_p (stmt));
4762 op = gimple_assign_rhs1 (stmt);
4763 cond_expr = TREE_OPERAND (op, 0);
4764 then_clause = TREE_OPERAND (op, 1);
4765 else_clause = TREE_OPERAND (op, 2);
4767 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4770 /* We do not handle two different vector types for the condition
4771 and the values. */
4772 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4773 TREE_TYPE (vectype)))
4776 if (TREE_CODE (then_clause) == SSA_NAME)
4778 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4779 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4780 &then_def_stmt, &def, &dt))
4783 else if (TREE_CODE (then_clause) != INTEGER_CST
4784 && TREE_CODE (then_clause) != REAL_CST
4785 && TREE_CODE (then_clause) != FIXED_CST)
4788 if (TREE_CODE (else_clause) == SSA_NAME)
4790 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4791 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4792 &else_def_stmt, &def, &dt))
4795 else if (TREE_CODE (else_clause) != INTEGER_CST
4796 && TREE_CODE (else_clause) != REAL_CST
4797 && TREE_CODE (else_clause) != FIXED_CST)
4801 vec_mode = TYPE_MODE (vectype);
4805 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4806 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
4812 scalar_dest = gimple_assign_lhs (stmt);
4813 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4815 /* Handle cond expr. */
4816 for (j = 0; j < ncopies; j++)
4823 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4825 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4826 NULL, >emp, &def, &dts[0]);
4828 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4830 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4831 NULL, >emp, &def, &dts[1]);
4832 if (reduc_index == 1)
4833 vec_then_clause = reduc_def;
4836 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4838 vect_is_simple_use (then_clause, loop_vinfo,
4839 NULL, >emp, &def, &dts[2]);
4841 if (reduc_index == 2)
4842 vec_else_clause = reduc_def;
4845 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4847 vect_is_simple_use (else_clause, loop_vinfo,
4848 NULL, >emp, &def, &dts[3]);
4853 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4854 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4855 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4857 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4861 /* Arguments are ready. Create the new vector stmt. */
4862 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4863 vec_cond_lhs, vec_cond_rhs);
4864 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4865 vec_compare, vec_then_clause, vec_else_clause);
4867 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4868 new_temp = make_ssa_name (vec_dest, new_stmt);
4869 gimple_assign_set_lhs (new_stmt, new_temp);
4870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4872 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4874 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4876 prev_stmt_info = vinfo_for_stmt (new_stmt);
4883 /* Make sure the statement is vectorizable. */
4886 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4889 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4890 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4892 tree scalar_type, vectype;
4893 gimple pattern_stmt;
4895 if (vect_print_dump_info (REPORT_DETAILS))
4897 fprintf (vect_dump, "==> examining statement: ");
4898 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4901 if (gimple_has_volatile_ops (stmt))
4903 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4904 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4909 /* Skip stmts that do not need to be vectorized. In loops this is expected
4910 to include:
4911 - the COND_EXPR which is the loop exit condition
4912 - any LABEL_EXPRs in the loop
4913 - computations that are used only for array indexing or loop control.
4914 In basic blocks we only analyze statements that are a part of some SLP
4915 instance, therefore, all the statements are relevant.
4917 Pattern statements need to be analyzed instead of the original statement
4918 if the original statement is not relevant. Otherwise, we analyze both
4919 statements. */
4921 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4922 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4923 && !STMT_VINFO_LIVE_P (stmt_info))
4925 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4927 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4928 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4930 /* Analyze PATTERN_STMT instead of the original stmt. */
4931 stmt = pattern_stmt;
4932 stmt_info = vinfo_for_stmt (pattern_stmt);
4933 if (vect_print_dump_info (REPORT_DETAILS))
4935 fprintf (vect_dump, "==> examining pattern statement: ");
4936 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4941 if (vect_print_dump_info (REPORT_DETAILS))
4942 fprintf (vect_dump, "irrelevant.");
4947 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4949 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4950 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4952 /* Analyze PATTERN_STMT too. */
4953 if (vect_print_dump_info (REPORT_DETAILS))
4955 fprintf (vect_dump, "==> examining pattern statement: ");
4956 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4959 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
4963 switch (STMT_VINFO_DEF_TYPE (stmt_info))
4965 case vect_internal_def:
4968 case vect_reduction_def:
4969 case vect_nested_cycle:
4970 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4971 || relevance == vect_used_in_outer_by_reduction
4972 || relevance == vect_unused_in_scope));
4975 case vect_induction_def:
4976 case vect_constant_def:
4977 case vect_external_def:
4978 case vect_unknown_def_type:
4985 gcc_assert (PURE_SLP_STMT (stmt_info));
4987 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
4988 if (vect_print_dump_info (REPORT_DETAILS))
4990 fprintf (vect_dump, "get vectype for scalar type: ");
4991 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4994 vectype = get_vectype_for_scalar_type (scalar_type);
4997 if (vect_print_dump_info (REPORT_DETAILS))
4999 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5000 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5005 if (vect_print_dump_info (REPORT_DETAILS))
5007 fprintf (vect_dump, "vectype: ");
5008 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5011 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5014 if (STMT_VINFO_RELEVANT_P (stmt_info))
5016 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5017 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5018 *need_to_vectorize = true;
5023 && (STMT_VINFO_RELEVANT_P (stmt_info)
5024 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5025 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5026 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5027 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5028 || vectorizable_shift (stmt, NULL, NULL, NULL)
5029 || vectorizable_operation (stmt, NULL, NULL, NULL)
5030 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5031 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5032 || vectorizable_call (stmt, NULL, NULL)
5033 || vectorizable_store (stmt, NULL, NULL, NULL)
5034 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5035 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5039 ok = (vectorizable_shift (stmt, NULL, NULL, node)
5040 || vectorizable_operation (stmt, NULL, NULL, node)
5041 || vectorizable_assignment (stmt, NULL, NULL, node)
5042 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5043 || vectorizable_store (stmt, NULL, NULL, node));
5048 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5050 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5051 fprintf (vect_dump, "supported: ");
5052 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5061 /* Stmts that are (also) "live" (i.e., used outside the loop)
5062 need extra handling, except for vectorizable reductions. */
5063 if (STMT_VINFO_LIVE_P (stmt_info)
5064 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5065 ok = vectorizable_live_operation (stmt, NULL, NULL);
5069 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5071 fprintf (vect_dump, "not vectorized: live stmt not ");
5072 fprintf (vect_dump, "supported: ");
5073 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5083 /* Function vect_transform_stmt.
5085 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5088 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5089 bool *strided_store, slp_tree slp_node,
5090 slp_instance slp_node_instance)
5092 bool is_store = false;
5093 gimple vec_stmt = NULL;
5094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5097 switch (STMT_VINFO_TYPE (stmt_info))
5099 case type_demotion_vec_info_type:
5100 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5104 case type_promotion_vec_info_type:
5105 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5109 case type_conversion_vec_info_type:
5110 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5114 case induc_vec_info_type:
5115 gcc_assert (!slp_node);
5116 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5120 case shift_vec_info_type:
5121 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5125 case op_vec_info_type:
5126 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5130 case assignment_vec_info_type:
5131 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5135 case load_vec_info_type:
5136 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5141 case store_vec_info_type:
5142 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5144 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5146 /* In case of interleaving, the whole chain is vectorized when the
5147 last store in the chain is reached. Store stmts before the last
5148 one are skipped, and their vec_stmt_info shouldn't be freed
5149 meanwhile. */
5150 *strided_store = true;
5151 if (STMT_VINFO_VEC_STMT (stmt_info))
5158 case condition_vec_info_type:
5159 gcc_assert (!slp_node);
5160 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5164 case call_vec_info_type:
5165 gcc_assert (!slp_node);
5166 done = vectorizable_call (stmt, gsi, &vec_stmt);
5167 stmt = gsi_stmt (*gsi);
5170 case reduc_vec_info_type:
5171 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5176 if (!STMT_VINFO_LIVE_P (stmt_info))
5178 if (vect_print_dump_info (REPORT_DETAILS))
5179 fprintf (vect_dump, "stmt not supported.");
5184 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5185 is being vectorized, but outside the immediately enclosing loop. */
5187 && STMT_VINFO_LOOP_VINFO (stmt_info)
5188 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5189 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5190 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5191 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5192 || STMT_VINFO_RELEVANT (stmt_info) ==
5193 vect_used_in_outer_by_reduction))
5195 struct loop *innerloop = LOOP_VINFO_LOOP (
5196 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5197 imm_use_iterator imm_iter;
5198 use_operand_p use_p;
5202 if (vect_print_dump_info (REPORT_DETAILS))
5203 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5205 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5206 (to be used when vectorizing outer-loop stmts that use the DEF of
5207 STMT). */
5208 if (gimple_code (stmt) == GIMPLE_PHI)
5209 scalar_dest = PHI_RESULT (stmt);
5211 scalar_dest = gimple_assign_lhs (stmt);
5213 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5215 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5217 exit_phi = USE_STMT (use_p);
5218 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5223 /* Handle stmts whose DEF is used outside the loop-nest that is
5224 being vectorized. */
5225 if (STMT_VINFO_LIVE_P (stmt_info)
5226 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5228 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5233 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5239 /* Remove a group of stores (for SLP or interleaving), free their
5240 stmt_vec_info. */
5243 vect_remove_stores (gimple first_stmt)
5245 gimple next = first_stmt;
5247 gimple_stmt_iterator next_si;
5251 /* Free the attached stmt_vec_info and remove the stmt. */
5252 next_si = gsi_for_stmt (next);
5253 gsi_remove (&next_si, true);
5254 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5255 free_stmt_vec_info (next);
5261 /* Function new_stmt_vec_info.
5263 Create and initialize a new stmt_vec_info struct for STMT. */
5266 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5267 bb_vec_info bb_vinfo)
5270 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5272 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5273 STMT_VINFO_STMT (res) = stmt;
5274 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5275 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5276 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5277 STMT_VINFO_LIVE_P (res) = false;
5278 STMT_VINFO_VECTYPE (res) = NULL;
5279 STMT_VINFO_VEC_STMT (res) = NULL;
5280 STMT_VINFO_VECTORIZABLE (res) = true;
5281 STMT_VINFO_IN_PATTERN_P (res) = false;
5282 STMT_VINFO_RELATED_STMT (res) = NULL;
5283 STMT_VINFO_DATA_REF (res) = NULL;
5285 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5286 STMT_VINFO_DR_OFFSET (res) = NULL;
5287 STMT_VINFO_DR_INIT (res) = NULL;
5288 STMT_VINFO_DR_STEP (res) = NULL;
5289 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5291 if (gimple_code (stmt) == GIMPLE_PHI
5292 && is_loop_header_bb_p (gimple_bb (stmt)))
5293 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5295 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5297 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5298 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5299 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5300 STMT_SLP_TYPE (res) = loop_vect;
5301 GROUP_FIRST_ELEMENT (res) = NULL;
5302 GROUP_NEXT_ELEMENT (res) = NULL;
5303 GROUP_SIZE (res) = 0;
5304 GROUP_STORE_COUNT (res) = 0;
5305 GROUP_GAP (res) = 0;
5306 GROUP_SAME_DR_STMT (res) = NULL;
5307 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5313 /* Create a hash table for stmt_vec_info. */
5316 init_stmt_vec_info_vec (void)
5318 gcc_assert (!stmt_vec_info_vec);
5319 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5323 /* Free hash table for stmt_vec_info. */
5326 free_stmt_vec_info_vec (void)
5328 gcc_assert (stmt_vec_info_vec);
5329 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5333 /* Free stmt vectorization related info. */
5336 free_stmt_vec_info (gimple stmt)
5338 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5343 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5344 set_vinfo_for_stmt (stmt, NULL);
5349 /* Function get_vectype_for_scalar_type_and_size.
5351 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5352 by the target. */
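/* E.g. (illustration only): for SCALAR_TYPE int and SIZE 16 on a target
   with 16-byte vector registers, the result is a 4-unit vector type
   (V4SI mode).  */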
5355 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5357 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5358 enum machine_mode simd_mode;
5359 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5366 /* We can't build a vector type of elements with alignment bigger than
5367 their size. */
5368 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5371 /* If we'd build a vector type of elements whose mode precision doesn't
5372 match their type's precision we'll get mismatched types on vector
5373 extracts via BIT_FIELD_REFs. This effectively means we disable
5374 vectorization of bool and/or enum types in some languages. */
5375 if (INTEGRAL_TYPE_P (scalar_type)
5376 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5379 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5380 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5383 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5384 When the component mode passes the above test simply use a type
5385 corresponding to that mode. The theory is that any use that
5386 would cause problems with this will disable vectorization anyway. */
5387 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5388 && !INTEGRAL_TYPE_P (scalar_type)
5389 && !POINTER_TYPE_P (scalar_type))
5390 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5392 /* If no size was supplied use the mode the target prefers. Otherwise
5393 lookup a vector mode of the specified size. */
5395 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5397 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5398 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5402 vectype = build_vector_type (scalar_type, nunits);
5403 if (vect_print_dump_info (REPORT_DETAILS))
5405 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5406 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5412 if (vect_print_dump_info (REPORT_DETAILS))
5414 fprintf (vect_dump, "vectype: ");
5415 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5418 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5419 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5421 if (vect_print_dump_info (REPORT_DETAILS))
5422 fprintf (vect_dump, "mode not supported by target.");
5429 unsigned int current_vector_size;
5431 /* Function get_vectype_for_scalar_type.
5433 Returns the vector type corresponding to SCALAR_TYPE as supported
5434 by the target. */
5437 get_vectype_for_scalar_type (tree scalar_type)
5440 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5441 current_vector_size);
5443 && current_vector_size == 0)
5444 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5448 /* Function get_same_sized_vectype
5450 Returns a vector type corresponding to SCALAR_TYPE with the same size as
5451 VECTOR_TYPE if supported by the target. */
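/* E.g. (illustration only): for SCALAR_TYPE short and a 16-byte
   VECTOR_TYPE such as one with V4SI mode, the result is an 8-unit
   vector of shorts (V8HI mode).  */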
5454 get_same_sized_vectype (tree scalar_type, tree vector_type)
5456 return get_vectype_for_scalar_type_and_size
5457 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5460 /* Function vect_is_simple_use.
5463 LOOP_VINFO - the vect info of the loop that is being vectorized.
5464 BB_VINFO - the vect info of the basic block that is being vectorized.
5465 OPERAND - operand of a stmt in the loop or bb.
5466 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5468 Returns whether a stmt with OPERAND can be vectorized.
5469 For loops, supportable operands are constants, loop invariants, and operands
5470 that are defined by the current iteration of the loop. Unsupportable
5471 operands are those that are defined by a previous iteration of the loop (as
5472 is the case in reduction/induction computations).
5473 For basic blocks, supportable operands are constants and bb invariants.
5474 For now, operands defined outside the basic block are not supported. */
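/* For example (illustration only): in

     for (i = 0; i < N; i++)
       a[i] = b[i] + x;

   the use of "x" is vect_external_def (defined outside the loop),
   a literal constant would be vect_constant_def, and the use of a
   value computed inside the loop is vect_internal_def.  */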
5477 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5478 bb_vec_info bb_vinfo, gimple *def_stmt,
5479 tree *def, enum vect_def_type *dt)
5482 stmt_vec_info stmt_vinfo;
5483 struct loop *loop = NULL;
5486 loop = LOOP_VINFO_LOOP (loop_vinfo);
5491 if (vect_print_dump_info (REPORT_DETAILS))
5493 fprintf (vect_dump, "vect_is_simple_use: operand ");
5494 print_generic_expr (vect_dump, operand, TDF_SLIM);
5497 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5499 *dt = vect_constant_def;
5503 if (is_gimple_min_invariant (operand))
5506 *dt = vect_external_def;
5510 if (TREE_CODE (operand) == PAREN_EXPR)
5512 if (vect_print_dump_info (REPORT_DETAILS))
5513 fprintf (vect_dump, "non-associatable copy.");
5514 operand = TREE_OPERAND (operand, 0);
5517 if (TREE_CODE (operand) != SSA_NAME)
5519 if (vect_print_dump_info (REPORT_DETAILS))
5520 fprintf (vect_dump, "not ssa-name.");
5524 *def_stmt = SSA_NAME_DEF_STMT (operand);
5525 if (*def_stmt == NULL)
5527 if (vect_print_dump_info (REPORT_DETAILS))
5528 fprintf (vect_dump, "no def_stmt.");
5532 if (vect_print_dump_info (REPORT_DETAILS))
5534 fprintf (vect_dump, "def_stmt: ");
5535 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5538 /* An empty stmt is expected only in case of a function argument
5539 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
5540 if (gimple_nop_p (*def_stmt))
5543 *dt = vect_external_def;
5547 bb = gimple_bb (*def_stmt);
5549 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5550 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5551 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5552 *dt = vect_external_def;
5555 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5556 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5559 if (*dt == vect_unknown_def_type)
5561 if (vect_print_dump_info (REPORT_DETAILS))
5562 fprintf (vect_dump, "Unsupported pattern.");
5566 if (vect_print_dump_info (REPORT_DETAILS))
5567 fprintf (vect_dump, "type of def: %d.",*dt);
5569 switch (gimple_code (*def_stmt))
5572 *def = gimple_phi_result (*def_stmt);
5576 *def = gimple_assign_lhs (*def_stmt);
5580 *def = gimple_call_lhs (*def_stmt);
5585 if (vect_print_dump_info (REPORT_DETAILS))
5586 fprintf (vect_dump, "unsupported defining stmt: ");
5593 /* Function vect_is_simple_use_1.
5595 Same as vect_is_simple_use but also determines the vector operand
5596 type of OPERAND and stores it to *VECTYPE. If the definition of
5597 OPERAND is vect_uninitialized_def, vect_constant_def or
5598 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5599 is responsible for computing the best suited vector type for the
5600 scalar operand. */
5603 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5604 bb_vec_info bb_vinfo, gimple *def_stmt,
5605 tree *def, enum vect_def_type *dt, tree *vectype)
5607 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5610 /* Now get a vector type if the def is internal, otherwise supply
5611 NULL_TREE and leave it up to the caller to figure out a proper
5612 type for the use stmt. */
5613 if (*dt == vect_internal_def
5614 || *dt == vect_induction_def
5615 || *dt == vect_reduction_def
5616 || *dt == vect_double_reduction_def
5617 || *dt == vect_nested_cycle)
5619 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5621 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5622 && !STMT_VINFO_RELEVANT (stmt_info)
5623 && !STMT_VINFO_LIVE_P (stmt_info))
5624 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5626 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5627 gcc_assert (*vectype != NULL_TREE);
5629 else if (*dt == vect_uninitialized_def
5630 || *dt == vect_constant_def
5631 || *dt == vect_external_def)
5632 *vectype = NULL_TREE;
5640 /* Function supportable_widening_operation
5642 Check whether an operation represented by the code CODE is a
5643 widening operation that is supported by the target platform in
5644 vector form (i.e., when operating on arguments of type VECTYPE_IN
5645 producing a result of type VECTYPE_OUT).
5647 Widening operations we currently support are NOP (CONVERT), FLOAT
5648 and WIDEN_MULT. This function checks if these operations are supported
5649 by the target platform either directly (via vector tree-codes), or via
5650 target builtins.
5652 Output:
5653 - CODE1 and CODE2 are codes of vector operations to be used when
5654 vectorizing the operation, if available.
5655 - DECL1 and DECL2 are decls of target builtin functions to be used
5656 when vectorizing the operation, if available. In this case,
5657 CODE1 and CODE2 are CALL_EXPR.
5658 - MULTI_STEP_CVT determines the number of required intermediate steps in
5659 case of multi-step conversion (like char->short->int - in that case
5660 MULTI_STEP_CVT will be 1).
5661 - INTERM_TYPES contains the intermediate type required to perform the
5662 widening operation (short in the above example). */
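/* Illustrative example (not from the original sources): on a
   little-endian target, a NOP widening of a V8HI operand into two
   V4SI results would use

     vlo = VEC_UNPACK_LO_EXPR <vx>
     vhi = VEC_UNPACK_HI_EXPR <vx>

   with the HI/LO codes swapped on big-endian targets, as chosen in
   the switch below.  */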
5665 supportable_widening_operation (enum tree_code code, gimple stmt,
5666 tree vectype_out, tree vectype_in,
5667 tree *decl1, tree *decl2,
5668 enum tree_code *code1, enum tree_code *code2,
5669 int *multi_step_cvt,
5670 VEC (tree, heap) **interm_types)
5672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5673 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5674 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
5676 enum machine_mode vec_mode;
5677 enum insn_code icode1, icode2;
5678 optab optab1, optab2;
5679 tree vectype = vectype_in;
5680 tree wide_vectype = vectype_out;
5681 enum tree_code c1, c2;
5683 /* The result of a vectorized widening operation usually requires two vectors
5684 (because the widened results do not fit in one vector). The generated
5685 vector results would normally be expected to be generated in the same
5686 order as in the original scalar computation, i.e. if 8 results are
5687 generated in each vector iteration, they are to be organized as follows:
5688 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5690 However, in the special case that the result of the widening operation is
5691 used in a reduction computation only, the order doesn't matter (because
5692 when vectorizing a reduction we change the order of the computation).
5693 Some targets can take advantage of this and generate more efficient code.
5694 For example, targets like Altivec, that support widen_mult using a sequence
5695 of {mult_even,mult_odd} generate the following vectors:
5696 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5698 When vectorizing outer-loops, we execute the inner-loop sequentially
5699 (each vectorized inner-loop iteration contributes to VF outer-loop
5700 iterations in parallel). We therefore don't allow changing the order
5701 of the computation in the inner-loop during outer-loop vectorization. */
5703 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5704 && !nested_in_vect_loop_p (vect_loop, stmt))
5710 && code == WIDEN_MULT_EXPR
5711 && targetm.vectorize.builtin_mul_widen_even
5712 && targetm.vectorize.builtin_mul_widen_even (vectype)
5713 && targetm.vectorize.builtin_mul_widen_odd
5714 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5716 if (vect_print_dump_info (REPORT_DETAILS))
5717 fprintf (vect_dump, "Unordered widening operation detected.");
5719 *code1 = *code2 = CALL_EXPR;
5720 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5721 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5727 case WIDEN_MULT_EXPR:
5728 if (BYTES_BIG_ENDIAN)
5730 c1 = VEC_WIDEN_MULT_HI_EXPR;
5731 c2 = VEC_WIDEN_MULT_LO_EXPR;
5735 c2 = VEC_WIDEN_MULT_HI_EXPR;
5736 c1 = VEC_WIDEN_MULT_LO_EXPR;
5741 if (BYTES_BIG_ENDIAN)
5743 c1 = VEC_UNPACK_HI_EXPR;
5744 c2 = VEC_UNPACK_LO_EXPR;
5748 c2 = VEC_UNPACK_HI_EXPR;
5749 c1 = VEC_UNPACK_LO_EXPR;
5754 if (BYTES_BIG_ENDIAN)
5756 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5757 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5761 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5762 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5766 case FIX_TRUNC_EXPR:
5767 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5768 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5769 computing the operation. */
5776 if (code == FIX_TRUNC_EXPR)
5778 /* The signedness is determined from output operand. */
5779 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5780 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5784 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5785 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5788 if (!optab1 || !optab2)
5791 vec_mode = TYPE_MODE (vectype);
5792 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5793 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5796 /* Check if it's a multi-step conversion that can be done using intermediate
5797 types. */
5798 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5799 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5802 tree prev_type = vectype, intermediate_type;
5803 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5804 optab optab3, optab4;
5806 if (!CONVERT_EXPR_CODE_P (code))
5812 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5813 intermediate steps in the promotion sequence. We try
5814 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
5815 not. */
5816 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5817 for (i = 0; i < 3; i++)
5819 intermediate_mode = insn_data[icode1].operand[0].mode;
5820 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5821 TYPE_UNSIGNED (prev_type));
5822 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5823 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5825 if (!optab3 || !optab4
5826 || ((icode1 = optab_handler (optab1, prev_mode))
5827 == CODE_FOR_nothing)
5828 || insn_data[icode1].operand[0].mode != intermediate_mode
5829 || ((icode2 = optab_handler (optab2, prev_mode))
5830 == CODE_FOR_nothing)
5831 || insn_data[icode2].operand[0].mode != intermediate_mode
5832 || ((icode1 = optab_handler (optab3, intermediate_mode))
5833 == CODE_FOR_nothing)
5834 || ((icode2 = optab_handler (optab4, intermediate_mode))
5835 == CODE_FOR_nothing))
5838 VEC_quick_push (tree, *interm_types, intermediate_type);
5839 (*multi_step_cvt)++;
5841 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5842 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5845 prev_type = intermediate_type;
5846 prev_mode = intermediate_mode;
5858 /* Function supportable_narrowing_operation
5860 Check whether an operation represented by the code CODE is a
5861 narrowing operation that is supported by the target platform in
5862 vector form (i.e., when operating on arguments of type VECTYPE_IN
5863 and producing a result of type VECTYPE_OUT).
5865 Narrowing operations we currently support are NOP (CONVERT) and
5866 FIX_TRUNC. This function checks if these operations are supported by
5867 the target platform directly via vector tree-codes.
5870 - CODE1 is the code of a vector operation to be used when
5871 vectorizing the operation, if available.
5872 - MULTI_STEP_CVT determines the number of required intermediate steps in
5873 case of multi-step conversion (like int->short->char - in that case
5874 MULTI_STEP_CVT will be 1).
5875 - INTERM_TYPES contains the intermediate type required to perform the
5876 narrowing operation (short in the above example). */
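/* Illustrative example (not from the original sources): narrowing two
   V4SI operands into one V8HI result would use

     vres = VEC_PACK_TRUNC_EXPR <vx0, vx1>

   and a multi-step int->short->char conversion packs twice, with
   MULTI_STEP_CVT == 1 and the intermediate short vector type recorded
   in INTERM_TYPES.  */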
5879 supportable_narrowing_operation (enum tree_code code,
5880 tree vectype_out, tree vectype_in,
5881 enum tree_code *code1, int *multi_step_cvt,
5882 VEC (tree, heap) **interm_types)
5884 enum machine_mode vec_mode;
5885 enum insn_code icode1;
5886 optab optab1, interm_optab;
5887 tree vectype = vectype_in;
5888 tree narrow_vectype = vectype_out;
5890 tree intermediate_type, prev_type;
5896 c1 = VEC_PACK_TRUNC_EXPR;
5899 case FIX_TRUNC_EXPR:
5900 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5904 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5905 tree code and optabs used for computing the operation. */
5912 if (code == FIX_TRUNC_EXPR)
5913 /* The signedness is determined from output operand. */
5914 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5916 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5921 vec_mode = TYPE_MODE (vectype);
5922 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5925 /* Check if it's a multi-step conversion that can be done using intermediate
5926 types. */
5927 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5929 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5932 prev_type = vectype;
5933 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5934 intermediate steps in the narrowing sequence. We try
5935 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5936 not. */
5937 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5938 for (i = 0; i < 3; i++)
5940 intermediate_mode = insn_data[icode1].operand[0].mode;
5941 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5942 TYPE_UNSIGNED (prev_type));
5943 interm_optab = optab_for_tree_code (c1, intermediate_type,
5946 || ((icode1 = optab_handler (optab1, prev_mode))
5947 == CODE_FOR_nothing)
5948 || insn_data[icode1].operand[0].mode != intermediate_mode
5949 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
5950 == CODE_FOR_nothing))
5953 VEC_quick_push (tree, *interm_types, intermediate_type);
5954 (*multi_step_cvt)++;
5956 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5959 prev_type = intermediate_type;
5960 prev_mode = intermediate_mode;