/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
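
/* Illustration (example code, not from the original sources): in

       for (i = 0; i < N; i++)
         {
           a[i] = b[i] + 1;    <-- relevant: alters memory (has a vdef)
           s = s + b[i];       <-- live: s is used after the loop
         }
       ... = s;

   both assignments are picked up here, while the loop-control stmts
   are not.  */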

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
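
/* E.g. in "a[i] = x" the use of 'i' appears only in the address
   computation of the data reference, so case 1 applies and the stmt
   defining 'i' is left unmarked by this use.  */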

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, VEC(gimple,heap) **worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: phi relevant? ");
	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: stmt relevant? ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
	          /* fall through */

	        default:
	          if (vect_print_dump_info (REPORT_DETAILS))
	            fprintf (vect_dump, "unsupported use of reduction.");

	          VEC_free (gimple, heap, worklist);
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    {
		      VEC_free (gimple, heap, worklist);
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                 }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    {
	      VEC_free (gimple, heap, worklist);
	      return false;
	    }
	}
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}

/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
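
/* E.g. for an interleaved group of 4 stores, the call on the first group
   member returns 4 while the calls on the other three members return 1,
   so the group overhead is charged exactly once.  */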

/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
	* vect_get_stmt_cost (vector_stmt);
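      /* E.g. with ncopies = 1 and group_size = 4: log2(4) = 2 levels of
	 high/low interleaving over 4 vectors give 2 * 4 = 8 permute
	 stmts.  */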

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                 vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
		      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
	* vect_get_stmt_cost (vector_stmt);
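      /* E.g. extracting the even and odd elements of 4 vectors again takes
	 log2(4) * 4 = 8 vector stmts per copy, mirroring the store case
	 above.  */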

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
	 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
	  || slp_node),
	 &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
					   vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
				   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
				   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
		  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
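
/* E.g. for a constant operand 3 and a V4SI vectype this emits
   "vect_cst_ = {3,3,3,3}" in the loop preheader and returns its def.  */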

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
					 fold_convert (TREE_TYPE (vector_type),
						       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 VEC(tree,heap) **vec_oprnds0,
				 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   VEC (tree, heap) **vec_oprnds0,
		   VEC (tree, heap) **vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
	{
	  *vec_oprnds1 = VEC_alloc (tree, heap, 1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
	}
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}
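
/* E.g. for a call to sqrt with V2DF in/out vectypes, a target would
   typically return the decl of its vector double sqrt builtin here; the
   mapping is entirely target-defined.  */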

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument types differ.");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument vector types differ.");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "no vectype for scalar type ");
	  print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
	}

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
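  /* E.g. a V8HI input (8 units) and V4SI output (4 units) give
     nunits_out == nunits_in / 2, i.e. a widening operation.  */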

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
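  /* E.g. with a vectorization factor of 8 and 4 units per input vector,
     two copies of the vectorized call are generated per loop iteration.  */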

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs);
	  else
	    VEC_truncate (tree, vargs, 0);

	  if (slp_node)
	    {
	      VEC (slp_void_p, heap) *vec_defs
		= VEC_alloc (slp_void_p, heap, nargs);
	      VEC (tree, heap) *vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0
		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      VEC (tree, heap) *vec_oprndsk
			= (VEC (tree, heap) *)
			  VEC_index (slp_void_p, vec_defs, k);
		      VEC_replace (tree, vargs, k,
				   VEC_index (tree, vec_oprndsk, i));
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mark_symbols_for_renaming (new_stmt);
		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				  new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  VEC (tree, heap) *vec_oprndsi
		    = (VEC (tree, heap) *)
		      VEC_index (slp_void_p, vec_defs, i);
		  VEC_free (tree, heap, vec_oprndsi);
		}
	      VEC_free (slp_void_p, heap, vec_defs);
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs * 2);
	  else
	    VEC_truncate (tree, vargs, 0);

	  if (slp_node)
	    {
	      VEC (slp_void_p, heap) *vec_defs
		= VEC_alloc (slp_void_p, heap, nargs);
	      VEC (tree, heap) *vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0
		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
		   i += 2)
		{
		  size_t k;
		  VEC_truncate (tree, vargs, 0);
		  for (k = 0; k < nargs; k++)
		    {
		      VEC (tree, heap) *vec_oprndsk
			= (VEC (tree, heap) *)
			  VEC_index (slp_void_p, vec_defs, k);
		      VEC_quick_push (tree, vargs,
				      VEC_index (tree, vec_oprndsk, i));
		      VEC_quick_push (tree, vargs,
				      VEC_index (tree, vec_oprndsk, i + 1));
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mark_symbols_for_renaming (new_stmt);
		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				  new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  VEC (tree, heap) *vec_oprndsi
		    = (VEC (tree, heap) *)
		      VEC_index (slp_void_p, vec_defs, i);
		  VEC_free (tree, heap, vec_oprndsi);
		}
	      VEC_free (slp_void_p, heap, vec_defs);
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	      VEC_quick_push (tree, vargs, vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
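
/* E.g. a call with MULTI_STEP_CVT == 1 pushes four vector defs into
   VEC_OPRNDS: two at the outer level and two more from the single
   recursive invocation.  */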

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       VEC (tree, heap) *vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}
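
/* E.g. a multi-step demotion from int to char over 4 input vectors packs
   them pairwise into 2 intermediate short vectors, which the recursive
   call then packs into 1 char vector using VEC_PACK_TRUNC_EXPR.  */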

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
					VEC (tree, heap) **vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  VEC (tree, heap) *vec_tmp = NULL;

  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      VEC_quick_push (tree, vec_tmp, new_tmp1);
      VEC_quick_push (tree, vec_tmp, new_tmp2);
    }

  VEC_free (tree, heap, *vec_oprnds0);
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);
  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "type conversion to/from bit-precision unsupported.");
      return false;
    }
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                 &dt[1]);

      if (!ok)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
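  /* For example (illustrative only): with VF == 16, V8HI inputs and V4SI
     outputs we have nunits_in == 8 > nunits_out == 4, i.e. a WIDEN
     conversion, so ncopies == 16 / 8 == 2 and each copy emits two V4SI
     result vectors (the low and high halves of one input vector).  */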
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &decl1, &decl2, &code1, &code2,
                                          &multi_step_cvt, &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &decl1, &decl2,
                                                    &codecvt1, &codecvt2,
                                                    &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, NULL, NULL, &code1,
                                              &code2, &multi_step_cvt,
                                              &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          VEC_safe_push (tree, heap, interm_types, cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
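  /* Illustrative note: a NARROW conversion the target cannot do directly,
     e.g. double -> int with V2DF inputs and V4SI outputs, is handled above
     in two phases: a same-width FIX_TRUNC into CVT_TYPE (V2DI here),
     followed by a (possibly multi-step) narrowing NOP conversion from
     V2DI to V4SI.  */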
  if (!vec_stmt)                /* transformation not required.  */
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
        }
      VEC_free (tree, heap, interm_types);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == NONE)
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
      else if (modifier == WIDEN)
        {
          vec_oprnds0 = VEC_alloc (tree, heap,
                                   (multi_step_cvt
                                    ? vect_pow2 (multi_step_cvt) : 1));
          if (op_type == binary_op)
            vec_oprnds1 = VEC_alloc (tree, heap, 1);
        }
      else
        vec_oprnds0 = VEC_alloc (tree, heap,
                                 2 * (multi_step_cvt
                                      ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
  last_oprnd = op0;

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              VEC_truncate (tree, vec_oprnds0, 0);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  VEC_truncate (tree, vec_oprnds1, 0);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = VEC_index (tree, vec_dsts, i);
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }
      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              VEC_truncate (tree, vec_oprnds0, 0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                VEC_replace (tree, vec_oprnds0, i, new_temp);
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }
  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, interm_types);

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type conversion to/from bit-precision "
                 "unsupported.");
      return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

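/* Illustrative note: this predicate lets callers outside this function ask
   up front whether a shift by CODE can be vectorized at all for
   SCALAR_TYPE; it mirrors the optab selection logic of vectorizable_shift
   below, preferring the vector-by-scalar optab and falling back to the
   vector-by-vector one.  */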
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                             &dt[1], &op1_vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }
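  /* For example (illustrative only): for "x[i] << 3" the shift amount is a
     constant, so SCALAR_SHIFT_ARG stays true and the vector/scalar optab is
     preferred; for "x[i] << y[i]" the amount is defined inside the loop,
     SCALAR_SHIFT_ARG becomes false, and the vector/vector optab is
     required.  */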
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vector/vector shift/rotate found.");
      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unusable type for last operand in"
                     " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        fprintf (vect_dump, "unusable type for last operand in"
                                 " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");
  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = VEC_index (tree, vec_oprnds1, i);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  return true;
}

/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
                 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision arithmetic not supported.");
      return false;
    }
  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[2]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");
  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2 = VEC_alloc (tree, heap, 1);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_operand (op2, stmt, NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_stmt_copy (dt[2],
                                                              vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
                                                    vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);
  if (vec_oprnds2)
    VEC_free (tree, heap, vec_oprnds2);

  return true;
}

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool strided_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  bool inv_p;
  VEC(tree,heap) *vec_oprnds = NULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "negative step for store.");
      return false;
    }
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_strided_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                       &def, &dt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "use not simple.");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
      return true;
    }

  /* Transform.  */

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          strided_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }
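  /* For example (illustrative only): for an interleaved group of 4 stores
     with V4SI vectors, VEC_NUM == GROUP_SIZE == 4, so each copy writes four
     vectors (16 scalar elements) after vect_permute_store_chain has put
     DR_CHAIN into interleaved order.  */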
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  VEC_quick_push (tree, dr_chain, vec_oprnd);
                  VEC_quick_push (tree, oprnds, vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = VEC_index (tree, oprnds, i);
              vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
                                  &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              VEC_replace (tree, dr_chain, i, vec_oprnd);
              VEC_replace (tree, oprnds, i, vec_oprnd);
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (aggr_type));
        }
      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = VEC_index (tree, dr_chain, i);
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);
        }
      else
        {
          new_stmt = NULL;
          if (strided_store)
            {
              result_chain = VEC_alloc (tree, heap, group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              struct ptr_info_def *pi;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = VEC_index (tree, vec_oprnds, i);
              else if (strided_store)
                /* For strided stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = VEC_index (tree, result_chain, i);

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0));
              pi = get_ptr_info (dataref_ptr);
              pi->align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                pi->misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  pi->align = TYPE_ALIGN_UNIT (elem_type);
                  pi->misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  pi->misalign = DR_MISALIGNMENT (first_dr);
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              mark_symbols_for_renaming (new_stmt);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }

      if (slp)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);
  if (vec_oprnds)
    VEC_free (tree, heap, vec_oprnds);

  return true;
}

/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type
    = lang_hooks.types.type_for_size
        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_vec = NULL;
  for (i = nunits - 1; i >= 0; i--)
    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
                          mask_vec);
  mask_vec = build_vector (mask_type, mask_vec);

  return mask_vec;
}

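/* For example (illustrative only): for V4SI and SEL = {0, 4, 1, 5} this
   returns the constant mask {0, 4, 1, 5}; indices below NUNITS select
   elements of the first VEC_PERM_EXPR input and the rest select elements
   of the second, so this particular mask interleaves the low halves of
   two vectors.  */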
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
                                             x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
4183 /* FORNOW. This restriction should be relaxed. */
4184 if (nested_in_vect_loop && ncopies > 1)
4186 if (vect_print_dump_info (REPORT_DETAILS))
4187 fprintf (vect_dump, "multiple types in nested loop.");
4191 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4194 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4197 /* Is vectorizable load? */
4198 if (!is_gimple_assign (stmt))
4201 scalar_dest = gimple_assign_lhs (stmt);
4202 if (TREE_CODE (scalar_dest) != SSA_NAME)
4205 code = gimple_assign_rhs_code (stmt);
4206 if (code != ARRAY_REF
4207 && code != INDIRECT_REF
4208 && code != COMPONENT_REF
4209 && code != IMAGPART_EXPR
4210 && code != REALPART_EXPR
4212 && TREE_CODE_CLASS (code) != tcc_declaration)
4215 if (!STMT_VINFO_DATA_REF (stmt_info))
4218 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4219 if (negative && ncopies > 1)
4221 if (vect_print_dump_info (REPORT_DETAILS))
4222 fprintf (vect_dump, "multiple types with negative step.");
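  /* A supportable negative-step (reverse) access is realized below by
     loading a full vector and then reversing it with a VEC_PERM_EXPR;
     see the perm_mask_for_reverse check and the permute_vec_elements
     call in the transformation code further down.  */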
  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_strided_load_supported (vectype, group_size))
            return false;
        }
    }

  if (negative)
    {
      gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "negative step but alignment required.");
          return false;
        }

      if (!perm_mask_for_reverse (vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "negative step and reversing not supported.");
          return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;

      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "gather index use not simple.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
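      /* The gather builtin takes its arguments as
           ret = FN (src, base, index, mask, scale);
         peel the types off its argument list so the index operand and the
         returned vector can be view-converted to/from what the builtin
         expects.  */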
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);
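      /* Emit one gather call per copy.  With WIDEN the offset vector is
         twice as wide as the data vector, so every odd copy reuses the
         high half of the previous offset vector (extracted via PERM_MASK);
         with NARROW two consecutive gather results are combined into one
         vector of the final type, again using PERM_MASK.  */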
      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              add_referenced_var (var);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              add_referenced_var (var);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  if (strided_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
          && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          strided_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
            slp_perm = true;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit strided access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

     p = initial_addr;
     indx = 0;
     loop {
       p = p + indx * vectype_size;
       vec_dest = *(p);
       indx = indx + 1;
     }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

     msq_init = *(floor(p1))
     p2 = initial_addr + VS - 1;
     realignment_token = call target_builtin;
     indx = 0;
     loop {
       p2 = p2 + indx * vectype_size
       lsq = *(floor(p2))
       vec_dest = realign_load (msq, lsq, realignment_token)
       indx = indx + 1;
       msq = lsq;
     }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (loop && nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
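  /* With load-lanes the data-ref pointer gets array-of-vectors type, so a
     single IFN_LOAD_LANES call below can read all VEC_NUM vectors of the
     interleaving group at once and de-interleave them in registers.  */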
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                                offset, &dummy, gsi,
                                                &ptr_incr, false, &inv_p);
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (strided_load || slp_perm)
        dr_chain = VEC_alloc (tree, heap, vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              VEC_quick_push (tree, dr_chain, new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_strided_load_vectors (stmt, dr_chain);
        }
      else
        {
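          /* No load-lanes support: emit one vector load per vector stmt of
             the group, bumping the pointer between loads.  Any needed
             de-interleaving is done afterwards by the VEC_PERM_EXPRs that
             vect_transform_strided_load generates.  */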
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    struct ptr_info_def *pi;
                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    pi = get_ptr_info (dataref_ptr);
                    pi->align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        pi->misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        pi->align = TYPE_ALIGN_UNIT (elem_type);
                        pi->misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        pi->misalign = DR_MISALIGNMENT (first_dr);
                      }
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
                                            new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              mark_symbols_for_renaming (new_stmt);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt
                    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
                                                     vec_dest, msq, lsq,
                                                     realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
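                  /* For dr_explicit_realign_optimized, MSQ is carried by the
                     loop PHI created in vect_setup_realignment; the LSQ of
                     the last generated copy is added below as the PHI's
                     latch argument, so the next iteration's MSQ is this
                     iteration's LSQ.  */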
                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  tree tem, vec_inv;
                  gimple_stmt_iterator gsi2 = *gsi;

                  gcc_assert (!strided_load);
                  gsi_next (&gsi2);
                  tem = scalar_dest;
                  if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                  TREE_TYPE (tem)))
                    {
                      tem = fold_convert (TREE_TYPE (vectype), tem);
                      tem = force_gimple_operand_gsi (&gsi2, tem, true,
                                                      NULL_TREE, true,
                                                      GSI_SAME_STMT);
                    }
                  vec_inv = build_vector_from_val (vectype, tem);
                  new_temp = vect_init_vector (stmt, vec_inv,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }
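              /* A reverse (negative-step) access: the mask built by
                 perm_mask_for_reverse selects the elements of the loaded
                 vector in reverse order, e.g. { 3, 2, 1, 0 } for a
                 4-element vector.  */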
              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_strided_load ().  */
              if (strided_load || slp_perm)
                VEC_quick_push (tree, dr_chain, new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              VEC_free (tree, heap, dr_chain);
              return false;
            }
        }
      else
        {
          if (strided_load)
            {
              if (!load_lanes_p)
                vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (dr_chain)
        VEC_free (tree, heap, dr_chain);
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
                                 &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
                                 &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
      vec_oprnds2 = VEC_alloc (tree, heap, 1);
      vec_oprnds3 = VEC_alloc (tree, heap, 1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
              VEC (slp_void_p, heap) *vec_defs;

              vec_defs = VEC_alloc (slp_void_p, heap, 4);
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
              VEC_safe_push (tree, heap, ops, then_clause);
              VEC_safe_push (tree, heap, ops, else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);

              VEC_free (tree, heap, ops);
              VEC_free (slp_void_p, heap, vec_defs);
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
                                  NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
                                  NULL, &gtemp, &def, &dts[1]);

              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }

              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                              VEC_pop (tree, vec_oprnds0));
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                              VEC_pop (tree, vec_oprnds1));
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                              VEC_pop (tree, vec_oprnds2));
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                              VEC_pop (tree, vec_oprnds3));
        }
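      /* In the non-SLP case keep exactly one def per operand live in the
         operand vectors: push back what was just computed so the next copy
         (j+1) can pop it again as the base for its own defs.  */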
      if (!slp_node)
        {
          VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
          VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
          VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
          VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
          vec_then_clause = VEC_index (tree, vec_oprnds2, i);
          vec_else_clause = VEC_index (tree, vec_oprnds3, i);

          vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_oprnds2);
  VEC_free (tree, heap, vec_oprnds3);

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining pattern statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "irrelevant.");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "==> examining pattern def statement: ");
                  print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                  || relevance == vect_used_in_outer_by_reduction
                  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "get vectype for scalar type:  ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not SLPed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vectype:  ");
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: live stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *strided_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *strided_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                            vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));
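  /* For example, a Boolean with TYPE_PRECISION 1 but QImode gets an 8-bit
     INTEGER_TYPE element here, so the vector is built from elements whose
     precision matches the machine mode.  */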
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
      && !INTEGRAL_TYPE_P (scalar_type)
      && !POINTER_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.",*dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      break;

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit into one vector).  The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
     vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec, that support widen_mult using a sequence
     of {mult_even,mult_odd} generate the following vectors:
     vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow to change the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
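  /* The hi/lo pairs refer to element positions in memory order, so on
     big-endian targets the two codes must be swapped.  */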
  if (BYTES_BIG_ENDIAN)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != NULL
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}