/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}

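/* For illustration (the SSA names below are invented for the example),
   for a two-vector array VECT_ARRAY created by create_vector_array,
   read_vector_array and write_vector_array emit statements of the form:

        vect_x.7_5 = vect_array[0];
        vect_array[1] = vect_y.8_6;

   i.e. a plain ARRAY_REF load into a fresh SSA name, and a plain
   ARRAY_REF store of an existing SSA name.  */
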
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is an out-of-pattern use; if LHS has other uses that
             are pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

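/* For example, in the loop

        s = 0;
        for (i = 0; i < N; i++)
          {
            a[i] = b[i] + 1;    <-- alters memory (has a vdef)
            s += b[i];          <-- s is used after the loop
          }
        ... = s;

   the store to a[i] is marked vect_used_in_scope because it has a vdef,
   and the reduction statement is marked live because its result is used
   outside the loop (through the loop-closed exit phi).  */
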
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
   indexing), which does not need to be directly vectorized, then the
   liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");
                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}

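/* For example, for the loop

        for (i = 0; i < N; i++)
          a[i] = b[i] * c;

   the store a[i] seeds the worklist (it has a vdef); processing its uses
   then marks the multiplication relevant, which in turn marks the load
   b[i] relevant, while the induction and addressing statements are left
   unmarked because their only uses are for array indexing (case 1 of
   process_use).  */
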
/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

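/* For example, with NCOPIES == 2 and one constant operand (say x + 1.0f),
   the inside cost is 2 * vect_get_stmt_cost (vector_stmt), and one extra
   vector_stmt cost is accounted outside the loop for materializing the
   vector {1.0f, 1.0f, ...} in the preheader.  */
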
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

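/* For example, a store group of GROUP_SIZE == 4 vectorized without
   store-lanes and with NCOPIES == 1 is charged
   1 * log2(4) * 4 = 8 vector_stmt costs for the interleaving permutes,
   on top of the costs of the stores themselves.  */
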
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                 vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                           vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}

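/* For example, with dr_explicit_realign_optimized, NCOPIES == 4 and a
   target that provides builtin_mask_for_load, the prologue is charged
   2 + 1 = 3 vector_stmt costs (when ADD_REALIGN_COST), and the loop body
   4 * (vector_load + vector_stmt) for the loads plus realignments.  */
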
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

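/* For example, initializing a V4SF vector from the constant 1.0f creates
   (SSA name invented for the example):

        cst_.5_3 = { 1.0e+0, 1.0e+0, 1.0e+0, 1.0e+0 };

   in the loop preheader, and returns cst_.5_3 as the vector def.  */
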
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

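/* For example, on a target whose builtin_vectorized_function hook maps
   sqrtf to a vector square-root builtin, a call x = sqrtf (y) in the loop
   can be replaced by a single call operating on a whole vector; which
   builtins (if any) are available is entirely target-specific.  */
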
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_replace (tree, vargs, k,
                                   VEC_index (tree, vec_oprndsk, i));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mark_symbols_for_renaming (new_stmt);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
                   i += 2)
                {
                  size_t k;
                  VEC_truncate (tree, vargs, 0);
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i));
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i + 1));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mark_symbols_for_renaming (new_stmt);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

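/* For example, with a vectorization factor of 8, nunits_in == 4 and
   nunits_out == 8 give modifier == NARROW and
   ncopies == VF / nunits_out == 1: each vectorized call consumes two
   input vectors per argument (vec_oprnd0/vec_oprnd1 above) and produces
   one output vector.  */
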
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}

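/* For example, with MULTI_STEP_CVT == 1 this collects four vector
   definitions into VEC_OPRNDS: two pushed at the outer level and two more
   from the single recursive call, as needed for a two-step conversion.  */
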
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}

2061 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2062 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2063 the resulting vectors and call the function recursively. */
2066 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2067 VEC (tree, heap) **vec_oprnds1,
2068 gimple stmt, tree vec_dest,
2069 gimple_stmt_iterator *gsi,
2070 enum tree_code code1,
2071 enum tree_code code2, tree decl1,
2072 tree decl2, int op_type)
2075 tree vop0, vop1, new_tmp1, new_tmp2;
2076 gimple new_stmt1, new_stmt2;
2077 VEC (tree, heap) *vec_tmp = NULL;
2079 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2080 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2082 if (op_type == binary_op)
2083 vop1 = VEC_index (tree, *vec_oprnds1, i);
2087 /* Generate the two halves of promotion operation. */
2088 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2091 op_type, vec_dest, gsi, stmt);
2092 if (is_gimple_call (new_stmt1))
2094 new_tmp1 = gimple_call_lhs (new_stmt1);
2095 new_tmp2 = gimple_call_lhs (new_stmt2);
2099 new_tmp1 = gimple_assign_lhs (new_stmt1);
2100 new_tmp2 = gimple_assign_lhs (new_stmt2);
2103 /* Store the results for the next step. */
2104 VEC_quick_push (tree, vec_tmp, new_tmp1);
2105 VEC_quick_push (tree, vec_tmp, new_tmp2);
2108 VEC_free (tree, heap, *vec_oprnds0);
2109 *vec_oprnds0 = vec_tmp;
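/* Editor's sketch (illustrative only): promotion is the mirror image of
   demotion.  Assuming V8HI widened to V4SI with code1/code2 being
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, each input vector yields two
   halves:

       vlo = VEC_UNPACK_LO_EXPR <vx0>;   // low  half of the shorts -> V4SI
       vhi = VEC_UNPACK_HI_EXPR <vx0>;   // high half of the shorts -> V4SI

   so VEC_OPRNDS0 doubles in length on every step.  */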
/* Check if STMT performs a conversion operation that can be vectorized.
2114 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2115 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2119 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2120 gimple *vec_stmt, slp_tree slp_node)
2124 tree op0, op1 = NULL_TREE;
2125 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2128 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2129 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2130 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2134 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2135 gimple new_stmt = NULL;
2136 stmt_vec_info prev_stmt_info;
2139 tree vectype_out, vectype_in;
2141 tree lhs_type, rhs_type;
2142 enum { NARROW, NONE, WIDEN } modifier;
2143 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2146 int multi_step_cvt = 0;
2147 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2148 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2150 enum machine_mode rhs_mode;
2151 unsigned short fltsz;
2153 /* Is STMT a vectorizable conversion? */
2155 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2161 if (!is_gimple_assign (stmt))
2164 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2167 code = gimple_assign_rhs_code (stmt);
2168 if (!CONVERT_EXPR_CODE_P (code)
2169 && code != FIX_TRUNC_EXPR
2170 && code != FLOAT_EXPR
2171 && code != WIDEN_MULT_EXPR
2172 && code != WIDEN_LSHIFT_EXPR)
2175 op_type = TREE_CODE_LENGTH (code);
2177 /* Check types of lhs and rhs. */
2178 scalar_dest = gimple_assign_lhs (stmt);
2179 lhs_type = TREE_TYPE (scalar_dest);
2180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2182 op0 = gimple_assign_rhs1 (stmt);
2183 rhs_type = TREE_TYPE (op0);
2185 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2186 && !((INTEGRAL_TYPE_P (lhs_type)
2187 && INTEGRAL_TYPE_P (rhs_type))
2188 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2189 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2192 if ((INTEGRAL_TYPE_P (lhs_type)
2193 && (TYPE_PRECISION (lhs_type)
2194 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2195 || (INTEGRAL_TYPE_P (rhs_type)
2196 && (TYPE_PRECISION (rhs_type)
2197 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2201 "type conversion to/from bit-precision unsupported.");
2205 /* Check the operands of the operation. */
2206 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2207 &def_stmt, &def, &dt[0], &vectype_in))
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "use not simple.");
2213 if (op_type == binary_op)
2217 op1 = gimple_assign_rhs2 (stmt);
2218 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
2221 if (CONSTANT_CLASS_P (op0))
2222 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2223 &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                 &dt[1]);
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "use not simple.");
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }
2253 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2254 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;
2262 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2272 /* Sanity check: make sure that at least one copy of the vectorized stmt
2273 needs to be generated. */
2274 gcc_assert (ncopies >= 1);
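  /* Editor's note (worked example): demoting V8HI to V16QI in a loop with
     vectorization factor 32 gives ncopies = 32 / 16 = 2, i.e. two copies
     of the vectorized stmt per iteration of the vector loop.  */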
2276 /* Supportable by target? */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        goto unsupported;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
2292 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2293 &decl1, &decl2, &code1, &code2,
2294 &multi_step_cvt, &interm_types))
        {
          /* Binary widening operations can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }
2302 if (code != FLOAT_EXPR
2303 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;
2307 rhs_mode = TYPE_MODE (rhs_type);
2308 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2309 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2310 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2311 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2315 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;
2319 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2321 if (!supportable_convert_operation (code, vectype_out,
2322 cvt_type, &decl1, &codecvt1))
2325 else if (!supportable_widening_operation (code, stmt, vectype_out,
2326 cvt_type, &decl1, &decl2,
2327 &codecvt1, &codecvt2,
2332 gcc_assert (multi_step_cvt == 0);
2334 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2335 vectype_in, NULL, NULL, &code1,
2336 &code2, &multi_step_cvt,
2341 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2344 if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          VEC_safe_push (tree, heap, interm_types, cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
2356 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2357 &code1, &multi_step_cvt,
2361 if (code != FIX_TRUNC_EXPR
2362 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;
2366 rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2369 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
2375 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2376 &code1, &multi_step_cvt,
2385 if (!vec_stmt) /* transformation not required. */
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vectorizable_conversion ===");
2389 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2390 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2391 else if (modifier == NARROW)
2393 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2394 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2399 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
        }
      VEC_free (tree, heap, interm_types);
      return true;
    }
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2409 if (op_type == binary_op)
2411 if (CONSTANT_CLASS_P (op0))
2412 op0 = fold_convert (TREE_TYPE (op1), op0);
2413 else if (CONSTANT_CLASS_P (op1))
2414 op1 = fold_convert (TREE_TYPE (op0), op1);
2417 /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
2419 We create vector destinations for the intermediate type (TYPES) received
2420 from supportable_*_operation, and store them in the correct order
2421 for future use in vect_create_vectorized_*_stmts (). */
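  /* Editor's note (hypothetical example): widening char to int in two
     steps would put vector-short in INTERM_TYPES; the loop below then
     pushes a destination variable per intermediate type, so the
     conversion stmts can pop them in the order the steps are
     generated.  */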
2422 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2423 vec_dest = vect_create_destination_var (scalar_dest,
2424 (cvt_type && modifier == WIDEN)
2425 ? cvt_type : vectype_out);
2426 VEC_quick_push (tree, vec_dsts, vec_dest);
2430 for (i = VEC_length (tree, interm_types) - 1;
2431 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        vec_dest = vect_create_destination_var (scalar_dest,
                                                intermediate_type);
2435 VEC_quick_push (tree, vec_dsts, vec_dest);
  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);
2446 if (modifier == NONE)
2447 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2448 else if (modifier == WIDEN)
      vec_oprnds0 = VEC_alloc (tree, heap,
                               (multi_step_cvt
                                ? vect_pow2 (multi_step_cvt) : 1));
2453 if (op_type == binary_op)
2454 vec_oprnds1 = VEC_alloc (tree, heap, 1);
    vec_oprnds0 = VEC_alloc (tree, heap,
                             (multi_step_cvt
                              ? vect_pow2 (multi_step_cvt) : 1));
2461 else if (code == WIDEN_LSHIFT_EXPR)
2462 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2465 prev_stmt_info = NULL;
2469 for (j = 0; j < ncopies; j++)
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2477 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2479 /* Arguments are ready, create the new vector stmt. */
2480 if (code1 == CALL_EXPR)
2482 new_stmt = gimple_build_call (decl1, 1, vop0);
2483 new_temp = make_ssa_name (vec_dest, new_stmt);
2484 gimple_call_set_lhs (new_stmt, new_temp);
2488 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
              new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                       vop0, NULL);
2491 new_temp = make_ssa_name (vec_dest, new_stmt);
2492 gimple_assign_set_lhs (new_stmt, new_temp);
2495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2505 prev_stmt_info = vinfo_for_stmt (new_stmt);
2510 /* In case the vectorization factor (VF) is bigger than the number
2511 of elements that we can fit in a vectype (nunits), we have to
2512 generate more than one vector stmt - i.e - we need to "unroll"
2513 the vector stmt by a factor VF/nunits. */
2514 for (j = 0; j < ncopies; j++)
2521 if (code == WIDEN_LSHIFT_EXPR)
                  vec_oprnd1 = op1;
                  /* Store vec_oprnd1 for every vector stmt to be created
                     for SLP_NODE.  We check during the analysis that all
                     the shift arguments are the same.  */
2529 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2530 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                                     slp_node, -1);
                }
              else
2536 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2537 &vec_oprnds1, slp_node, -1);
2541 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2542 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2543 if (op_type == binary_op)
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                               NULL);
2550 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2556 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2557 VEC_truncate (tree, vec_oprnds0, 0);
2558 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2559 if (op_type == binary_op)
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
2566 VEC_truncate (tree, vec_oprnds1, 0);
2567 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2571 /* Arguments are ready. Create the new vector stmts. */
2572 for (i = multi_step_cvt; i >= 0; i--)
2574 tree this_dest = VEC_index (tree, vec_dsts, i);
2575 enum tree_code c1 = code1, c2 = code2;
2576 if (i == 0 && codecvt2 != ERROR_MARK)
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
2588 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2592 if (codecvt1 == CALL_EXPR)
2594 new_stmt = gimple_build_call (decl1, 1, vop0);
2595 new_temp = make_ssa_name (vec_dest, new_stmt);
2596 gimple_call_set_lhs (new_stmt, new_temp);
2600 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2601 new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
2607 vect_finish_stmt_generation (stmt, new_stmt, gsi);
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
2617 if (!prev_stmt_info)
2618 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2621 prev_stmt_info = vinfo_for_stmt (new_stmt);
2626 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2630 /* In case the vectorization factor (VF) is bigger than the number
2631 of elements that we can fit in a vectype (nunits), we have to
2632 generate more than one vector stmt - i.e - we need to "unroll"
2633 the vector stmt by a factor VF/nunits. */
2634 for (j = 0; j < ncopies; j++)
          if (j == 0 && slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              VEC_truncate (tree, vec_oprnds0, 0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }
2647 /* Arguments are ready. Create the new vector stmts. */
          if (cvt_type)
            FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2651 if (codecvt1 == CALL_EXPR)
2653 new_stmt = gimple_build_call (decl1, 1, vop0);
2654 new_temp = make_ssa_name (vec_dest, new_stmt);
2655 gimple_call_set_lhs (new_stmt, new_temp);
2659 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2660 new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
2665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2666 VEC_replace (tree, vec_oprnds0, i, new_temp);
          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi, slp_node,
                                                 code1, &prev_stmt_info);
2675 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2679 VEC_free (tree, heap, vec_oprnds0);
2680 VEC_free (tree, heap, vec_oprnds1);
2681 VEC_free (tree, heap, vec_dsts);
2682 VEC_free (tree, heap, interm_types);
2688 /* Function vectorizable_assignment.
2690 Check if STMT performs an assignment (copy) that can be vectorized.
2691 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2692 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2693 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2696 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2697 gimple *vec_stmt, slp_tree slp_node)
2702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2703 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2704 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2708 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2709 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2712 VEC(tree,heap) *vec_oprnds = NULL;
2714 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2715 gimple new_stmt = NULL;
2716 stmt_vec_info prev_stmt_info = NULL;
2717 enum tree_code code;
2720 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2728 gcc_assert (ncopies >= 1);
2730 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2733 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2736 /* Is vectorizable assignment? */
2737 if (!is_gimple_assign (stmt))
2740 scalar_dest = gimple_assign_lhs (stmt);
2741 if (TREE_CODE (scalar_dest) != SSA_NAME)
2744 code = gimple_assign_rhs_code (stmt);
2745 if (gimple_assign_single_p (stmt)
2746 || code == PAREN_EXPR
2747 || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;
2752 if (code == VIEW_CONVERT_EXPR)
2753 op = TREE_OPERAND (op, 0);
2755 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2756 &def_stmt, &def, &dt[0], &vectype_in))
2758 if (vect_print_dump_info (REPORT_DETAILS))
2759 fprintf (vect_dump, "use not simple.");
2763 /* We can handle NOP_EXPR conversions that do not change the number
2764 of elements or the vector size. */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;
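  /* Editor's note (hypothetical example): an int -> unsigned int copy, or
     a VIEW_CONVERT_EXPR between V4SI and V4SF, passes the check above
     (same lane count and vector size) and is emitted below as a single
     VIEW_CONVERT_EXPR on the whole vector.  */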
2773 /* We do not handle bit-precision changes. */
2774 if ((CONVERT_EXPR_CODE_P (code)
2775 || code == VIEW_CONVERT_EXPR)
2776 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2777 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2778 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2779 || ((TYPE_PRECISION (TREE_TYPE (op))
2780 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2781 /* But a conversion that does not change the bit-pattern is ok. */
2782 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2783 > TYPE_PRECISION (TREE_TYPE (op)))
2784 && TYPE_UNSIGNED (TREE_TYPE (op))))
2786 if (vect_print_dump_info (REPORT_DETAILS))
2787 fprintf (vect_dump, "type conversion to/from bit-precision "
2792 if (!vec_stmt) /* transformation not required. */
2794 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2795 if (vect_print_dump_info (REPORT_DETAILS))
2796 fprintf (vect_dump, "=== vectorizable_assignment ===");
2797 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2802 if (vect_print_dump_info (REPORT_DETAILS))
2803 fprintf (vect_dump, "transform assignment.");
2806 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2809 for (j = 0; j < ncopies; j++)
2813 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2815 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
      /* Arguments are ready.  Create the new vector stmt.  */
2818 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2820 if (CONVERT_EXPR_CODE_P (code)
2821 || code == VIEW_CONVERT_EXPR)
2822 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2823 new_stmt = gimple_build_assign (vec_dest, vop);
2824 new_temp = make_ssa_name (vec_dest, new_stmt);
2825 gimple_assign_set_lhs (new_stmt, new_temp);
2826 vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2839 prev_stmt_info = vinfo_for_stmt (new_stmt);
2842 VEC_free (tree, heap, vec_oprnds);
2847 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2848 either as shift by a scalar or by a vector. */
2851 vect_supportable_shift (enum tree_code code, tree scalar_type)
2854 enum machine_mode vec_mode;
2859 vectype = get_vectype_for_scalar_type (scalar_type);
  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }
2874 vec_mode = TYPE_MODE (vectype);
2875 icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
2883 /* Function vectorizable_shift.
2885 Check if STMT performs a shift operation that can be vectorized.
2886 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2887 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2888 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2891 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2892 gimple *vec_stmt, slp_tree slp_node)
2896 tree op0, op1 = NULL;
2897 tree vec_oprnd1 = NULL_TREE;
2898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2900 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2901 enum tree_code code;
2902 enum machine_mode vec_mode;
2906 enum machine_mode optab_op2_mode;
2909 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2910 gimple new_stmt = NULL;
2911 stmt_vec_info prev_stmt_info;
2918 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2921 bool scalar_shift_arg = true;
2922 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2925 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2928 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2931 /* Is STMT a vectorizable binary/unary operation? */
2932 if (!is_gimple_assign (stmt))
2935 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2938 code = gimple_assign_rhs_code (stmt);
2940 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2941 || code == RROTATE_EXPR))
2944 scalar_dest = gimple_assign_lhs (stmt);
2945 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2946 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2947 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2949 if (vect_print_dump_info (REPORT_DETAILS))
2950 fprintf (vect_dump, "bit-precision shifts not supported.");
2954 op0 = gimple_assign_rhs1 (stmt);
2955 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2956 &def_stmt, &def, &dt[0], &vectype))
2958 if (vect_print_dump_info (REPORT_DETAILS))
2959 fprintf (vect_dump, "use not simple.");
2962 /* If op0 is an external or constant def use a vector type with
2963 the same size as the output vector type. */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }
2979 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2980 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;
2984 op1 = gimple_assign_rhs2 (stmt);
2985 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2986 &dt[1], &op1_vectype))
2988 if (vect_print_dump_info (REPORT_DETAILS))
2989 fprintf (vect_dump, "use not simple.");
  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;
2998 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3006 gcc_assert (ncopies >= 1);
3008 /* Determine whether the shift amount is a vector, or scalar. If the
3009 shift/rotate amount is a vector, use the vector/vector shift optabs. */
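  /* Editor's note (illustrative): "a[i] << b[i]" needs the vector/vector
     form (optab_vector), while "a[i] << n" with a loop-invariant n can use
     the vector/scalar form (optab_scalar) and keep n in a scalar register;
     the checks below try the scalar form first.  */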
3011 if (dt[1] == vect_internal_def && !slp_node)
3012 scalar_shift_arg = false;
3013 else if (dt[1] == vect_constant_def
3014 || dt[1] == vect_external_def
3015 || dt[1] == vect_internal_def)
      /* In SLP, we need to check whether the shift count is the same in
         all the SLP stmts; in loops, if it is a constant or invariant,
         it is always a scalar shift.  */
      if (slp_node)
        {
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }
3037 /* Vector shifted by vector. */
3038 if (!scalar_shift_arg)
3040 optab = optab_for_tree_code (code, vectype, optab_vector);
3041 if (vect_print_dump_info (REPORT_DETAILS))
3042 fprintf (vect_dump, "vector/vector shift/rotate found.");
      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3045 if (op1_vectype == NULL_TREE
3046 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3048 if (vect_print_dump_info (REPORT_DETAILS))
3049 fprintf (vect_dump, "unusable type for last operand in"
3050 " vector/vector shift/rotate.");
3054 /* See if the machine has a vector shifted by scalar insn and if not
3055 then see if it has a vector shifted by vector insn. */
3058 optab = optab_for_tree_code (code, vectype, optab_scalar);
3060 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3062 if (vect_print_dump_info (REPORT_DETAILS))
3063 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3067 optab = optab_for_tree_code (code, vectype, optab_vector);
3069 && (optab_handler (optab, TYPE_MODE (vectype))
3070 != CODE_FOR_nothing))
3072 scalar_shift_arg = false;
3074 if (vect_print_dump_info (REPORT_DETAILS))
3075 fprintf (vect_dump, "vector/vector shift/rotate found.");
3077 /* Unlike the other binary operators, shifts/rotates have
3078 the rhs being int, instead of the same type as the lhs,
3079 so make sure the scalar is the right type if we are
3080 dealing with vectors of long long/long/short/char. */
3081 if (dt[1] == vect_constant_def)
3082 op1 = fold_convert (TREE_TYPE (vectype), op1);
          else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                               TREE_TYPE (op1)))
            {
              if (slp_node
                  && TYPE_MODE (TREE_TYPE (vectype))
                     != TYPE_MODE (TREE_TYPE (op1)))
3090 if (vect_print_dump_info (REPORT_DETAILS))
3091 fprintf (vect_dump, "unusable type for last operand in"
3092 " vector/vector shift/rotate.");
3095 if (vec_stmt && !slp_node)
3097 op1 = fold_convert (TREE_TYPE (vectype), op1);
3098 op1 = vect_init_vector (stmt, op1,
3099 TREE_TYPE (vectype), NULL);
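  /* Editor's note (hypothetical example): for "long long x, x << 3" the
     shift amount 3 is an int; when only a vector/vector V2DI shift is
     available, the conversion above turns it into (long long) 3 and
     vect_init_vector broadcasts it, conceptually:

         vect_cst_ = { 3, 3 };
         vect_x_ = vect_x_ << vect_cst_;
   */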
3106 /* Supportable by target? */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
3113 vec_mode = TYPE_MODE (vectype);
3114 icode = (int) optab_handler (optab, vec_mode);
3115 if (icode == CODE_FOR_nothing)
3117 if (vect_print_dump_info (REPORT_DETAILS))
3118 fprintf (vect_dump, "op not supported by target.");
3119 /* Check only during analysis. */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
3124 if (vect_print_dump_info (REPORT_DETAILS))
3125 fprintf (vect_dump, "proceeding using word mode.");
3128 /* Worthwhile without SIMD support? Check only during analysis. */
3129 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3130 && vf < vect_min_worthwhile_factor (code)
3133 if (vect_print_dump_info (REPORT_DETAILS))
3134 fprintf (vect_dump, "not worthwhile without SIMD support.");
3138 if (!vec_stmt) /* transformation not required. */
3140 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3141 if (vect_print_dump_info (REPORT_DETAILS))
3142 fprintf (vect_dump, "=== vectorizable_shift ===");
3143 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3149 if (vect_print_dump_info (REPORT_DETAILS))
3150 fprintf (vect_dump, "transform binary/unary operation.");
3153 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3155 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3156 created in the previous stages of the recursion, so no allocation is
3157 needed, except for the case of shift with scalar shift argument. In that
3158 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3159 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3160 In case of loop-based vectorization we allocate VECs of size 1. We
3161 allocate VEC_OPRNDS1 only in case of binary operation. */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
3167 else if (scalar_shift_arg)
3168 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3170 prev_stmt_info = NULL;
3171 for (j = 0; j < ncopies; j++)
3176 if (scalar_shift_arg)
3178 /* Vector shl and shr insn patterns can be defined with scalar
3179 operand 2 (shift operand). In this case, use constant or loop
             invariant op1 directly, without extending it to vector mode
             first.  */
3182 optab_op2_mode = insn_data[icode].operand[2].mode;
3183 if (!VECTOR_MODE_P (optab_op2_mode))
3185 if (vect_print_dump_info (REPORT_DETAILS))
3186 fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3191 /* Store vec_oprnd1 for every vector stmt to be created
3192 for SLP_NODE. We check during the analysis that all
3193 the shift arguments are the same.
3194 TODO: Allow different constants for different vector
3195 stmts generated for an SLP instance. */
3196 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3197 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3202 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3203 (a special case for certain kind of vector shifts); otherwise,
3204 operand 1 should be of a vector type (the usual case). */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
3213 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3215 /* Arguments are ready. Create the new vector stmt. */
3216 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3218 vop1 = VEC_index (tree, vec_oprnds1, i);
3219 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3220 new_temp = make_ssa_name (vec_dest, new_stmt);
3221 gimple_assign_set_lhs (new_stmt, new_temp);
3222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3234 prev_stmt_info = vinfo_for_stmt (new_stmt);
3237 VEC_free (tree, heap, vec_oprnds0);
3238 VEC_free (tree, heap, vec_oprnds1);
3244 /* Function vectorizable_operation.
   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
3248 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3249 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3250 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3253 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3254 gimple *vec_stmt, slp_tree slp_node)
3258 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3259 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3261 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3262 enum tree_code code;
3263 enum machine_mode vec_mode;
3270 enum vect_def_type dt[3]
3271 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3272 gimple new_stmt = NULL;
3273 stmt_vec_info prev_stmt_info;
3279 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3280 tree vop0, vop1, vop2;
3281 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3284 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3287 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3290 /* Is STMT a vectorizable binary/unary operation? */
3291 if (!is_gimple_assign (stmt))
3294 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3297 code = gimple_assign_rhs_code (stmt);
3299 /* For pointer addition, we should use the normal plus for
3300 the vector addition. */
3301 if (code == POINTER_PLUS_EXPR)
  /* Support only unary, binary and ternary operations.  */
3305 op_type = TREE_CODE_LENGTH (code);
3306 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3308 if (vect_print_dump_info (REPORT_DETAILS))
3309 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3314 scalar_dest = gimple_assign_lhs (stmt);
3315 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
3319 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3320 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3321 /* Exception are bitwise binary operations. */
3322 && code != BIT_IOR_EXPR
3323 && code != BIT_XOR_EXPR
3324 && code != BIT_AND_EXPR)
3326 if (vect_print_dump_info (REPORT_DETAILS))
3327 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3331 op0 = gimple_assign_rhs1 (stmt);
3332 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3333 &def_stmt, &def, &dt[0], &vectype))
3335 if (vect_print_dump_info (REPORT_DETAILS))
3336 fprintf (vect_dump, "use not simple.");
3339 /* If op0 is an external or constant def use a vector type with
3340 the same size as the output vector type. */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }
3356 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3357 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;
3361 if (op_type == binary_op || op_type == ternary_op)
3363 op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
3367 if (vect_print_dump_info (REPORT_DETAILS))
3368 fprintf (vect_dump, "use not simple.");
3372 if (op_type == ternary_op)
3374 op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[2]))
3378 if (vect_print_dump_info (REPORT_DETAILS))
3379 fprintf (vect_dump, "use not simple.");
  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;
3389 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3397 gcc_assert (ncopies >= 1);
3399 /* Shifts are handled in vectorizable_shift (). */
3400 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;
3404 optab = optab_for_tree_code (code, vectype, optab_default);
3406 /* Supportable by target? */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
3413 vec_mode = TYPE_MODE (vectype);
3414 icode = (int) optab_handler (optab, vec_mode);
3415 if (icode == CODE_FOR_nothing)
3417 if (vect_print_dump_info (REPORT_DETAILS))
3418 fprintf (vect_dump, "op not supported by target.");
3419 /* Check only during analysis. */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
3424 if (vect_print_dump_info (REPORT_DETAILS))
3425 fprintf (vect_dump, "proceeding using word mode.");
3428 /* Worthwhile without SIMD support? Check only during analysis. */
3429 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3430 && vf < vect_min_worthwhile_factor (code)
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "not worthwhile without SIMD support.");
3438 if (!vec_stmt) /* transformation not required. */
3440 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3441 if (vect_print_dump_info (REPORT_DETAILS))
3442 fprintf (vect_dump, "=== vectorizable_operation ===");
3443 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3449 if (vect_print_dump_info (REPORT_DETAILS))
3450 fprintf (vect_dump, "transform binary/unary operation.");
3453 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3455 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3456 created in the previous stages of the recursion, so no allocation is
3457 needed, except for the case of shift with scalar shift argument. In that
3458 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3459 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3460 In case of loop-based vectorization we allocate VECs of size 1. We
     allocate VEC_OPRNDS1 for binary and ternary operations, and VEC_OPRNDS2
     only for ternary operations.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }
3471 /* In case the vectorization factor (VF) is bigger than the number
3472 of elements that we can fit in a vectype (nunits), we have to generate
3473 more than one vector stmt - i.e - we need to "unroll" the
3474 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3475 from one copy of the vector stmt to the next, in the field
3476 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3477 stages to find the correct vector defs to be used when vectorizing
3478 stmts that use the defs of the current stmt. The example below
3479 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3480 we need to create 4 vectorized stmts):
        before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -
        step 1: vectorize stmt S1 (done in vectorizable_load. See more details
                there):
3489 RELATED_STMT VEC_STMT
3490 VS1_0: vx0 = memref0 VS1_1 -
3491 VS1_1: vx1 = memref1 VS1_2 -
3492 VS1_2: vx2 = memref2 VS1_3 -
3493 VS1_3: vx3 = memref3 - -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -
3497 step2: vectorize stmt S2 (done here):
3498 To vectorize stmt S2 we first need to find the relevant vector
3499 def for the first operand 'x'. This is, as usual, obtained from
3500 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3501 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3502 relevant vector def 'vx0'. Having found 'vx0' we can generate
3503 the vector stmt VS2_0, and as usual, record it in the
3504 STMT_VINFO_VEC_STMT of stmt S2.
3505 When creating the second copy (VS2_1), we obtain the relevant vector
3506 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3507 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3508 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3509 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3510 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3511 chain of stmts and pointers:
3512 RELATED_STMT VEC_STMT
3513 VS1_0: vx0 = memref0 VS1_1 -
3514 VS1_1: vx1 = memref1 VS1_2 -
3515 VS1_2: vx2 = memref2 VS1_3 -
3516 VS1_3: vx3 = memref3 - -
3517 S1: x = load - VS1_0
3518 VS2_0: vz0 = vx0 + v1 VS2_1 -
3519 VS2_1: vz1 = vx1 + v1 VS2_2 -
3520 VS2_2: vz2 = vx2 + v1 VS2_3 -
3521 VS2_3: vz3 = vx3 + v1 - -
3522 S2: z = x + 1 - VS2_0 */
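  /* Editor's sketch (conceptual, not a GCC API): locating the def for
     copy number J in the chain above is a linked-list walk:

         def = STMT_VINFO_VEC_STMT (vinfo_for_stmt (def_stmt));  // VS1_0
         for (k = 0; k < j; k++)
           def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def));

     which is what vect_get_vec_def_for_stmt_copy does one step at a
     time.  */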
3524 prev_stmt_info = NULL;
3525 for (j = 0; j < ncopies; j++)
3530 if (op_type == binary_op || op_type == ternary_op)
3531 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3534 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3536 if (op_type == ternary_op)
3538 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3539 VEC_quick_push (tree, vec_oprnds2,
3540 vect_get_vec_def_for_operand (op2, stmt, NULL));
3545 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3546 if (op_type == ternary_op)
3548 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3549 VEC_quick_push (tree, vec_oprnds2,
3550 vect_get_vec_def_for_stmt_copy (dt[2],
3555 /* Arguments are ready. Create the new vector stmt. */
3556 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3558 vop1 = ((op_type == binary_op || op_type == ternary_op)
3559 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3560 vop2 = ((op_type == ternary_op)
3561 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3562 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3564 new_temp = make_ssa_name (vec_dest, new_stmt);
3565 gimple_assign_set_lhs (new_stmt, new_temp);
3566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3568 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3575 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3577 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3578 prev_stmt_info = vinfo_for_stmt (new_stmt);
3581 VEC_free (tree, heap, vec_oprnds0);
3583 VEC_free (tree, heap, vec_oprnds1);
3585 VEC_free (tree, heap, vec_oprnds2);
3591 /* Function vectorizable_store.
   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
3595 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3596 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3597 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
3606 tree vec_oprnd = NULL_TREE;
3607 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3608 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3609 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3611 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3612 struct loop *loop = NULL;
3613 enum machine_mode vec_mode;
3615 enum dr_alignment_support alignment_support_scheme;
3618 enum vect_def_type dt;
3619 stmt_vec_info prev_stmt_info = NULL;
3620 tree dataref_ptr = NULL_TREE;
3621 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3624 gimple next_stmt, first_stmt = NULL;
3625 bool strided_store = false;
3626 bool store_lanes_p = false;
3627 unsigned int group_size, i;
3628 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3630 VEC(tree,heap) *vec_oprnds = NULL;
3631 bool slp = (slp_node != NULL);
3632 unsigned int vec_num;
3633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
3639 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3647 gcc_assert (ncopies >= 1);
3649 /* FORNOW. This restriction should be relaxed. */
3650 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3652 if (vect_print_dump_info (REPORT_DETAILS))
3653 fprintf (vect_dump, "multiple types in nested loop.");
3657 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3660 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3663 /* Is vectorizable store? */
3665 if (!is_gimple_assign (stmt))
3668 scalar_dest = gimple_assign_lhs (stmt);
3669 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3670 && is_pattern_stmt_p (stmt_info))
3671 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3672 if (TREE_CODE (scalar_dest) != ARRAY_REF
3673 && TREE_CODE (scalar_dest) != INDIRECT_REF
3674 && TREE_CODE (scalar_dest) != COMPONENT_REF
3675 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3676 && TREE_CODE (scalar_dest) != REALPART_EXPR
3677 && TREE_CODE (scalar_dest) != MEM_REF)
3680 gcc_assert (gimple_assign_single_p (stmt));
3681 op = gimple_assign_rhs1 (stmt);
3682 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3684 if (vect_print_dump_info (REPORT_DETAILS))
3685 fprintf (vect_dump, "use not simple.");
3689 elem_type = TREE_TYPE (vectype);
3690 vec_mode = TYPE_MODE (vectype);
3692 /* FORNOW. In some cases can vectorize even if data-type not supported
3693 (e.g. - array initialization with 0). */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;
3700 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3702 if (vect_print_dump_info (REPORT_DETAILS))
3703 fprintf (vect_dump, "negative step for store.");
3707 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3709 strided_store = true;
3710 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3711 if (!slp && !PURE_SLP_STMT (stmt_info))
3713 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3714 if (vect_store_lanes_supported (vectype, group_size))
3715 store_lanes_p = true;
          else if (!vect_strided_store_supported (vectype, group_size))
            return false;
3720 if (first_stmt == stmt)
3722 /* STMT is the leader of the group. Check the operands of all the
3723 stmts of the group. */
3724 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3727 gcc_assert (gimple_assign_single_p (next_stmt));
3728 op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                       &def, &dt))
3732 if (vect_print_dump_info (REPORT_DETAILS))
3733 fprintf (vect_dump, "use not simple.");
3736 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3741 if (!vec_stmt) /* transformation not required. */
3743 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
      return true;
    }
3752 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3753 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3755 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3758 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3760 /* We vectorize all the stmts of the interleaving group when we
3761 reach the last stmt in the group. */
3762 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }
3772 strided_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
3775 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3776 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3777 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3778 op = gimple_assign_rhs1 (first_stmt);
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
3783 vec_num = group_size;
3789 group_size = vec_num = 1;
3792 if (vect_print_dump_info (REPORT_DETAILS))
3793 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3795 dr_chain = VEC_alloc (tree, heap, group_size);
3796 oprnds = VEC_alloc (tree, heap, group_size);
3798 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3799 gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
3802 gcc_assert (!store_lanes_p
3803 || alignment_support_scheme == dr_aligned
3804 || alignment_support_scheme == dr_unaligned_supported);
  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
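  /* Editor's note (illustrative): on targets with store-lanes instructions
     (e.g. ARM NEON vst2/vst3/vst4), the vectors are collected into the
     array type built above and stored by one IFN_STORE_LANES call, instead
     of being interleaved with explicit VEC_PERM_EXPRs.  */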
3811 /* In case the vectorization factor (VF) is bigger than the number
3812 of elements that we can fit in a vectype (nunits), we have to generate
3813 more than one vector stmt - i.e - we need to "unroll" the
3814 vector stmt by a factor VF/nunits. For more details see documentation in
3815 vect_get_vec_def_for_copy_stmt. */
  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3
3824 We create vectorized stores starting from base address (the access of the
3825 first stmt in the chain (S2 in the above example), when the last store stmt
3826 of the chain (S4) is reached:
3829 VS2: &base + vec_size*1 = vx0
3830 VS3: &base + vec_size*2 = vx1
3831 VS4: &base + vec_size*3 = vx3
3833 Then permutation statements are generated:
3835 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3836 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3839 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3840 (the order of the data-refs in the output of vect_permute_store_chain
3841 corresponds to the order of scalar stmts in the interleaving chain - see
3842 the documentation of vect_permute_store_chain()).
3844 In case of both multiple types and interleaving, above vector stores and
3845 permutation stmts are created for every copy. The result vector stmts are
3846 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
3850 prev_stmt_info = NULL;
3851 for (j = 0; j < ncopies; j++)
3860 /* Get vectorized arguments for SLP_NODE. */
3861 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3862 NULL, slp_node, -1);
3864 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3868 /* For interleaved stores we collect vectorized defs for all the
3869 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3870 used as an input to vect_permute_store_chain(), and OPRNDS as
3871 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3873 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3874 OPRNDS are of size 1. */
3875 next_stmt = first_stmt;
3876 for (i = 0; i < group_size; i++)
3878 /* Since gaps are not supported for interleaved stores,
3879 GROUP_SIZE is the exact number of stmts in the chain.
3880 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3881 there is no interleaving, GROUP_SIZE is 1, and only one
3882 iteration of the loop will be executed. */
3883 gcc_assert (next_stmt
3884 && gimple_assign_single_p (next_stmt));
3885 op = gimple_assign_rhs1 (next_stmt);
3887 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3889 VEC_quick_push(tree, dr_chain, vec_oprnd);
3890 VEC_quick_push(tree, oprnds, vec_oprnd);
3891 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          /* We should have caught mismatched types earlier.  */
3896 gcc_assert (useless_type_conversion_p (vectype,
3897 TREE_TYPE (vec_oprnd)));
3898 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3899 NULL_TREE, &dummy, gsi,
3900 &ptr_incr, false, &inv_p);
3901 gcc_assert (bb_vinfo || !inv_p);
3905 /* For interleaved stores we created vectorized defs for all the
3906 defs stored in OPRNDS in the previous iteration (previous copy).
3907 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3908 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3910 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3911 OPRNDS are of size 1. */
3912 for (i = 0; i < group_size; i++)
3914 op = VEC_index (tree, oprnds, i);
3915 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3917 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3918 VEC_replace(tree, dr_chain, i, vec_oprnd);
3919 VEC_replace(tree, oprnds, i, vec_oprnd);
3921 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3922 TYPE_SIZE_UNIT (aggr_type));
3929 /* Combine all the vectors into an array. */
3930 vec_array = create_vector_array (vectype, vec_num);
3931 for (i = 0; i < vec_num; i++)
3933 vec_oprnd = VEC_index (tree, dr_chain, i);
3934 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
3939 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3940 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3941 gimple_call_set_lhs (new_stmt, data_ref);
3942 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3943 mark_symbols_for_renaming (new_stmt);
3950 result_chain = VEC_alloc (tree, heap, group_size);
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
3956 next_stmt = first_stmt;
3957 for (i = 0; i < vec_num; i++)
3959 struct ptr_info_def *pi;
              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);
              if (slp)
                vec_oprnd = VEC_index (tree, vec_oprnds, i);
3968 else if (strided_store)
3969 /* For strided stores vectorized defs are interleaved in
3970 vect_permute_store_chain(). */
3971 vec_oprnd = VEC_index (tree, result_chain, i);
3973 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3974 build_int_cst (reference_alias_ptr_type
3975 (DR_REF (first_dr)), 0));
3976 pi = get_ptr_info (dataref_ptr);
3977 pi->align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                pi->misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
3982 TREE_TYPE (data_ref)
3983 = build_aligned_type (TREE_TYPE (data_ref),
3984 TYPE_ALIGN (elem_type));
3985 pi->align = TYPE_ALIGN_UNIT (elem_type);
3990 TREE_TYPE (data_ref)
3991 = build_aligned_type (TREE_TYPE (data_ref),
3992 TYPE_ALIGN (elem_type));
3993 pi->misalign = DR_MISALIGNMENT (first_dr);
3996 /* Arguments are ready. Create the new vector stmt. */
3997 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999 mark_symbols_for_renaming (new_stmt);
4004 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4015 prev_stmt_info = vinfo_for_stmt (new_stmt);
4019 VEC_free (tree, heap, dr_chain);
4020 VEC_free (tree, heap, oprnds);
4022 VEC_free (tree, heap, result_chain);
4024 VEC_free (tree, heap, vec_oprnds);
4029 /* Given a vector type VECTYPE and permutation SEL returns
4030 the VECTOR_CST mask that implements the permutation of the
4031 vector elements. If that is impossible to do, returns NULL. */
4034 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4036 tree mask_elt_type, mask_type, mask_vec;
4039 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;
  mask_elt_type
    = lang_hooks.types.type_for_size
        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4047 mask_type = get_vectype_for_scalar_type (mask_elt_type);
  mask_vec = NULL;
  for (i = nunits - 1; i >= 0; i--)
    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
                          mask_vec);
  mask_vec = build_vector (mask_type, mask_vec);

  return mask_vec;
}
4058 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */
4063 perm_mask_for_reverse (tree vectype)
4068 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4069 sel = XALLOCAVEC (unsigned char, nunits);
4071 for (i = 0; i < nunits; ++i)
4072 sel[i] = nunits - 1 - i;
4074 return vect_gen_perm_mask (vectype, sel);
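/* Editor's note (worked example): for V4SI the loop above builds
   sel = { 3, 2, 1, 0 }, so the mask returned by vect_gen_perm_mask
   reverses the vector: { a, b, c, d } -> { d, c, b, a }.  */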
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
4079 using permutation mask MASK_VEC, insert them at *GSI and return the
4080 permuted vector variable. */
4083 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4084 gimple_stmt_iterator *gsi)
4086 tree vectype = TREE_TYPE (x);
4087 tree perm_dest, data_ref;
4090 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4091 data_ref = make_ssa_name (perm_dest, NULL);
4093 /* Generate the permute statement. */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
                                             x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
4101 /* vectorizable_load.
   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
4105 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4106 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4107 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4110 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4111 slp_tree slp_node, slp_instance slp_node_instance)
4114 tree vec_dest = NULL;
4115 tree data_ref = NULL;
4116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4117 stmt_vec_info prev_stmt_info;
4118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4119 struct loop *loop = NULL;
4120 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4121 bool nested_in_vect_loop = false;
4122 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4123 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4126 enum machine_mode mode;
4127 gimple new_stmt = NULL;
4129 enum dr_alignment_support alignment_support_scheme;
4130 tree dataref_ptr = NULL_TREE;
4132 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4134 int i, j, group_size;
4135 tree msq = NULL_TREE, lsq;
4136 tree offset = NULL_TREE;
4137 tree realignment_token = NULL_TREE;
4139 VEC(tree,heap) *dr_chain = NULL;
4140 bool strided_load = false;
4141 bool load_lanes_p = false;
4145 bool compute_in_loop = false;
4146 struct loop *at_loop;
4148 bool slp = (slp_node != NULL);
4149 bool slp_perm = false;
4150 enum tree_code code;
4151 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4154 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4155 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4156 int gather_scale = 1;
4157 enum vect_def_type gather_dt = vect_unknown_def_type;
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
4162 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;
4168 /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     such cases.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4176 gcc_assert (ncopies >= 1);
4178 /* FORNOW. This restriction should be relaxed. */
4179 if (nested_in_vect_loop && ncopies > 1)
4181 if (vect_print_dump_info (REPORT_DETAILS))
4182 fprintf (vect_dump, "multiple types in nested loop.");
4186 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4189 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4192 /* Is vectorizable load? */
4193 if (!is_gimple_assign (stmt))
4196 scalar_dest = gimple_assign_lhs (stmt);
4197 if (TREE_CODE (scalar_dest) != SSA_NAME)
4200 code = gimple_assign_rhs_code (stmt);
4201 if (code != ARRAY_REF
4202 && code != INDIRECT_REF
4203 && code != COMPONENT_REF
4204 && code != IMAGPART_EXPR
4205 && code != REALPART_EXPR
4207 && TREE_CODE_CLASS (code) != tcc_declaration)
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;
4213 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4214 if (negative && ncopies > 1)
4216 if (vect_print_dump_info (REPORT_DETAILS))
4217 fprintf (vect_dump, "multiple types with negative step.");
4221 elem_type = TREE_TYPE (vectype);
4222 mode = TYPE_MODE (vectype);
4224 /* FORNOW.  In some cases we can vectorize even if the data-type is not
4225 supported (e.g. - data copies).  */
4226 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4228 if (vect_print_dump_info (REPORT_DETAILS))
4229 fprintf (vect_dump, "Aligned load, but unsupported type.");
4233 /* Check if the load is a part of an interleaving chain. */
4234 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4236 strided_load = true;
4238 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4240 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4241 if (!slp && !PURE_SLP_STMT (stmt_info))
4243 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4244 if (vect_load_lanes_supported (vectype, group_size))
4245 load_lanes_p = true;
4246 else if (!vect_strided_load_supported (vectype, group_size))
4253 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4254 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4255 if (alignment_support_scheme != dr_aligned
4256 && alignment_support_scheme != dr_unaligned_supported)
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "negative step but alignment required.");
4262 if (!perm_mask_for_reverse (vectype))
4264 if (vect_print_dump_info (REPORT_DETAILS))
4265 fprintf (vect_dump, "negative step and reversing not supported.");
4270 if (STMT_VINFO_GATHER_P (stmt_info))
4274 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4275 &gather_off, &gather_scale);
4276 gcc_assert (gather_decl);
4277 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4278 &def_stmt, &def, &gather_dt,
4279 &gather_off_vectype))
4281 if (vect_print_dump_info (REPORT_DETAILS))
4282 fprintf (vect_dump, "gather index use not simple.");
4287 if (!vec_stmt) /* transformation not required. */
4289 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4290 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4294 if (vect_print_dump_info (REPORT_DETAILS))
4295 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4299 if (STMT_VINFO_GATHER_P (stmt_info))
4301 tree vec_oprnd0 = NULL_TREE, op;
4302 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4303 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4304 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4305 edge pe = loop_preheader_edge (loop);
4308 enum { NARROW, NONE, WIDEN } modifier;
4309 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4311 if (nunits == gather_off_nunits)
4313 else if (nunits == gather_off_nunits / 2)
4315 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4318 for (i = 0; i < gather_off_nunits; ++i)
4319 sel[i] = i | nunits;
4321 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4322 gcc_assert (perm_mask != NULL_TREE);
4324 else if (nunits == gather_off_nunits * 2)
4326 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4329 for (i = 0; i < nunits; ++i)
4330 sel[i] = i < gather_off_nunits
4331 ? i : i + nunits - gather_off_nunits;
4333 perm_mask = vect_gen_perm_mask (vectype, sel);
4334 gcc_assert (perm_mask != NULL_TREE);
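/* Illustrative note on the masks built above (editor's sketch): with
   nunits == 2 and gather_off_nunits == 4, sel is {2,3,2,3}, re-selecting
   the upper half of the offset vector for the odd-numbered copies; with
   nunits == 8 and gather_off_nunits == 4, sel is {0,1,2,3,8,9,10,11},
   merging the used halves of two consecutive gather results.  */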
4340 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4341 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4342 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4343 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4344 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4345 scaletype = TREE_VALUE (arglist);
4346 gcc_checking_assert (types_compatible_p (srctype, rettype)
4347 && types_compatible_p (srctype, masktype));
4349 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4351 ptr = fold_convert (ptrtype, gather_base);
4352 if (!is_gimple_min_invariant (ptr))
4354 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4355 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4356 gcc_assert (!new_bb);
4359 /* Currently we support only unconditional gather loads,
4360 so mask should be all ones. */
4361 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4362 mask = build_int_cst (TREE_TYPE (masktype), -1);
4363 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4367 for (j = 0; j < 6; ++j)
4369 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4370 mask = build_real (TREE_TYPE (masktype), r);
4374 mask = build_vector_from_val (masktype, mask);
4375 mask = vect_init_vector (stmt, mask, masktype, NULL);
4377 scale = build_int_cst (scaletype, gather_scale);
4379 prev_stmt_info = NULL;
4380 for (j = 0; j < ncopies; ++j)
4382 if (modifier == WIDEN && (j & 1))
4383 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4384 perm_mask, stmt, gsi);
4387 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4390 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4392 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4394 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4395 == TYPE_VECTOR_SUBPARTS (idxtype));
4396 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4397 add_referenced_var (var);
4398 var = make_ssa_name (var, NULL);
4399 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4401 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4403 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4408 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4410 if (!useless_type_conversion_p (vectype, rettype))
4412 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4413 == TYPE_VECTOR_SUBPARTS (rettype));
4414 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4415 add_referenced_var (var);
4416 op = make_ssa_name (var, new_stmt);
4417 gimple_call_set_lhs (new_stmt, op);
4418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4419 var = make_ssa_name (vec_dest, NULL);
4420 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4422 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4427 var = make_ssa_name (vec_dest, new_stmt);
4428 gimple_call_set_lhs (new_stmt, var);
4431 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4433 if (modifier == NARROW)
4440 var = permute_vec_elements (prev_res, var,
4441 perm_mask, stmt, gsi);
4442 new_stmt = SSA_NAME_DEF_STMT (var);
4445 if (prev_stmt_info == NULL)
4446 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4448 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4449 prev_stmt_info = vinfo_for_stmt (new_stmt);
4456 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4458 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4459 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4460 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4462 /* Check if the chain of loads is already vectorized. */
4463 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4465 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4468 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4469 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4471 /* VEC_NUM is the number of vect stmts to be created for this group. */
4474 strided_load = false;
4475 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4476 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4480 vec_num = group_size;
4486 group_size = vec_num = 1;
4489 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4490 gcc_assert (alignment_support_scheme);
4491 /* Targets with load-lane instructions must not require explicit
4492 realignment.  */
4493 gcc_assert (!load_lanes_p
4494 || alignment_support_scheme == dr_aligned
4495 || alignment_support_scheme == dr_unaligned_supported);
4497 /* In case the vectorization factor (VF) is bigger than the number
4498 of elements that we can fit in a vectype (nunits), we have to generate
4499 more than one vector stmt - i.e - we need to "unroll" the
4500 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4501 from one copy of the vector stmt to the next, in the field
4502 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4503 stages to find the correct vector defs to be used when vectorizing
4504 stmts that use the defs of the current stmt. The example below
4505 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4506 need to create 4 vectorized stmts):
4508 before vectorization:
4509 RELATED_STMT VEC_STMT
4513 step 1: vectorize stmt S1:
4514 We first create the vector stmt VS1_0, and, as usual, record a
4515 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4516 Next, we create the vector stmt VS1_1, and record a pointer to
4517 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4518 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4520 RELATED_STMT VEC_STMT
4521 VS1_0: vx0 = memref0 VS1_1 -
4522 VS1_1: vx1 = memref1 VS1_2 -
4523 VS1_2: vx2 = memref2 VS1_3 -
4524 VS1_3: vx3 = memref3 - -
4525 S1: x = load - VS1_0
4528 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4529 information we recorded in the RELATED_STMT field is used to vectorize
4530 stmt S2.  */
4532 /* In case of interleaving (non-unit strided access):
4539 Vectorized loads are created in the order of memory accesses
4540 starting from the access of the first stmt of the chain:
4543 VS2: vx1 = &base + vec_size*1
4544 VS3: vx3 = &base + vec_size*2
4545 VS4: vx4 = &base + vec_size*3
4547 Then permutation statements are generated:
4549 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4550 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4553 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4554 (the order of the data-refs in the output of vect_permute_load_chain
4555 corresponds to the order of scalar stmts in the interleaving chain - see
4556 the documentation of vect_permute_load_chain()).
4557 The generation of permutation stmts and recording them in
4558 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4560 In case of both multiple types and interleaving, the vector loads and
4561 permutation stmts above are created for every copy. The result vector
4562 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4563 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
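/* Illustrative example (editor's sketch): the interleaved case described
   above typically comes from scalar code such as

     for (i = 0; i < n; i++)
       {
         x = a[2*i];
         y = a[2*i + 1];
       }

   where both group members are extracted from the same two vector loads
   by the even/odd permutes VS5/VS6.  */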
4565 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4566 on a target that supports unaligned accesses (dr_unaligned_supported)
4567 we generate the following code:
4571 p = p + indx * vectype_size;
4576 Otherwise, the data reference is potentially unaligned on a target that
4577 does not support unaligned accesses (dr_explicit_realign_optimized) -
4578 then generate the following code, in which the data in each iteration is
4579 obtained by two vector loads, one from the previous iteration, and one
4580 from the current iteration:
4582 msq_init = *(floor(p1))
4583 p2 = initial_addr + VS - 1;
4584 realignment_token = call target_builtin;
4587 p2 = p2 + indx * vectype_size
4589 vec_dest = realign_load (msq, lsq, realignment_token)
4594 /* If the misalignment remains the same throughout the execution of the
4595 loop, we can create the init_addr and permutation mask at the loop
4596 preheader. Otherwise, it needs to be created inside the loop.
4597 This can only occur when vectorizing memory accesses in the inner-loop
4598 nested within an outer-loop that is being vectorized. */
4600 if (loop && nested_in_vect_loop_p (loop, stmt)
4601 && (TREE_INT_CST_LOW (DR_STEP (dr))
4602 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4604 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4605 compute_in_loop = true;
4608 if ((alignment_support_scheme == dr_explicit_realign_optimized
4609 || alignment_support_scheme == dr_explicit_realign)
4610 && !compute_in_loop)
4612 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4613 alignment_support_scheme, NULL_TREE,
4615 if (alignment_support_scheme == dr_explicit_realign_optimized)
4617 phi = SSA_NAME_DEF_STMT (msq);
4618 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4625 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4628 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4630 aggr_type = vectype;
4632 prev_stmt_info = NULL;
4633 for (j = 0; j < ncopies; j++)
4635 /* 1. Create the vector or array pointer update chain. */
4637 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4638 offset, &dummy, gsi,
4639 &ptr_incr, false, &inv_p);
4641 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4642 TYPE_SIZE_UNIT (aggr_type));
4644 if (strided_load || slp_perm)
4645 dr_chain = VEC_alloc (tree, heap, vec_num);
4651 vec_array = create_vector_array (vectype, vec_num);
4653 /* Emit:
4654 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
4655 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4656 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4657 gimple_call_set_lhs (new_stmt, vec_array);
4658 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4659 mark_symbols_for_renaming (new_stmt);
4661 /* Extract each vector into an SSA_NAME. */
4662 for (i = 0; i < vec_num; i++)
4664 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4666 VEC_quick_push (tree, dr_chain, new_temp);
4669 /* Record the mapping between SSA_NAMEs and statements. */
4670 vect_record_strided_load_vectors (stmt, dr_chain);
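/* Illustrative note (editor's sketch): this IFN_LOAD_LANES path is only
   taken when vect_load_lanes_supported found a suitable target
   instruction, e.g. structure-load instructions such as NEON's
   vld2/vld3/vld4 on ARM, which load a whole interleaved group at once.  */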
4674 for (i = 0; i < vec_num; i++)
4677 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4680 /* 2. Create the vector-load in the loop. */
4681 switch (alignment_support_scheme)
4684 case dr_unaligned_supported:
4686 struct ptr_info_def *pi;
4688 = build2 (MEM_REF, vectype, dataref_ptr,
4689 build_int_cst (reference_alias_ptr_type
4690 (DR_REF (first_dr)), 0));
4691 pi = get_ptr_info (dataref_ptr);
4692 pi->align = TYPE_ALIGN_UNIT (vectype);
4693 if (alignment_support_scheme == dr_aligned)
4695 gcc_assert (aligned_access_p (first_dr));
4698 else if (DR_MISALIGNMENT (first_dr) == -1)
4700 TREE_TYPE (data_ref)
4701 = build_aligned_type (TREE_TYPE (data_ref),
4702 TYPE_ALIGN (elem_type));
4703 pi->align = TYPE_ALIGN_UNIT (elem_type);
4708 TREE_TYPE (data_ref)
4709 = build_aligned_type (TREE_TYPE (data_ref),
4710 TYPE_ALIGN (elem_type));
4711 pi->misalign = DR_MISALIGNMENT (first_dr);
4715 case dr_explicit_realign:
4720 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4722 if (compute_in_loop)
4723 msq = vect_setup_realignment (first_stmt, gsi,
4725 dr_explicit_realign,
4728 new_stmt = gimple_build_assign_with_ops
4729 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4731 (TREE_TYPE (dataref_ptr),
4732 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4733 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4734 gimple_assign_set_lhs (new_stmt, ptr);
4735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4737 = build2 (MEM_REF, vectype, ptr,
4738 build_int_cst (reference_alias_ptr_type
4739 (DR_REF (first_dr)), 0));
4740 vec_dest = vect_create_destination_var (scalar_dest,
4742 new_stmt = gimple_build_assign (vec_dest, data_ref);
4743 new_temp = make_ssa_name (vec_dest, new_stmt);
4744 gimple_assign_set_lhs (new_stmt, new_temp);
4745 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4746 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4747 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4750 bump = size_binop (MULT_EXPR, vs_minus_1,
4751 TYPE_SIZE_UNIT (elem_type));
4752 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4753 new_stmt = gimple_build_assign_with_ops
4754 (BIT_AND_EXPR, NULL_TREE, ptr,
4757 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4758 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4759 gimple_assign_set_lhs (new_stmt, ptr);
4760 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4762 = build2 (MEM_REF, vectype, ptr,
4763 build_int_cst (reference_alias_ptr_type
4764 (DR_REF (first_dr)), 0));
4767 case dr_explicit_realign_optimized:
4768 new_stmt = gimple_build_assign_with_ops
4769 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4771 (TREE_TYPE (dataref_ptr),
4772 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4773 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4775 gimple_assign_set_lhs (new_stmt, new_temp);
4776 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4778 = build2 (MEM_REF, vectype, new_temp,
4779 build_int_cst (reference_alias_ptr_type
4780 (DR_REF (first_dr)), 0));
4785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4786 new_stmt = gimple_build_assign (vec_dest, data_ref);
4787 new_temp = make_ssa_name (vec_dest, new_stmt);
4788 gimple_assign_set_lhs (new_stmt, new_temp);
4789 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4790 mark_symbols_for_renaming (new_stmt);
4792 /* 3. Handle explicit realignment if necessary/supported.
4793 Create in loop:
4794 vec_dest = realign_load (msq, lsq, realignment_token) */
4795 if (alignment_support_scheme == dr_explicit_realign_optimized
4796 || alignment_support_scheme == dr_explicit_realign)
4798 lsq = gimple_assign_lhs (new_stmt);
4799 if (!realignment_token)
4800 realignment_token = dataref_ptr;
4801 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4803 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4806 new_temp = make_ssa_name (vec_dest, new_stmt);
4807 gimple_assign_set_lhs (new_stmt, new_temp);
4808 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4810 if (alignment_support_scheme == dr_explicit_realign_optimized)
4813 if (i == vec_num - 1 && j == ncopies - 1)
4814 add_phi_arg (phi, lsq,
4815 loop_latch_edge (containing_loop),
4821 /* 4. Handle invariant-load. */
4822 if (inv_p && !bb_vinfo)
4825 gimple_stmt_iterator gsi2 = *gsi;
4826 gcc_assert (!strided_load);
4829 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4832 tem = fold_convert (TREE_TYPE (vectype), tem);
4833 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4837 vec_inv = build_vector_from_val (vectype, tem);
4838 new_temp = vect_init_vector (stmt, vec_inv,
4840 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4845 tree perm_mask = perm_mask_for_reverse (vectype);
4846 new_temp = permute_vec_elements (new_temp, new_temp,
4847 perm_mask, stmt, gsi);
4848 new_stmt = SSA_NAME_DEF_STMT (new_temp);
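/* Illustrative note (editor's sketch): a loop-invariant load such as
   "x = *p", with P unchanged in the loop, is vectorized by splatting the
   scalar into every lane, {*p, *p, ..., *p}; a negative-step access such
   as a[n-i] reuses the contiguous vector load and reverses its lanes with
   the permute built above.  */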
4851 /* Collect vector loads and later create their permutation in
4852 vect_transform_strided_load (). */
4853 if (strided_load || slp_perm)
4854 VEC_quick_push (tree, dr_chain, new_temp);
4856 /* Store vector loads in the corresponding SLP_NODE. */
4857 if (slp && !slp_perm)
4858 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4863 if (slp && !slp_perm)
4868 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4869 slp_node_instance, false))
4871 VEC_free (tree, heap, dr_chain);
4880 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4881 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4886 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4888 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4889 prev_stmt_info = vinfo_for_stmt (new_stmt);
4893 VEC_free (tree, heap, dr_chain);
4899 /* Function vect_is_simple_cond.
4902 LOOP - the loop that is being vectorized.
4903 COND - Condition that is checked for simple use.
4906 *COMP_VECTYPE - the vector type for the comparison.
4908 Returns whether a COND can be vectorized. Checks whether
4909 condition operands are supportable using vect_is_simple_use.  */
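/* Illustrative examples (editor's sketch): "a_1 < b_2" or "x_3 != 0" are
   simple conditions provided each SSA operand has a vectorizable
   definition; COND must be a plain comparison, so a bare boolean SSA name
   used directly as the condition fails the COMPARISON_CLASS_P check
   below.  */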
4912 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4917 enum vect_def_type dt;
4918 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4920 if (!COMPARISON_CLASS_P (cond))
4923 lhs = TREE_OPERAND (cond, 0);
4924 rhs = TREE_OPERAND (cond, 1);
4926 if (TREE_CODE (lhs) == SSA_NAME)
4928 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4929 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4933 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4934 && TREE_CODE (lhs) != FIXED_CST)
4937 if (TREE_CODE (rhs) == SSA_NAME)
4939 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4940 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4944 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4945 && TREE_CODE (rhs) != FIXED_CST)
4948 *comp_vectype = vectype1 ? vectype1 : vectype2;
4952 /* vectorizable_condition.
4954 Check if STMT is a conditional modify expression that can be vectorized.
4955 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4956 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4957 at GSI.
4959 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4960 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4961 else clause if it is 2).
4963 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
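/* Illustrative example (editor's sketch): the scalar statement

     x = a < b ? c : d;

   is vectorized below into

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   which selects between the THEN and ELSE vectors element-wise.  */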
4966 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4967 gimple *vec_stmt, tree reduc_def, int reduc_index,
4970 tree scalar_dest = NULL_TREE;
4971 tree vec_dest = NULL_TREE;
4972 tree cond_expr, then_clause, else_clause;
4973 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4974 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4975 tree comp_vectype = NULL_TREE;
4976 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4977 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4978 tree vec_compare, vec_cond_expr;
4980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4982 enum vect_def_type dt, dts[4];
4983 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4985 enum tree_code code;
4986 stmt_vec_info prev_stmt_info = NULL;
4988 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4989 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4990 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4992 if (slp_node || PURE_SLP_STMT (stmt_info))
4995 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4997 gcc_assert (ncopies >= 1);
4998 if (reduc_index && ncopies > 1)
4999 return false; /* FORNOW */
5001 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5008 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5012 /* FORNOW: not yet supported. */
5013 if (STMT_VINFO_LIVE_P (stmt_info))
5015 if (vect_print_dump_info (REPORT_DETAILS))
5016 fprintf (vect_dump, "value used after loop.");
5020 /* Is vectorizable conditional operation? */
5021 if (!is_gimple_assign (stmt))
5024 code = gimple_assign_rhs_code (stmt);
5026 if (code != COND_EXPR)
5029 cond_expr = gimple_assign_rhs1 (stmt);
5030 then_clause = gimple_assign_rhs2 (stmt);
5031 else_clause = gimple_assign_rhs3 (stmt);
5033 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
5037 if (TREE_CODE (then_clause) == SSA_NAME)
5039 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5040 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
5041 &then_def_stmt, &def, &dt))
5044 else if (TREE_CODE (then_clause) != INTEGER_CST
5045 && TREE_CODE (then_clause) != REAL_CST
5046 && TREE_CODE (then_clause) != FIXED_CST)
5049 if (TREE_CODE (else_clause) == SSA_NAME)
5051 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5052 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
5053 &else_def_stmt, &def, &dt))
5056 else if (TREE_CODE (else_clause) != INTEGER_CST
5057 && TREE_CODE (else_clause) != REAL_CST
5058 && TREE_CODE (else_clause) != FIXED_CST)
5063 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5064 return expand_vec_cond_expr_p (vectype, comp_vectype);
5071 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5072 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5073 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5074 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5078 scalar_dest = gimple_assign_lhs (stmt);
5079 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5081 /* Handle cond expr. */
5082 for (j = 0; j < ncopies; j++)
5084 gimple new_stmt = NULL;
5089 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5090 VEC (slp_void_p, heap) *vec_defs;
5092 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5093 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5094 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5095 VEC_safe_push (tree, heap, ops, then_clause);
5096 VEC_safe_push (tree, heap, ops, else_clause);
5097 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5098 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5099 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5100 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5101 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5103 VEC_free (tree, heap, ops);
5104 VEC_free (slp_void_p, heap, vec_defs);
5110 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5112 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5113 NULL, &gtemp, &def, &dts[0]);
5116 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5118 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5119 NULL, &gtemp, &def, &dts[1]);
5120 if (reduc_index == 1)
5121 vec_then_clause = reduc_def;
5124 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5126 vect_is_simple_use (then_clause, loop_vinfo,
5127 NULL, &gtemp, &def, &dts[2]);
5129 if (reduc_index == 2)
5130 vec_else_clause = reduc_def;
5133 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5135 vect_is_simple_use (else_clause, loop_vinfo,
5136 NULL, &gtemp, &def, &dts[3]);
5142 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5143 VEC_pop (tree, vec_oprnds0));
5144 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5145 VEC_pop (tree, vec_oprnds1));
5146 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5147 VEC_pop (tree, vec_oprnds2));
5148 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5149 VEC_pop (tree, vec_oprnds3));
5154 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5155 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5156 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5157 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5160 /* Arguments are ready. Create the new vector stmt. */
5161 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5163 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5164 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5165 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5167 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5168 vec_cond_lhs, vec_cond_rhs);
5169 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5170 vec_compare, vec_then_clause, vec_else_clause);
5172 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5173 new_temp = make_ssa_name (vec_dest, new_stmt);
5174 gimple_assign_set_lhs (new_stmt, new_temp);
5175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5177 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5184 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5186 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5188 prev_stmt_info = vinfo_for_stmt (new_stmt);
5191 VEC_free (tree, heap, vec_oprnds0);
5192 VEC_free (tree, heap, vec_oprnds1);
5193 VEC_free (tree, heap, vec_oprnds2);
5194 VEC_free (tree, heap, vec_oprnds3);
5200 /* Make sure the statement is vectorizable. */
5203 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5206 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5207 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5209 tree scalar_type, vectype;
5210 gimple pattern_stmt;
5211 gimple_seq pattern_def_seq;
5213 if (vect_print_dump_info (REPORT_DETAILS))
5215 fprintf (vect_dump, "==> examining statement: ");
5216 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5219 if (gimple_has_volatile_ops (stmt))
5221 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5222 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5227 /* Skip stmts that do not need to be vectorized.  In loops this is expected
5228 to include:
5229 - the COND_EXPR which is the loop exit condition
5230 - any LABEL_EXPRs in the loop
5231 - computations that are used only for array indexing or loop control.
5232 In basic blocks we only analyze statements that are a part of some SLP
5233 instance, therefore, all the statements are relevant.
5235 A pattern statement needs to be analyzed instead of the original statement
5236 if the original statement is not relevant.  Otherwise, we analyze both
5237 statements.  */
5239 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5240 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5241 && !STMT_VINFO_LIVE_P (stmt_info))
5243 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5245 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5246 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5248 /* Analyze PATTERN_STMT instead of the original stmt. */
5249 stmt = pattern_stmt;
5250 stmt_info = vinfo_for_stmt (pattern_stmt);
5251 if (vect_print_dump_info (REPORT_DETAILS))
5253 fprintf (vect_dump, "==> examining pattern statement: ");
5254 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5259 if (vect_print_dump_info (REPORT_DETAILS))
5260 fprintf (vect_dump, "irrelevant.");
5265 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5267 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5268 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5270 /* Analyze PATTERN_STMT too. */
5271 if (vect_print_dump_info (REPORT_DETAILS))
5273 fprintf (vect_dump, "==> examining pattern statement: ");
5274 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5277 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5281 if (is_pattern_stmt_p (stmt_info)
5282 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5284 gimple_stmt_iterator si;
5286 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5288 gimple pattern_def_stmt = gsi_stmt (si);
5289 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5290 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5292 /* Analyze def stmt of STMT if it's a pattern stmt. */
5293 if (vect_print_dump_info (REPORT_DETAILS))
5295 fprintf (vect_dump, "==> examining pattern def statement: ");
5296 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5299 if (!vect_analyze_stmt (pattern_def_stmt,
5300 need_to_vectorize, node))
5306 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5308 case vect_internal_def:
5311 case vect_reduction_def:
5312 case vect_nested_cycle:
5313 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5314 || relevance == vect_used_in_outer_by_reduction
5315 || relevance == vect_unused_in_scope));
5318 case vect_induction_def:
5319 case vect_constant_def:
5320 case vect_external_def:
5321 case vect_unknown_def_type:
5328 gcc_assert (PURE_SLP_STMT (stmt_info));
5330 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5331 if (vect_print_dump_info (REPORT_DETAILS))
5333 fprintf (vect_dump, "get vectype for scalar type: ");
5334 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5337 vectype = get_vectype_for_scalar_type (scalar_type);
5340 if (vect_print_dump_info (REPORT_DETAILS))
5342 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5343 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5348 if (vect_print_dump_info (REPORT_DETAILS))
5350 fprintf (vect_dump, "vectype: ");
5351 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5354 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5357 if (STMT_VINFO_RELEVANT_P (stmt_info))
5359 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5360 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5361 *need_to_vectorize = true;
5366 && (STMT_VINFO_RELEVANT_P (stmt_info)
5367 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5368 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5369 || vectorizable_shift (stmt, NULL, NULL, NULL)
5370 || vectorizable_operation (stmt, NULL, NULL, NULL)
5371 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5372 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5373 || vectorizable_call (stmt, NULL, NULL, NULL)
5374 || vectorizable_store (stmt, NULL, NULL, NULL)
5375 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5376 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5380 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5381 || vectorizable_shift (stmt, NULL, NULL, node)
5382 || vectorizable_operation (stmt, NULL, NULL, node)
5383 || vectorizable_assignment (stmt, NULL, NULL, node)
5384 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5385 || vectorizable_call (stmt, NULL, NULL, node)
5386 || vectorizable_store (stmt, NULL, NULL, node)
5387 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5394 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5395 fprintf (vect_dump, "supported: ");
5396 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5405 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5406 need extra handling, except for vectorizable reductions. */
5407 if (STMT_VINFO_LIVE_P (stmt_info)
5408 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5409 ok = vectorizable_live_operation (stmt, NULL, NULL);
5413 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5415 fprintf (vect_dump, "not vectorized: live stmt not ");
5416 fprintf (vect_dump, "supported: ");
5417 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5427 /* Function vect_transform_stmt.
5429 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5432 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5433 bool *strided_store, slp_tree slp_node,
5434 slp_instance slp_node_instance)
5436 bool is_store = false;
5437 gimple vec_stmt = NULL;
5438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5441 switch (STMT_VINFO_TYPE (stmt_info))
5443 case type_demotion_vec_info_type:
5444 case type_promotion_vec_info_type:
5445 case type_conversion_vec_info_type:
5446 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5450 case induc_vec_info_type:
5451 gcc_assert (!slp_node);
5452 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5456 case shift_vec_info_type:
5457 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5461 case op_vec_info_type:
5462 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5466 case assignment_vec_info_type:
5467 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5471 case load_vec_info_type:
5472 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5477 case store_vec_info_type:
5478 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5480 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5482 /* In case of interleaving, the whole chain is vectorized when the
5483 last store in the chain is reached. Store stmts before the last
5484 one are skipped, and their vec_stmt_info shouldn't be freed
5485 meanwhile.  */
5486 *strided_store = true;
5487 if (STMT_VINFO_VEC_STMT (stmt_info))
5494 case condition_vec_info_type:
5495 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5499 case call_vec_info_type:
5500 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5501 stmt = gsi_stmt (*gsi);
5504 case reduc_vec_info_type:
5505 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5510 if (!STMT_VINFO_LIVE_P (stmt_info))
5512 if (vect_print_dump_info (REPORT_DETAILS))
5513 fprintf (vect_dump, "stmt not supported.");
5518 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5519 is being vectorized, but outside the immediately enclosing loop. */
5521 && STMT_VINFO_LOOP_VINFO (stmt_info)
5522 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5523 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5524 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5525 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5526 || STMT_VINFO_RELEVANT (stmt_info) ==
5527 vect_used_in_outer_by_reduction))
5529 struct loop *innerloop = LOOP_VINFO_LOOP (
5530 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5531 imm_use_iterator imm_iter;
5532 use_operand_p use_p;
5536 if (vect_print_dump_info (REPORT_DETAILS))
5537 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5539 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5540 (to be used when vectorizing outer-loop stmts that use the DEF of
5541 STMT).  */
5542 if (gimple_code (stmt) == GIMPLE_PHI)
5543 scalar_dest = PHI_RESULT (stmt);
5545 scalar_dest = gimple_assign_lhs (stmt);
5547 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5549 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5551 exit_phi = USE_STMT (use_p);
5552 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5557 /* Handle stmts whose DEF is used outside the loop-nest that is
5558 being vectorized. */
5559 if (STMT_VINFO_LIVE_P (stmt_info)
5560 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5562 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5567 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5573 /* Remove a group of stores (for SLP or interleaving), free their
5574 stmt_vec_info.  */
5577 vect_remove_stores (gimple first_stmt)
5579 gimple next = first_stmt;
5581 gimple_stmt_iterator next_si;
5585 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5587 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5588 if (is_pattern_stmt_p (stmt_info))
5589 next = STMT_VINFO_RELATED_STMT (stmt_info);
5590 /* Free the attached stmt_vec_info and remove the stmt. */
5591 next_si = gsi_for_stmt (next);
5592 gsi_remove (&next_si, true);
5593 free_stmt_vec_info (next);
5599 /* Function new_stmt_vec_info.
5601 Create and initialize a new stmt_vec_info struct for STMT. */
5604 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5605 bb_vec_info bb_vinfo)
5608 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5610 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5611 STMT_VINFO_STMT (res) = stmt;
5612 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5613 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5614 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5615 STMT_VINFO_LIVE_P (res) = false;
5616 STMT_VINFO_VECTYPE (res) = NULL;
5617 STMT_VINFO_VEC_STMT (res) = NULL;
5618 STMT_VINFO_VECTORIZABLE (res) = true;
5619 STMT_VINFO_IN_PATTERN_P (res) = false;
5620 STMT_VINFO_RELATED_STMT (res) = NULL;
5621 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5622 STMT_VINFO_DATA_REF (res) = NULL;
5624 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5625 STMT_VINFO_DR_OFFSET (res) = NULL;
5626 STMT_VINFO_DR_INIT (res) = NULL;
5627 STMT_VINFO_DR_STEP (res) = NULL;
5628 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5630 if (gimple_code (stmt) == GIMPLE_PHI
5631 && is_loop_header_bb_p (gimple_bb (stmt)))
5632 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5634 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5636 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5637 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5638 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5639 STMT_SLP_TYPE (res) = loop_vect;
5640 GROUP_FIRST_ELEMENT (res) = NULL;
5641 GROUP_NEXT_ELEMENT (res) = NULL;
5642 GROUP_SIZE (res) = 0;
5643 GROUP_STORE_COUNT (res) = 0;
5644 GROUP_GAP (res) = 0;
5645 GROUP_SAME_DR_STMT (res) = NULL;
5646 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5652 /* Create a hash table for stmt_vec_info. */
5655 init_stmt_vec_info_vec (void)
5657 gcc_assert (!stmt_vec_info_vec);
5658 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5662 /* Free hash table for stmt_vec_info. */
5665 free_stmt_vec_info_vec (void)
5667 gcc_assert (stmt_vec_info_vec);
5668 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5672 /* Free stmt vectorization related info. */
5675 free_stmt_vec_info (gimple stmt)
5677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5682 /* Check if this statement has a related "pattern stmt"
5683 (introduced by the vectorizer during the pattern recognition
5684 pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5685 too.  */
5686 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5688 stmt_vec_info patt_info
5689 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5692 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5695 gimple_stmt_iterator si;
5696 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5697 free_stmt_vec_info (gsi_stmt (si));
5699 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5703 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5704 set_vinfo_for_stmt (stmt, NULL);
5709 /* Function get_vectype_for_scalar_type_and_size.
5711 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5712 by the target.  */
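/* Illustrative example (editor's sketch): for SCALAR_TYPE "int" (4 bytes
   on typical targets) and SIZE 16 this yields a 4-unit vector type such
   as V4SI; with SIZE == 0 the target's preferred SIMD mode for the scalar
   mode is used instead.  */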
5715 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5717 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5718 enum machine_mode simd_mode;
5719 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5726 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5727 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5730 /* We can't build a vector type of elements with alignment bigger than
5731 their size.  */
5732 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5735 /* For vector types of elements whose mode precision doesn't
5736 match their type's precision we use an element type of mode
5737 precision. The vectorization routines will have to make sure
5738 they support the proper result truncation/extension.
5739 We also make sure to build vector types with INTEGER_TYPE
5740 component type only. */
5741 if (INTEGRAL_TYPE_P (scalar_type)
5742 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5743 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5744 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5745 TYPE_UNSIGNED (scalar_type));
5747 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5748 When the component mode passes the above test simply use a type
5749 corresponding to that mode. The theory is that any use that
5750 would cause problems with this will disable vectorization anyway. */
5751 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5752 && !INTEGRAL_TYPE_P (scalar_type)
5753 && !POINTER_TYPE_P (scalar_type))
5754 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5756 /* If no size was supplied use the mode the target prefers. Otherwise
5757 lookup a vector mode of the specified size. */
5759 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5761 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5762 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5766 vectype = build_vector_type (scalar_type, nunits);
5767 if (vect_print_dump_info (REPORT_DETAILS))
5769 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5770 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5776 if (vect_print_dump_info (REPORT_DETAILS))
5778 fprintf (vect_dump, "vectype: ");
5779 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5782 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5783 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5785 if (vect_print_dump_info (REPORT_DETAILS))
5786 fprintf (vect_dump, "mode not supported by target.");
5793 unsigned int current_vector_size;
5795 /* Function get_vectype_for_scalar_type.
5797 Returns the vector type corresponding to SCALAR_TYPE as supported
5798 by the target.  */
5801 get_vectype_for_scalar_type (tree scalar_type)
5804 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5805 current_vector_size);
5807 && current_vector_size == 0)
5808 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5812 /* Function get_same_sized_vectype
5814 Returns a vector type corresponding to SCALAR_TYPE with the same size as
5815 VECTOR_TYPE if supported by the target.  */
5818 get_same_sized_vectype (tree scalar_type, tree vector_type)
5820 return get_vectype_for_scalar_type_and_size
5821 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5824 /* Function vect_is_simple_use.
5827 LOOP_VINFO - the vect info of the loop that is being vectorized.
5828 BB_VINFO - the vect info of the basic block that is being vectorized.
5829 OPERAND - operand of a stmt in the loop or bb.
5830 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5832 Returns whether a stmt with OPERAND can be vectorized.
5833 For loops, supportable operands are constants, loop invariants, and operands
5834 that are defined by the current iteration of the loop. Unsupportable
5835 operands are those that are defined by a previous iteration of the loop (as
5836 is the case in reduction/induction computations).
5837 For basic blocks, supportable operands are constants and bb invariants.
5838 For now, operands defined outside the basic block are not supported. */
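/* Illustrative example (editor's sketch): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 5;

   the constant 5 gives vect_constant_def, C (defined before the loop)
   gives vect_external_def, and the value loaded from b[i] is an internal
   def computed by the current iteration; all three are simple uses.  */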
5841 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5842 bb_vec_info bb_vinfo, gimple *def_stmt,
5843 tree *def, enum vect_def_type *dt)
5846 stmt_vec_info stmt_vinfo;
5847 struct loop *loop = NULL;
5850 loop = LOOP_VINFO_LOOP (loop_vinfo);
5855 if (vect_print_dump_info (REPORT_DETAILS))
5857 fprintf (vect_dump, "vect_is_simple_use: operand ");
5858 print_generic_expr (vect_dump, operand, TDF_SLIM);
5861 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5863 *dt = vect_constant_def;
5867 if (is_gimple_min_invariant (operand))
5870 *dt = vect_external_def;
5874 if (TREE_CODE (operand) == PAREN_EXPR)
5876 if (vect_print_dump_info (REPORT_DETAILS))
5877 fprintf (vect_dump, "non-associatable copy.");
5878 operand = TREE_OPERAND (operand, 0);
5881 if (TREE_CODE (operand) != SSA_NAME)
5883 if (vect_print_dump_info (REPORT_DETAILS))
5884 fprintf (vect_dump, "not ssa-name.");
5888 *def_stmt = SSA_NAME_DEF_STMT (operand);
5889 if (*def_stmt == NULL)
5891 if (vect_print_dump_info (REPORT_DETAILS))
5892 fprintf (vect_dump, "no def_stmt.");
5896 if (vect_print_dump_info (REPORT_DETAILS))
5898 fprintf (vect_dump, "def_stmt: ");
5899 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5902 /* Empty stmt is expected only in case of a function argument.
5903 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5904 if (gimple_nop_p (*def_stmt))
5907 *dt = vect_external_def;
5911 bb = gimple_bb (*def_stmt);
5913 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5914 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5915 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5916 *dt = vect_external_def;
5919 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5920 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5923 if (*dt == vect_unknown_def_type)
5925 if (vect_print_dump_info (REPORT_DETAILS))
5926 fprintf (vect_dump, "Unsupported pattern.");
5930 if (vect_print_dump_info (REPORT_DETAILS))
5931 fprintf (vect_dump, "type of def: %d.", *dt);
5933 switch (gimple_code (*def_stmt))
5936 *def = gimple_phi_result (*def_stmt);
5940 *def = gimple_assign_lhs (*def_stmt);
5944 *def = gimple_call_lhs (*def_stmt);
5949 if (vect_print_dump_info (REPORT_DETAILS))
5950 fprintf (vect_dump, "unsupported defining stmt: ");
5957 /* Function vect_is_simple_use_1.
5959 Same as vect_is_simple_use but also determines the vector operand
5960 type of OPERAND and stores it to *VECTYPE. If the definition of
5961 OPERAND is vect_uninitialized_def, vect_constant_def or
5962 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5963 is responsible for computing the best suited vector type for the
5964 scalar operand.  */
5967 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5968 bb_vec_info bb_vinfo, gimple *def_stmt,
5969 tree *def, enum vect_def_type *dt, tree *vectype)
5971 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5974 /* Now get a vector type if the def is internal, otherwise supply
5975 NULL_TREE and leave it up to the caller to figure out a proper
5976 type for the use stmt. */
5977 if (*dt == vect_internal_def
5978 || *dt == vect_induction_def
5979 || *dt == vect_reduction_def
5980 || *dt == vect_double_reduction_def
5981 || *dt == vect_nested_cycle)
5983 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5985 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5986 && !STMT_VINFO_RELEVANT (stmt_info)
5987 && !STMT_VINFO_LIVE_P (stmt_info))
5988 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5990 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5991 gcc_assert (*vectype != NULL_TREE);
5993 else if (*dt == vect_uninitialized_def
5994 || *dt == vect_constant_def
5995 || *dt == vect_external_def)
5996 *vectype = NULL_TREE;
6004 /* Function supportable_widening_operation
6006 Check whether an operation represented by the code CODE is a
6007 widening operation that is supported by the target platform in
6008 vector form (i.e., when operating on arguments of type VECTYPE_IN
6009 producing a result of type VECTYPE_OUT).
6011 Widening operations we currently support are NOP (CONVERT), FLOAT,
6012 WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
6013 are supported by the target platform either directly (via vector
6014 tree-codes), or via target builtins.
6016 Output:
6017 - CODE1 and CODE2 are codes of vector operations to be used when
6018 vectorizing the operation, if available.
6019 - DECL1 and DECL2 are decls of target builtin functions to be used
6020 when vectorizing the operation, if available. In this case,
6021 CODE1 and CODE2 are CALL_EXPR.
6022 - MULTI_STEP_CVT determines the number of required intermediate steps in
6023 case of multi-step conversion (like char->short->int - in that case
6024 MULTI_STEP_CVT will be 1).
6025 - INTERM_TYPES contains the intermediate type required to perform the
6026 widening operation (short in the above example). */
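/* Illustrative example (editor's sketch): widening a vector of chars to a
   vector of ints cannot usually be done in one step, so with a
   vector-of-shorts intermediate type this returns MULTI_STEP_CVT == 1 and
   pushes that type onto INTERM_TYPES; each step is carried out by a LO/HI
   pair such as VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR.  */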
6029 supportable_widening_operation (enum tree_code code, gimple stmt,
6030 tree vectype_out, tree vectype_in,
6031 tree *decl1, tree *decl2,
6032 enum tree_code *code1, enum tree_code *code2,
6033 int *multi_step_cvt,
6034 VEC (tree, heap) **interm_types)
6036 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6037 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6038 struct loop *vect_loop = NULL;
6040 enum machine_mode vec_mode;
6041 enum insn_code icode1, icode2;
6042 optab optab1, optab2;
6043 tree vectype = vectype_in;
6044 tree wide_vectype = vectype_out;
6045 enum tree_code c1, c2;
6047 tree prev_type, intermediate_type;
6048 enum machine_mode intermediate_mode, prev_mode;
6049 optab optab3, optab4;
6051 *multi_step_cvt = 0;
6053 vect_loop = LOOP_VINFO_LOOP (loop_info);
6055 /* The result of a vectorized widening operation usually requires two vectors
6056 (because the widened results do not fit into one vector). The generated
6057 vector results would normally be expected to be generated in the same
6058 order as in the original scalar computation, i.e. if 8 results are
6059 generated in each vector iteration, they are to be organized as follows:
6060 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6062 However, in the special case that the result of the widening operation is
6063 used in a reduction computation only, the order doesn't matter (because
6064 when vectorizing a reduction we change the order of the computation).
6065 Some targets can take advantage of this and generate more efficient code.
6066 For example, targets like Altivec, that support widen_mult using a sequence
6067 of {mult_even,mult_odd} generate the following vectors:
6068 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6070 When vectorizing outer-loops, we execute the inner-loop sequentially
6071 (each vectorized inner-loop iteration contributes to VF outer-loop
6072 iterations in parallel).  We therefore don't allow changing the order
6073 of the computation in the inner-loop during outer-loop vectorization. */
6076 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6077 && !nested_in_vect_loop_p (vect_loop, stmt))
6083 && code == WIDEN_MULT_EXPR
6084 && targetm.vectorize.builtin_mul_widen_even
6085 && targetm.vectorize.builtin_mul_widen_even (vectype)
6086 && targetm.vectorize.builtin_mul_widen_odd
6087 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6089 if (vect_print_dump_info (REPORT_DETAILS))
6090 fprintf (vect_dump, "Unordered widening operation detected.");
6092 *code1 = *code2 = CALL_EXPR;
6093 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6094 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6100 case WIDEN_MULT_EXPR:
6101 c1 = VEC_WIDEN_MULT_LO_EXPR;
6102 c2 = VEC_WIDEN_MULT_HI_EXPR;
6105 case WIDEN_LSHIFT_EXPR:
6106 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6107 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6111 c1 = VEC_UNPACK_LO_EXPR;
6112 c2 = VEC_UNPACK_HI_EXPR;
6116 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6117 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6120 case FIX_TRUNC_EXPR:
6121 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6122 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6123 computing the operation. */
6130 if (BYTES_BIG_ENDIAN)
6132 enum tree_code ctmp = c1;
6137 if (code == FIX_TRUNC_EXPR)
6139 /* The signedness is determined from output operand. */
6140 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6141 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6145 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6146 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6149 if (!optab1 || !optab2)
6152 vec_mode = TYPE_MODE (vectype);
6153 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6154 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6160 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6161 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6164 /* Check if it's a multi-step conversion that can be done using intermediate
6167 prev_type = vectype;
6168 prev_mode = vec_mode;
6170 if (!CONVERT_EXPR_CODE_P (code))
6173 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6174 intermediate steps in the promotion sequence.  We try
6175 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6176 not.  */
6177 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6178 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6180 intermediate_mode = insn_data[icode1].operand[0].mode;
6182 = lang_hooks.types.type_for_mode (intermediate_mode,
6183 TYPE_UNSIGNED (prev_type));
6184 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6185 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6187 if (!optab3 || !optab4
6188 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6189 || insn_data[icode1].operand[0].mode != intermediate_mode
6190 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6191 || insn_data[icode2].operand[0].mode != intermediate_mode
6192 || ((icode1 = optab_handler (optab3, intermediate_mode))
6193 == CODE_FOR_nothing)
6194 || ((icode2 = optab_handler (optab4, intermediate_mode))
6195 == CODE_FOR_nothing))
6198 VEC_quick_push (tree, *interm_types, intermediate_type);
6199 (*multi_step_cvt)++;
6201 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6202 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6205 prev_type = intermediate_type;
6206 prev_mode = intermediate_mode;
6209 VEC_free (tree, heap, *interm_types);
6214 /* Function supportable_narrowing_operation
6216 Check whether an operation represented by the code CODE is a
6217 narrowing operation that is supported by the target platform in
6218 vector form (i.e., when operating on arguments of type VECTYPE_IN
6219 and producing a result of type VECTYPE_OUT).
6221 Narrowing operations we currently support are NOP (CONVERT) and
6222 FIX_TRUNC. This function checks if these operations are supported by
6223 the target platform directly via vector tree-codes.
6225 Output:
6226 - CODE1 is the code of a vector operation to be used when
6227 vectorizing the operation, if available.
6228 - MULTI_STEP_CVT determines the number of required intermediate steps in
6229 case of multi-step conversion (like int->short->char - in that case
6230 MULTI_STEP_CVT will be 1).
6231 - INTERM_TYPES contains the intermediate type required to perform the
6232 narrowing operation (short in the above example). */
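/* Illustrative example (editor's sketch): narrowing a vector of ints to a
   vector of chars goes through a vector-of-shorts intermediate type
   (MULTI_STEP_CVT == 1), each step packing two source vectors into one
   narrower vector with VEC_PACK_TRUNC_EXPR.  */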
6235 supportable_narrowing_operation (enum tree_code code,
6236 tree vectype_out, tree vectype_in,
6237 enum tree_code *code1, int *multi_step_cvt,
6238 VEC (tree, heap) **interm_types)
6240 enum machine_mode vec_mode;
6241 enum insn_code icode1;
6242 optab optab1, interm_optab;
6243 tree vectype = vectype_in;
6244 tree narrow_vectype = vectype_out;
6246 tree intermediate_type;
6247 enum machine_mode intermediate_mode, prev_mode;
6251 *multi_step_cvt = 0;
6255 c1 = VEC_PACK_TRUNC_EXPR;
6258 case FIX_TRUNC_EXPR:
6259 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6263 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6264 tree code and optabs used for computing the operation. */
6271 if (code == FIX_TRUNC_EXPR)
6272 /* The signedness is determined from output operand. */
6273 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6275 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6280 vec_mode = TYPE_MODE (vectype);
6281 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6286 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6289 /* Check if it's a multi-step conversion that can be done using intermediate
6291 prev_mode = vec_mode;
6292 if (code == FIX_TRUNC_EXPR)
6293 uns = TYPE_UNSIGNED (vectype_out);
6295 uns = TYPE_UNSIGNED (vectype);
6297 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6298 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6299 costly than signed. */
6300 if (code == FIX_TRUNC_EXPR && uns)
6302 enum insn_code icode2;
6305 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6307 = optab_for_tree_code (c1, intermediate_type, optab_default);
6308 if (interm_optab != NULL
6309 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6310 && insn_data[icode1].operand[0].mode
6311 == insn_data[icode2].operand[0].mode)
6314 optab1 = interm_optab;
6319 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6320 intermediate steps in the demotion sequence.  We try
6321 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6322 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6323 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6325 intermediate_mode = insn_data[icode1].operand[0].mode;
6327 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6329 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6332 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6333 || insn_data[icode1].operand[0].mode != intermediate_mode
6334 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6335 == CODE_FOR_nothing))
6338 VEC_quick_push (tree, *interm_types, intermediate_type);
6339 (*multi_step_cvt)++;
6341 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6344 prev_mode = intermediate_mode;
6345 optab1 = interm_optab;
6348 VEC_free (tree, heap, *interm_types);