/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfglayout.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
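
/* For instance, with a vector array VECT_ARRAY and N == 2, the two helpers
   above emit GIMPLE along the lines of (a sketch; the SSA names are made up
   for illustration):

     vx_3 = vect_array[2];       <-- read_vector_array
     vect_array[2] = vx_3;       <-- write_vector_array  */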
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;

  return mem_ref;
}
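
/* E.g., for TYPE == float[8] this builds the equivalent of
   MEM[(float[8] *)ptr + 0B], where the alias pointer type is taken from
   FIRST_DR's reference rather than from PTR's own type.  */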
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  bool found = false;
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside the pattern; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
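
/* For instance, suppose pattern recognition replaced S: x_1 = a_2 * b_3 by
   a widening-multiply pattern stmt S'.  Marking S relevant normally marks
   S' instead; but if x_1 also has uses that are themselves pattern uses,
   S itself is marked.  (The names here are illustrative.)  */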
/* Function vect_stmt_relevant_p.

   Return true if STMT, in the loop that is represented by LOOP_VINFO, is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
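
/* For instance, in

     for (i = 0; i < N; i++)
       s_1 = a[i];
     ... = s_1;              <-- s_1 is used after the loop

   the def s_1 is live (its value is needed outside the loop), so the stmt
   computing it is relevant even if it has no other uses in the loop.  */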
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *live_p = false;
  *relevant = vect_unused_in_scope;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop-closed SSA form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
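
/* E.g., in the store  a[i_1] = x_2  the use of x_2 is a real operand use
   (this function returns true for it), whereas i_1 only indexes the array
   reference; for i_1 the function returns false, so stmts that feed only
   i_1 need not be vectorized themselves.  */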
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)    */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:
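
     (an illustrative loop):

     for (i = 0; i < N; i++){
        1.  i_next = i + 1;        <-- loop control
        2.  a[i] = b[i] + x;       <-- computation on vectorized data-refs
        3.  p = &a[i_next];        <-- address computation
     }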

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }
  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */
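
      /* For instance, in

           s_loop = PHI <s_init, s_next>
           ...
           t_1 = a[i] * b[i];
           s_next = s_loop + t_1;

         the stmt computing t_1 feeds only the reduction, so it is marked
         vect_used_by_reduction and the order in which its vector results
         are combined into s may be changed (e.g. by the final reduction
         epilogue).  */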
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }
      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Get cost by calling the cost target builtin.  */

static inline int
vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
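
/* A worked example of the accounting above: with ncopies == 2, one
   invariant operand (dt[0] == vect_external_def), and a target where each
   vector_stmt costs 1, inside_cost = 2 * 1 = 2 for the two copies of the
   vector op, and outside_cost = 1 for building the invariant vector once
   outside the loop.  */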
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
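
/* A worked example of the permute cost above: storing a group with
   GROUP_SIZE == 4 and ncopies == 1 needs exact_log2 (4) * 4 = 8 high/low
   interleave operations, so with a vector_stmt cost of 1 the permutes add
   8 to inside_cost, in addition to the cost of the vector stores
   themselves.  */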
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
                      ((!STMT_VINFO_STRIDED_ACCESS (stmt_info))
                       || group_size > 1 || slp_node),
                      &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside (prologue) cost.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }

    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
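
/* E.g., for VECTOR_VAR == {0.0, 0.0, 0.0, 0.0} and a V4SF VECTOR_TYPE,
   this emits (in the preheader when GSI is NULL):

     cst_1 = { 0.0, 0.0, 0.0, 0.0 };

   and returns the SSA name cst_1 (the names are illustrative).  */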
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */
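
/* For example, for the scalar stmt  x_1 = y_2 + 5.0,  the def of y_2 is
   taken from the vectorized stmt recorded for its loop definition, while
   for the constant 5.0 a new vector {5.0, 5.0, 5.0, 5.0} is built (case 1
   below; the vector length is illustrative).  */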
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }
    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }
    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }
    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3     -

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ... -

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an
   original stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in types VECTYPE_OUT and VECTYPE_IN.
   Returns a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
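
/* E.g., for a call to a sqrt builtin in a loop over doubles, the target
   hook may return the decl of a V2DF->V2DF vectorized sqrt builtin; if the
   target has no such builtin, NULL_TREE is returned and the call is not
   vectorized.  */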
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Zero arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
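
  /* E.g., with V2DF arguments and a V4SF result, nunits_in == 2 == 4 / 2,
     so the call is a NARROW operation; the symmetric case (V4SF arguments,
     V2DF result) is WIDEN, and equal element counts mean NONE.  */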
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }
  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_replace (tree, vargs, k,
                                   VEC_index (tree, vec_oprndsk, i));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mark_symbols_for_renaming (new_stmt);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;
    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
                   i += 2)
                {
                  size_t k;
                  VEC_truncate (tree, vargs, 0);
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i));
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i + 1));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mark_symbols_for_renaming (new_stmt);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }
          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;
    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in DCE.
     We however cannot remove it here, due to the way the SSA name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose number of operands is
   OP_TYPE, and whose result variable is VEC_DEST; the arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   a scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at
         the previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}
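
/* For instance, demoting 4 V4SI vectors to one V16QI vector takes two
   steps: VEC_PACK_TRUNC_EXPR pairs the 4 V4SI into 2 V8HI vectors, and a
   second application pairs those into 1 V16QI (with the intermediate types
   as computed by supportable_narrowing_operation).  */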
2059 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2060 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2061 the resulting vectors and call the function recursively. */
2064 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2065 VEC (tree, heap) **vec_oprnds1,
2066 gimple stmt, tree vec_dest,
2067 gimple_stmt_iterator *gsi,
2068 enum tree_code code1,
2069 enum tree_code code2, tree decl1,
2070 tree decl2, int op_type)
2073 tree vop0, vop1, new_tmp1, new_tmp2;
2074 gimple new_stmt1, new_stmt2;
2075 VEC (tree, heap) *vec_tmp = NULL;
2077 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2078 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2080 if (op_type == binary_op)
2081 vop1 = VEC_index (tree, *vec_oprnds1, i);
2085 /* Generate the two halves of promotion operation. */
2086 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2087 op_type, vec_dest, gsi, stmt);
2088 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 if (is_gimple_call (new_stmt1))
2092 new_tmp1 = gimple_call_lhs (new_stmt1);
2093 new_tmp2 = gimple_call_lhs (new_stmt2);
2095       else
2097           new_tmp1 = gimple_assign_lhs (new_stmt1);
2098 new_tmp2 = gimple_assign_lhs (new_stmt2);
2101 /* Store the results for the next step. */
2102 VEC_quick_push (tree, vec_tmp, new_tmp1);
2103 VEC_quick_push (tree, vec_tmp, new_tmp2);
2106 VEC_free (tree, heap, *vec_oprnds0);
2107 *vec_oprnds0 = vec_tmp;
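/* Illustrative sketch (assumed types, not from the original source):
   promoting V16QI to V8HI, each VOP0 yields two halves via CODE1/CODE2
   (e.g. VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR), so VEC_TMP holds
   twice as many vectors as *VEC_OPRNDS0 did and becomes the operand
   vector for the next conversion step.  */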
2111 /* Check if STMT performs a conversion operation that can be vectorized.
2112 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2113 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2114 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2116 static bool
2117 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2118 gimple *vec_stmt, slp_tree slp_node)
2122 tree op0, op1 = NULL_TREE;
2123 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2126 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2127 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2128 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2132 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2133 gimple new_stmt = NULL;
2134 stmt_vec_info prev_stmt_info;
2137 tree vectype_out, vectype_in;
2139 tree lhs_type, rhs_type;
2140 enum { NARROW, NONE, WIDEN } modifier;
2141 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2144 int multi_step_cvt = 0;
2145 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2146 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2148 enum machine_mode rhs_mode;
2149 unsigned short fltsz;
2151 /* Is STMT a vectorizable conversion? */
2153   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2154     return false;
2156   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2157     return false;
2159   if (!is_gimple_assign (stmt))
2160     return false;
2162   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2163     return false;
2165 code = gimple_assign_rhs_code (stmt);
2166 if (!CONVERT_EXPR_CODE_P (code)
2167 && code != FIX_TRUNC_EXPR
2168 && code != FLOAT_EXPR
2169 && code != WIDEN_MULT_EXPR
2170       && code != WIDEN_LSHIFT_EXPR)
2171     return false;
2173 op_type = TREE_CODE_LENGTH (code);
2175 /* Check types of lhs and rhs. */
2176 scalar_dest = gimple_assign_lhs (stmt);
2177 lhs_type = TREE_TYPE (scalar_dest);
2178 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2180 op0 = gimple_assign_rhs1 (stmt);
2181 rhs_type = TREE_TYPE (op0);
2183 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2184 && !((INTEGRAL_TYPE_P (lhs_type)
2185 && INTEGRAL_TYPE_P (rhs_type))
2186 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2187            && SCALAR_FLOAT_TYPE_P (rhs_type))))
2188     return false;
2190 if ((INTEGRAL_TYPE_P (lhs_type)
2191 && (TYPE_PRECISION (lhs_type)
2192 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2193 || (INTEGRAL_TYPE_P (rhs_type)
2194 && (TYPE_PRECISION (rhs_type)
2195 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2197       if (vect_print_dump_info (REPORT_DETAILS))
2198         fprintf (vect_dump,
2199                  "type conversion to/from bit-precision unsupported.");
2200       return false;
2203 /* Check the operands of the operation. */
2204 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2205 &def_stmt, &def, &dt[0], &vectype_in))
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208         fprintf (vect_dump, "use not simple.");
2209       return false;
2211 if (op_type == binary_op)
2215 op1 = gimple_assign_rhs2 (stmt);
2216 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2217       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2218          OP1.  */
2219 if (CONSTANT_CLASS_P (op0))
2220 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2221 &def_stmt, &def, &dt[1], &vectype_in);
2222       else
2223         ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
2224                                  &dt[1]);
2226       if (!ok)
2227         {
2228           if (vect_print_dump_info (REPORT_DETAILS))
2229             fprintf (vect_dump, "use not simple.");
2230           return false;
2231         }
2234   /* If op0 is an external or constant def, use a vector type of
2235      the same size as the output vector type.  */
2236   if (!vectype_in)
2237     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2238   if (vec_stmt)
2239     gcc_assert (vectype_in);
2240   if (!vectype_in)
2241     {
2242       if (vect_print_dump_info (REPORT_DETAILS))
2243         {
2244           fprintf (vect_dump, "no vectype for scalar type ");
2245           print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2246         }
2247       return false;
2248     }
2251 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2253   if (nunits_in < nunits_out)
2254     modifier = NARROW;
2255   else if (nunits_out == nunits_in)
2256     modifier = NONE;
2257   else
2258     modifier = WIDEN;
2260 /* Multiple types in SLP are handled by creating the appropriate number of
2261    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2262    case of SLP.  */
2263   if (slp_node || PURE_SLP_STMT (stmt_info))
2264     ncopies = 1;
2265 else if (modifier == NARROW)
2266 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2267   else
2268     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2270 /* Sanity check: make sure that at least one copy of the vectorized stmt
2271 needs to be generated. */
2272 gcc_assert (ncopies >= 1);
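/* Example of the arithmetic above (illustrative, not from the source):
   with a vectorization factor of 16, a V4SI -> V8HI demotion has
   NUNITS_OUT == 8 and thus NCOPIES == 16 / 8 == 2, while the inverse
   V8HI -> V4SI promotion divides by NUNITS_IN == 8 and likewise needs
   two copies of the vectorized stmt.  */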
2274   /* Supportable by target?  */
2275   switch (modifier)
2276     {
2277     case NONE:
2278       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2279         return false;
2280       if (supportable_convert_operation (code, vectype_out, vectype_in,
2281                                          &decl1, &code1))
2282         break;
2283       /* FALLTHRU */
2284     unsupported:
2285       if (vect_print_dump_info (REPORT_DETAILS))
2286         fprintf (vect_dump, "conversion not supported by target.");
2287       return false;
2289     case WIDEN:
2290       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2291 &decl1, &decl2, &code1, &code2,
2292 &multi_step_cvt, &interm_types))
2294           /* Binary widening operation can only be supported directly by the
2295              architecture.  */
2296           gcc_assert (!(multi_step_cvt && op_type == binary_op));
2297           break;
2300 if (code != FLOAT_EXPR
2301 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2302               <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2303         goto unsupported;
2305 rhs_mode = TYPE_MODE (rhs_type);
2306 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2307 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2308 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2309 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2311           cvt_type
2312             = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2313 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2314           if (cvt_type == NULL_TREE)
2315             goto unsupported;
2317           if (GET_MODE_SIZE (rhs_mode) == fltsz)
2319 if (!supportable_convert_operation (code, vectype_out,
2320                                                   cvt_type, &decl1, &codecvt1))
2321                 goto unsupported;
2323 else if (!supportable_widening_operation (code, stmt, vectype_out,
2324 cvt_type, &decl1, &decl2,
2325                                                     &codecvt1, &codecvt2,
2326                                                     &multi_step_cvt,
2327                                                     &interm_types))
2328             continue;
2329           else
2330             gcc_assert (multi_step_cvt == 0);
2332 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2333 vectype_in, NULL, NULL, &code1,
2334                                               &code2, &multi_step_cvt,
2335                                               &interm_types))
2336             break;
2339       if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2340         goto unsupported;
2342 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2343 codecvt2 = ERROR_MARK;
2344       else
2345         {
2346           multi_step_cvt++;
2347           VEC_safe_push (tree, heap, interm_types, cvt_type);
2348 cvt_type = NULL_TREE;
2350       break;
2352     case NARROW:
2353       gcc_assert (op_type == unary_op);
2354 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2355                                            &code1, &multi_step_cvt,
2356                                            &interm_types))
2357         break;
2359 if (code != FIX_TRUNC_EXPR
2360 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2361               >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2362         goto unsupported;
2364 rhs_mode = TYPE_MODE (rhs_type);
2365       cvt_type
2366         = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368       if (cvt_type == NULL_TREE)
2369         goto unsupported;
2370       if (!supportable_convert_operation (code, cvt_type, vectype_in,
2371                                           &decl1, &codecvt1))
2372         goto unsupported;
2373       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2374                                            &code1, &multi_step_cvt,
2375                                            &interm_types))
2376         break;
2377       goto unsupported;
2379     default:
2380       gcc_unreachable ();
2381     }
2383 if (!vec_stmt) /* transformation not required. */
2385 if (vect_print_dump_info (REPORT_DETAILS))
2386 fprintf (vect_dump, "=== vectorizable_conversion ===");
2387 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2388 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2389 else if (modifier == NARROW)
2391 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2394       else
2396           STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2397 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
2399       VEC_free (tree, heap, interm_types);
2400       return true;
2403   /* Transform.  */
2404 if (vect_print_dump_info (REPORT_DETAILS))
2405 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2407 if (op_type == binary_op)
2409 if (CONSTANT_CLASS_P (op0))
2410 op0 = fold_convert (TREE_TYPE (op1), op0);
2411 else if (CONSTANT_CLASS_P (op1))
2412 op1 = fold_convert (TREE_TYPE (op0), op1);
2415 /* In case of multi-step conversion, we first generate conversion operations
2416    to the intermediate types, and then from those types to the final one.
2417 We create vector destinations for the intermediate type (TYPES) received
2418 from supportable_*_operation, and store them in the correct order
2419 for future use in vect_create_vectorized_*_stmts (). */
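/* For instance (an assumed two-step case, not from the source): narrowing
   DImode elements to QImode pushes the QI destination first and the
   intermediate destinations on top of it, so each VEC_pop in
   vect_create_vectorized_demotion_stmts () hands back the destination
   for the next conversion step in order.  */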
2420 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2421 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2422 VEC_quick_push (tree, vec_dsts, vec_dest);
2424   if (multi_step_cvt)
2426     for (i = VEC_length (tree, interm_types) - 1;
2427 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2429         vec_dest = vect_create_destination_var (scalar_dest,
2430                                                 intermediate_type);
2431 VEC_quick_push (tree, vec_dsts, vec_dest);
2435   if (cvt_type)
2436     vec_dest = vect_create_destination_var (scalar_dest, cvt_type);
2440 if (modifier == NONE)
2441 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2442 else if (modifier == WIDEN)
2444 vec_oprnds0 = VEC_alloc (tree, heap,
2445                                (multi_step_cvt
2446                                 ? vect_pow2 (multi_step_cvt) : 1));
2447 if (op_type == binary_op)
2448 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2450   else
2451     vec_oprnds0 = VEC_alloc (tree, heap,
2452                              2 * (multi_step_cvt
2453                                   ? vect_pow2 (multi_step_cvt) : 1));
2455 else if (code == WIDEN_LSHIFT_EXPR)
2456 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2459 prev_stmt_info = NULL;
2460   switch (modifier)
2461     {
2462     case NONE:
2463       for (j = 0; j < ncopies; j++)
2465           if (j == 0)
2466             vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2467                                -1);
2468           else
2469             vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2471 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2473               /* Arguments are ready.  Create the new vector stmt.  */
2474 if (code1 == CALL_EXPR)
2476 new_stmt = gimple_build_call (decl1, 1, vop0);
2477 new_temp = make_ssa_name (vec_dest, new_stmt);
2478 gimple_call_set_lhs (new_stmt, new_temp);
2480               else
2481                 {
2482                   gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2483                   new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2484                                                            vop0, NULL);
2485 new_temp = make_ssa_name (vec_dest, new_stmt);
2486 gimple_assign_set_lhs (new_stmt, new_temp);
2489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2490               if (slp_node)
2491                 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2492                                 new_stmt);
2495           if (j == 0)
2496             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2497           else
2498             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2499 prev_stmt_info = vinfo_for_stmt (new_stmt);
2501       break;
2503     case WIDEN:
2504       /* In case the vectorization factor (VF) is bigger than the number
2505 of elements that we can fit in a vectype (nunits), we have to
2506 generate more than one vector stmt - i.e - we need to "unroll"
2507 the vector stmt by a factor VF/nunits. */
2508 for (j = 0; j < ncopies; j++)
2510           if (j == 0)
2512               if (slp_node)
2515                   if (code == WIDEN_LSHIFT_EXPR)
2517                       unsigned int k;
2519                       vec_oprnd1 = op1;
2520 /* Store vec_oprnd1 for every vector stmt to be created
2521 for SLP_NODE. We check during the analysis that all
2522 the shift arguments are the same. */
2523 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2524 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2526                       vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2527                                          slp_node, -1);
2529                   else
2530 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2531 &vec_oprnds1, slp_node, -1);
2535 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2536 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2537 if (op_type == binary_op)
2539 if (code == WIDEN_LSHIFT_EXPR)
2540                     vec_oprnd1 = op1;
2541                   else
2542                     vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2543                                                                NULL);
2544 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2550 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2551 VEC_truncate (tree, vec_oprnds0, 0);
2552 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2553 if (op_type == binary_op)
2555 if (code == WIDEN_LSHIFT_EXPR)
2556                     vec_oprnd1 = op1;
2557                   else
2558                     vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2559                                                                  vec_oprnd1);
2560 VEC_truncate (tree, vec_oprnds1, 0);
2561 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2565 /* Arguments are ready. Create the new vector stmts. */
2566 for (i = multi_step_cvt; i >= 0; i--)
2568 tree this_dest = VEC_index (tree, vec_dsts, i);
2569 enum tree_code c1 = code1, c2 = code2;
2570               if (i == 0 && codecvt2 != ERROR_MARK)
2571                 {
2572                   c1 = codecvt1;
2573                   c2 = codecvt2;
2574                 }
2575 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2576                                                       &vec_oprnds1,
2577                                                       stmt, this_dest, gsi,
2578                                                       c1, c2, decl1, decl2,
2579                                                       op_type);
2582 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2586 if (codecvt1 == CALL_EXPR)
2588 new_stmt = gimple_build_call (decl1, 1, vop0);
2589 new_temp = make_ssa_name (vec_dest, new_stmt);
2590 gimple_call_set_lhs (new_stmt, new_temp);
2594 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2595 new_temp = make_ssa_name (vec_dest, NULL);
2596                       new_stmt = gimple_build_assign_with_ops (codecvt1,
2597                                                                new_temp,
2598                                                                vop0, NULL);
2601 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2603               else
2604                 new_stmt = SSA_NAME_DEF_STMT (vop0);
2607 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2611 if (!prev_stmt_info)
2612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2613               else
2614                 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2615 prev_stmt_info = vinfo_for_stmt (new_stmt);
2620 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2621       break;
2623     case NARROW:
2624       /* In case the vectorization factor (VF) is bigger than the number
2625 of elements that we can fit in a vectype (nunits), we have to
2626 generate more than one vector stmt - i.e - we need to "unroll"
2627 the vector stmt by a factor VF/nunits. */
2628 for (j = 0; j < ncopies; j++)
2631           if (slp_node)
2632             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2633                                slp_node, -1);
2634           else
2636               VEC_truncate (tree, vec_oprnds0, 0);
2637 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2638 vect_pow2 (multi_step_cvt) - 1);
2641 /* Arguments are ready. Create the new vector stmts. */
2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2645 if (codecvt1 == CALL_EXPR)
2647 new_stmt = gimple_build_call (decl1, 1, vop0);
2648 new_temp = make_ssa_name (vec_dest, new_stmt);
2649 gimple_call_set_lhs (new_stmt, new_temp);
2653 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2654 new_temp = make_ssa_name (vec_dest, NULL);
2655                 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2656                                                          vop0, NULL);
2659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2660 VEC_replace (tree, vec_oprnds0, i, new_temp);
2663 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2664                                                  stmt, vec_dsts, gsi,
2665                                                  slp_node, code1,
2666                                                  &prev_stmt_info);
2669       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2670       break;
2671     }
2673 VEC_free (tree, heap, vec_oprnds0);
2674 VEC_free (tree, heap, vec_oprnds1);
2675 VEC_free (tree, heap, vec_dsts);
2676   VEC_free (tree, heap, interm_types);
2678   return true;
2682 /* Function vectorizable_assignment.
2684 Check if STMT performs an assignment (copy) that can be vectorized.
2685 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2686 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2687 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2689 static bool
2690 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2691 gimple *vec_stmt, slp_tree slp_node)
2696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2697 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2698 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2702 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2703 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2706 VEC(tree,heap) *vec_oprnds = NULL;
2708 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2709 gimple new_stmt = NULL;
2710 stmt_vec_info prev_stmt_info = NULL;
2711 enum tree_code code;
2714 /* Multiple types in SLP are handled by creating the appropriate number of
2715    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2716    case of SLP.  */
2717   if (slp_node || PURE_SLP_STMT (stmt_info))
2718     ncopies = 1;
2719   else
2720     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2722 gcc_assert (ncopies >= 1);
2724   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2725     return false;
2727   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2728     return false;
2730 /* Is vectorizable assignment? */
2731   if (!is_gimple_assign (stmt))
2732     return false;
2734 scalar_dest = gimple_assign_lhs (stmt);
2735   if (TREE_CODE (scalar_dest) != SSA_NAME)
2736     return false;
2738 code = gimple_assign_rhs_code (stmt);
2739 if (gimple_assign_single_p (stmt)
2740 || code == PAREN_EXPR
2741 || CONVERT_EXPR_CODE_P (code))
2742     op = gimple_assign_rhs1 (stmt);
2743   else
2744     return false;
2746 if (code == VIEW_CONVERT_EXPR)
2747 op = TREE_OPERAND (op, 0);
2749 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2750 &def_stmt, &def, &dt[0], &vectype_in))
2752 if (vect_print_dump_info (REPORT_DETAILS))
2753       fprintf (vect_dump, "use not simple.");
2754       return false;
2757 /* We can handle NOP_EXPR conversions that do not change the number
2758 of elements or the vector size. */
2759 if ((CONVERT_EXPR_CODE_P (code)
2760        || code == VIEW_CONVERT_EXPR)
2761       && (!vectype_in
2762           || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2763           || (GET_MODE_SIZE (TYPE_MODE (vectype))
2764               != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2765     return false;
2767 /* We do not handle bit-precision changes. */
2768 if ((CONVERT_EXPR_CODE_P (code)
2769 || code == VIEW_CONVERT_EXPR)
2770 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2771 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2772 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2773 || ((TYPE_PRECISION (TREE_TYPE (op))
2774 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2775 /* But a conversion that does not change the bit-pattern is ok. */
2776 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2777 > TYPE_PRECISION (TREE_TYPE (op)))
2778 && TYPE_UNSIGNED (TREE_TYPE (op))))
2780       if (vect_print_dump_info (REPORT_DETAILS))
2781         fprintf (vect_dump, "type conversion to/from bit-precision "
2782                  "unsupported.");
2783       return false;
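  /* Example of the exception above (illustrative, not from the original
     source): widening an unsigned 3-bit bit-field value to a full
     8-bit-precision type only zero-extends, leaving the bit pattern
     intact, so such a copy is still vectorizable; a narrowing in the
     other direction would need explicit masking and is rejected here.  */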
2786 if (!vec_stmt) /* transformation not required. */
2788 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "=== vectorizable_assignment ===");
2791       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2792       return true;
2795   /* Transform.  */
2796 if (vect_print_dump_info (REPORT_DETAILS))
2797 fprintf (vect_dump, "transform assignment.");
2800 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803 for (j = 0; j < ncopies; j++)
2806       if (j == 0)
2807         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2808       else
2809         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2811       /* Arguments are ready.  Create the new vector stmt.  */
2812 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2814 if (CONVERT_EXPR_CODE_P (code)
2815 || code == VIEW_CONVERT_EXPR)
2816 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2817 new_stmt = gimple_build_assign (vec_dest, vop);
2818 new_temp = make_ssa_name (vec_dest, new_stmt);
2819 gimple_assign_set_lhs (new_stmt, new_temp);
2820 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2822 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2828       if (j == 0)
2829         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2830       else
2831         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2833 prev_stmt_info = vinfo_for_stmt (new_stmt);
2836   VEC_free (tree, heap, vec_oprnds);
2838   return true;
2841 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2842 either as shift by a scalar or by a vector. */
2844 bool
2845 vect_supportable_shift (enum tree_code code, tree scalar_type)
2848 enum machine_mode vec_mode;
2853   vectype = get_vectype_for_scalar_type (scalar_type);
2854   if (!vectype)
2855     return false;
2857 optab = optab_for_tree_code (code, vectype, optab_scalar);
2858   if (!optab
2859       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2861 optab = optab_for_tree_code (code, vectype, optab_vector);
2862       if (!optab
2863           || (optab_handler (optab, TYPE_MODE (vectype))
2864               == CODE_FOR_nothing))
2865         return false;
2868 vec_mode = TYPE_MODE (vectype);
2869 icode = (int) optab_handler (optab, vec_mode);
2870   if (icode == CODE_FOR_nothing)
2871     return false;
2873   return true;
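/* Usage sketch (hypothetical caller, an assumed example rather than part
   of this file's interface contract): a pattern recognizer can ask

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... emit a shift-based replacement pattern ...

   The predicate only says that some vector-by-scalar or vector-by-vector
   form of CODE exists for the target, not which form will be chosen.  */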
2877 /* Function vectorizable_shift.
2879 Check if STMT performs a shift operation that can be vectorized.
2880 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2881 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2882 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2884 static bool
2885 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2886 gimple *vec_stmt, slp_tree slp_node)
2890 tree op0, op1 = NULL;
2891 tree vec_oprnd1 = NULL_TREE;
2892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2894 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2895 enum tree_code code;
2896 enum machine_mode vec_mode;
2900 enum machine_mode optab_op2_mode;
2903 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2904 gimple new_stmt = NULL;
2905 stmt_vec_info prev_stmt_info;
2912 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2915 bool scalar_shift_arg = true;
2916 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2919   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2920     return false;
2922   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2923     return false;
2925 /* Is STMT a vectorizable binary/unary operation? */
2926   if (!is_gimple_assign (stmt))
2927     return false;
2929   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2930     return false;
2932 code = gimple_assign_rhs_code (stmt);
2934 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2935         || code == RROTATE_EXPR))
2936     return false;
2938 scalar_dest = gimple_assign_lhs (stmt);
2939 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2940 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2941 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2943 if (vect_print_dump_info (REPORT_DETAILS))
2944         fprintf (vect_dump, "bit-precision shifts not supported.");
2945       return false;
2948 op0 = gimple_assign_rhs1 (stmt);
2949 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2950 &def_stmt, &def, &dt[0], &vectype))
2952 if (vect_print_dump_info (REPORT_DETAILS))
2953         fprintf (vect_dump, "use not simple.");
2954       return false;
2956 /* If op0 is an external or constant def use a vector type with
2957 the same size as the output vector type. */
2958   if (!vectype)
2959     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2961 gcc_assert (vectype);
2964 if (vect_print_dump_info (REPORT_DETAILS))
2966 fprintf (vect_dump, "no vectype for scalar type ");
2967 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2973 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2974 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2975   if (nunits_out != nunits_in)
2976     return false;
2978 op1 = gimple_assign_rhs2 (stmt);
2979 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2980 &dt[1], &op1_vectype))
2982 if (vect_print_dump_info (REPORT_DETAILS))
2983         fprintf (vect_dump, "use not simple.");
2984       return false;
2987   if (loop_vinfo)
2988     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2989   else
2990     vf = 1;
2992 /* Multiple types in SLP are handled by creating the appropriate number of
2993    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2994    case of SLP.  */
2995   if (slp_node || PURE_SLP_STMT (stmt_info))
2996     ncopies = 1;
2997   else
2998     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3000 gcc_assert (ncopies >= 1);
3002 /* Determine whether the shift amount is a vector, or scalar. If the
3003 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3005 if (dt[1] == vect_internal_def && !slp_node)
3006 scalar_shift_arg = false;
3007 else if (dt[1] == vect_constant_def
3008 || dt[1] == vect_external_def
3009 || dt[1] == vect_internal_def)
3011 /* In SLP, need to check whether the shift count is the same,
3012          in loops if it is a constant or invariant, it is always
3013          a scalar shift.  */
3014       if (slp_node)
3016           VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3017           gimple slpstmt;
3019 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3020 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3021 scalar_shift_arg = false;
3024   else
3025     {
3026       if (vect_print_dump_info (REPORT_DETAILS))
3027         fprintf (vect_dump, "operand mode requires invariant argument.");
3028       return false;
3029     }
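  /* Concrete illustration (assumed example, not from the source): an SLP
     group { x0 >> 3, x1 >> 3 } keeps SCALAR_SHIFT_ARG because both second
     operands compare equal, whereas { x0 >> 3, x1 >> 5 } clears it, and
     the group is then only vectorizable with a vector/vector shift.  */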
3031 /* Vector shifted by vector. */
3032 if (!scalar_shift_arg)
3034 optab = optab_for_tree_code (code, vectype, optab_vector);
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "vector/vector shift/rotate found.");
3037       if (!op1_vectype)
3038         op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3039 if (op1_vectype == NULL_TREE
3040 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3042 if (vect_print_dump_info (REPORT_DETAILS))
3043 fprintf (vect_dump, "unusable type for last operand in"
3044 " vector/vector shift/rotate.");
3048 /* See if the machine has a vector shifted by scalar insn and if not
3049 then see if it has a vector shifted by vector insn. */
3052 optab = optab_for_tree_code (code, vectype, optab_scalar);
3053       if (optab
3054           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3056 if (vect_print_dump_info (REPORT_DETAILS))
3057 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3061 optab = optab_for_tree_code (code, vectype, optab_vector);
3062           if (optab
3063               && (optab_handler (optab, TYPE_MODE (vectype))
3064 != CODE_FOR_nothing))
3066 scalar_shift_arg = false;
3068 if (vect_print_dump_info (REPORT_DETAILS))
3069 fprintf (vect_dump, "vector/vector shift/rotate found.");
3071 /* Unlike the other binary operators, shifts/rotates have
3072 the rhs being int, instead of the same type as the lhs,
3073 so make sure the scalar is the right type if we are
3074 dealing with vectors of long long/long/short/char. */
3075 if (dt[1] == vect_constant_def)
3076 op1 = fold_convert (TREE_TYPE (vectype), op1);
3077               else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3078                                                    TREE_TYPE (op1)))
3080                   if (slp_node
3081                       && TYPE_MODE (TREE_TYPE (vectype))
3082                          != TYPE_MODE (TREE_TYPE (op1)))
3084 if (vect_print_dump_info (REPORT_DETAILS))
3085 fprintf (vect_dump, "unusable type for last operand in"
3086 " vector/vector shift/rotate.");
3089 if (vec_stmt && !slp_node)
3091 op1 = fold_convert (TREE_TYPE (vectype), op1);
3092 op1 = vect_init_vector (stmt, op1,
3093 TREE_TYPE (vectype), NULL);
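              /* For example (illustrative, assumed types): in
                 V2DI = v << n with an invariant int N, the count has a
                 narrower mode than the DImode elements, so N is
                 fold_convert-ed to the element type and materialized once
                 as an invariant vector by vect_init_vector before the
                 loop body uses it.  */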
3100 /* Supportable by target? */
3101   if (!optab)
3102     {
3103       if (vect_print_dump_info (REPORT_DETAILS))
3104         fprintf (vect_dump, "no optab.");
3105       return false;
3106     }
3107 vec_mode = TYPE_MODE (vectype);
3108 icode = (int) optab_handler (optab, vec_mode);
3109 if (icode == CODE_FOR_nothing)
3111 if (vect_print_dump_info (REPORT_DETAILS))
3112 fprintf (vect_dump, "op not supported by target.");
3113 /* Check only during analysis. */
3114 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3115           || (vf < vect_min_worthwhile_factor (code)
3116               && !vec_stmt))
3117         return false;
3118 if (vect_print_dump_info (REPORT_DETAILS))
3119 fprintf (vect_dump, "proceeding using word mode.");
3122 /* Worthwhile without SIMD support? Check only during analysis. */
3123 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3124       && vf < vect_min_worthwhile_factor (code)
3125       && !vec_stmt)
3127 if (vect_print_dump_info (REPORT_DETAILS))
3128       fprintf (vect_dump, "not worthwhile without SIMD support.");
3129       return false;
3132 if (!vec_stmt) /* transformation not required. */
3134 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3135 if (vect_print_dump_info (REPORT_DETAILS))
3136 fprintf (vect_dump, "=== vectorizable_shift ===");
3137       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3138       return true;
3141   /* Transform.  */
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "transform binary/unary operation.");
3147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3149 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3150 created in the previous stages of the recursion, so no allocation is
3151 needed, except for the case of shift with scalar shift argument. In that
3152 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3153 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3154 In case of loop-based vectorization we allocate VECs of size 1. We
3155 allocate VEC_OPRNDS1 only in case of binary operation. */
3158 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3159 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3161 else if (scalar_shift_arg)
3162 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3164 prev_stmt_info = NULL;
3165 for (j = 0; j < ncopies; j++)
3170 if (scalar_shift_arg)
3172 /* Vector shl and shr insn patterns can be defined with scalar
3173 operand 2 (shift operand). In this case, use constant or loop
3174              invariant op1 directly, without extending it to vector mode
3175              first.  */
3176 optab_op2_mode = insn_data[icode].operand[2].mode;
3177 if (!VECTOR_MODE_P (optab_op2_mode))
3179 if (vect_print_dump_info (REPORT_DETAILS))
3180 fprintf (vect_dump, "operand 1 using scalar mode.");
3181                   vec_oprnd1 = op1;
3182                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3185 /* Store vec_oprnd1 for every vector stmt to be created
3186 for SLP_NODE. We check during the analysis that all
3187 the shift arguments are the same.
3188 TODO: Allow different constants for different vector
3189 stmts generated for an SLP instance. */
3190 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3191 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3196 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3197 (a special case for certain kind of vector shifts); otherwise,
3198 operand 1 should be of a vector type (the usual case). */
3199           if (vec_oprnd1)
3200             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3201                                slp_node, -1);
3202           else
3203             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3204                                slp_node, -1);
3206       else
3207         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3209 /* Arguments are ready. Create the new vector stmt. */
3210 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3212 vop1 = VEC_index (tree, vec_oprnds1, i);
3213 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3214 new_temp = make_ssa_name (vec_dest, new_stmt);
3215 gimple_assign_set_lhs (new_stmt, new_temp);
3216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3218 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3224       if (j == 0)
3225         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3226       else
3227         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3228 prev_stmt_info = vinfo_for_stmt (new_stmt);
3231 VEC_free (tree, heap, vec_oprnds0);
3232   VEC_free (tree, heap, vec_oprnds1);
3234   return true;
3238 /* Function vectorizable_operation.
3240    Check if STMT performs a binary, unary or ternary operation that can
3241    be vectorized.
3242 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3243 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3244 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3246 static bool
3247 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3248 gimple *vec_stmt, slp_tree slp_node)
3252 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3256 enum tree_code code;
3257 enum machine_mode vec_mode;
3264 enum vect_def_type dt[3]
3265 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3266 gimple new_stmt = NULL;
3267 stmt_vec_info prev_stmt_info;
3273 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3274 tree vop0, vop1, vop2;
3275 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3278   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3279     return false;
3281   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3282     return false;
3284 /* Is STMT a vectorizable binary/unary operation? */
3285   if (!is_gimple_assign (stmt))
3286     return false;
3288   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3289     return false;
3291 code = gimple_assign_rhs_code (stmt);
3293 /* For pointer addition, we should use the normal plus for
3294 the vector addition. */
3295   if (code == POINTER_PLUS_EXPR)
3296     code = PLUS_EXPR;
3298 /* Support only unary or binary operations. */
3299 op_type = TREE_CODE_LENGTH (code);
3300 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3302 if (vect_print_dump_info (REPORT_DETAILS))
3303         fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3304                  op_type);
3305       return false;
3308 scalar_dest = gimple_assign_lhs (stmt);
3309 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3311   /* Most operations cannot handle bit-precision types without extra
3312      truncations.  */
3313 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3314 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3315 /* Exception are bitwise binary operations. */
3316 && code != BIT_IOR_EXPR
3317 && code != BIT_XOR_EXPR
3318 && code != BIT_AND_EXPR)
3320 if (vect_print_dump_info (REPORT_DETAILS))
3321         fprintf (vect_dump, "bit-precision arithmetic not supported.");
3322       return false;
3325 op0 = gimple_assign_rhs1 (stmt);
3326 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3327 &def_stmt, &def, &dt[0], &vectype))
3329 if (vect_print_dump_info (REPORT_DETAILS))
3330       fprintf (vect_dump, "use not simple.");
3331       return false;
3333 /* If op0 is an external or constant def use a vector type with
3334 the same size as the output vector type. */
3335   if (!vectype)
3336     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3338 gcc_assert (vectype);
3341 if (vect_print_dump_info (REPORT_DETAILS))
3343 fprintf (vect_dump, "no vectype for scalar type ");
3344 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3350 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3351 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3352   if (nunits_out != nunits_in)
3353     return false;
3355 if (op_type == binary_op || op_type == ternary_op)
3357 op1 = gimple_assign_rhs2 (stmt);
3358       if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
3359                                &dt[1]))
3361 if (vect_print_dump_info (REPORT_DETAILS))
3362             fprintf (vect_dump, "use not simple.");
3363           return false;
3366 if (op_type == ternary_op)
3368 op2 = gimple_assign_rhs3 (stmt);
3369       if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
3370                                &dt[2]))
3372 if (vect_print_dump_info (REPORT_DETAILS))
3373             fprintf (vect_dump, "use not simple.");
3374           return false;
3378   if (loop_vinfo)
3379     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3380   else
3381     vf = 1;
3383 /* Multiple types in SLP are handled by creating the appropriate number of
3384    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3385    case of SLP.  */
3386   if (slp_node || PURE_SLP_STMT (stmt_info))
3387     ncopies = 1;
3388   else
3389     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3391 gcc_assert (ncopies >= 1);
3393 /* Shifts are handled in vectorizable_shift (). */
3394 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3395       || code == RROTATE_EXPR)
3396     return false;
3398 optab = optab_for_tree_code (code, vectype, optab_default);
3400 /* Supportable by target? */
3401   if (!optab)
3402     {
3403       if (vect_print_dump_info (REPORT_DETAILS))
3404         fprintf (vect_dump, "no optab.");
3405       return false;
3406     }
3407 vec_mode = TYPE_MODE (vectype);
3408 icode = (int) optab_handler (optab, vec_mode);
3409 if (icode == CODE_FOR_nothing)
3411 if (vect_print_dump_info (REPORT_DETAILS))
3412 fprintf (vect_dump, "op not supported by target.");
3413 /* Check only during analysis. */
3414 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3415           || (vf < vect_min_worthwhile_factor (code)
3416               && !vec_stmt))
3417         return false;
3418 if (vect_print_dump_info (REPORT_DETAILS))
3419 fprintf (vect_dump, "proceeding using word mode.");
3422 /* Worthwhile without SIMD support? Check only during analysis. */
3423 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3424       && vf < vect_min_worthwhile_factor (code)
3425       && !vec_stmt)
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428       fprintf (vect_dump, "not worthwhile without SIMD support.");
3429       return false;
3432 if (!vec_stmt) /* transformation not required. */
3434 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "=== vectorizable_operation ===");
3437       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3438       return true;
3441   /* Transform.  */
3443 if (vect_print_dump_info (REPORT_DETAILS))
3444 fprintf (vect_dump, "transform binary/unary operation.");
3447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3449 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3450 created in the previous stages of the recursion, so no allocation is
3451 needed, except for the case of shift with scalar shift argument. In that
3452 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3453 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3454 In case of loop-based vectorization we allocate VECs of size 1. We
3455 allocate VEC_OPRNDS1 only in case of binary operation. */
3458 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3459 if (op_type == binary_op || op_type == ternary_op)
3460 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3461 if (op_type == ternary_op)
3462 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3465 /* In case the vectorization factor (VF) is bigger than the number
3466 of elements that we can fit in a vectype (nunits), we have to generate
3467 more than one vector stmt - i.e - we need to "unroll" the
3468 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3469 from one copy of the vector stmt to the next, in the field
3470 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3471 stages to find the correct vector defs to be used when vectorizing
3472 stmts that use the defs of the current stmt. The example below
3473 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3474 we need to create 4 vectorized stmts):
3476 before vectorization:
3477                            RELATED_STMT    VEC_STMT
3478    S1:  x = memref         -               -
3479    S2:  z = x + 1          -               -
3481    step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
3482            there):
3483 RELATED_STMT VEC_STMT
3484 VS1_0: vx0 = memref0 VS1_1 -
3485 VS1_1: vx1 = memref1 VS1_2 -
3486 VS1_2: vx2 = memref2 VS1_3 -
3487 VS1_3: vx3 = memref3 - -
3488 S1: x = load - VS1_0
3491 step2: vectorize stmt S2 (done here):
3492 To vectorize stmt S2 we first need to find the relevant vector
3493 def for the first operand 'x'. This is, as usual, obtained from
3494 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3495 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3496 relevant vector def 'vx0'. Having found 'vx0' we can generate
3497 the vector stmt VS2_0, and as usual, record it in the
3498 STMT_VINFO_VEC_STMT of stmt S2.
3499 When creating the second copy (VS2_1), we obtain the relevant vector
3500 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3501 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3502 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3503 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3504 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3505 chain of stmts and pointers:
3506 RELATED_STMT VEC_STMT
3507 VS1_0: vx0 = memref0 VS1_1 -
3508 VS1_1: vx1 = memref1 VS1_2 -
3509 VS1_2: vx2 = memref2 VS1_3 -
3510 VS1_3: vx3 = memref3 - -
3511 S1: x = load - VS1_0
3512 VS2_0: vz0 = vx0 + v1 VS2_1 -
3513 VS2_1: vz1 = vx1 + v1 VS2_2 -
3514 VS2_2: vz2 = vx2 + v1 VS2_3 -
3515 VS2_3: vz3 = vx3 + v1 - -
3516 S2: z = x + 1 - VS2_0 */
3518 prev_stmt_info = NULL;
3519 for (j = 0; j < ncopies; j++)
3524 if (op_type == binary_op || op_type == ternary_op)
3525             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3526                                slp_node, -1);
3527           else
3528             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3529                                slp_node, -1);
3530 if (op_type == ternary_op)
3532 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3533 VEC_quick_push (tree, vec_oprnds2,
3534 vect_get_vec_def_for_operand (op2, stmt, NULL));
3539 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3540 if (op_type == ternary_op)
3542 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3543 VEC_quick_push (tree, vec_oprnds2,
3544                               vect_get_vec_def_for_stmt_copy (dt[2],
3545                                                               vec_oprnd));
3549 /* Arguments are ready. Create the new vector stmt. */
3550 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3552 vop1 = ((op_type == binary_op || op_type == ternary_op)
3553 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3554 vop2 = ((op_type == ternary_op)
3555 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3556           new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3557                                                     vop0, vop1, vop2);
3558 new_temp = make_ssa_name (vec_dest, new_stmt);
3559 gimple_assign_set_lhs (new_stmt, new_temp);
3560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3562 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3568       if (j == 0)
3569         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3570       else
3571         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3572 prev_stmt_info = vinfo_for_stmt (new_stmt);
3575 VEC_free (tree, heap, vec_oprnds0);
3576   if (vec_oprnds1)
3577     VEC_free (tree, heap, vec_oprnds1);
3578   if (vec_oprnds2)
3579     VEC_free (tree, heap, vec_oprnds2);
3581   return true;
3585 /* Function vectorizable_store.
3587    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3588    can be vectorized.
3589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3593 static bool
3594 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3595                     slp_tree slp_node)
3600 tree vec_oprnd = NULL_TREE;
3601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3602 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3603 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3606 struct loop *loop = NULL;
3607 enum machine_mode vec_mode;
3609 enum dr_alignment_support alignment_support_scheme;
3612 enum vect_def_type dt;
3613 stmt_vec_info prev_stmt_info = NULL;
3614 tree dataref_ptr = NULL_TREE;
3615 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3618 gimple next_stmt, first_stmt = NULL;
3619 bool strided_store = false;
3620 bool store_lanes_p = false;
3621 unsigned int group_size, i;
3622 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3624 VEC(tree,heap) *vec_oprnds = NULL;
3625 bool slp = (slp_node != NULL);
3626 unsigned int vec_num;
3627 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3630   if (loop_vinfo)
3631     loop = LOOP_VINFO_LOOP (loop_vinfo);
3633 /* Multiple types in SLP are handled by creating the appropriate number of
3634    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3635    case of SLP.  */
3636   if (slp || PURE_SLP_STMT (stmt_info))
3637     ncopies = 1;
3638   else
3639     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3641 gcc_assert (ncopies >= 1);
3643 /* FORNOW. This restriction should be relaxed. */
3644 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3646 if (vect_print_dump_info (REPORT_DETAILS))
3647       fprintf (vect_dump, "multiple types in nested loop.");
3648       return false;
3651   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3652     return false;
3654   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3655     return false;
3657 /* Is vectorizable store? */
3659   if (!is_gimple_assign (stmt))
3660     return false;
3662 scalar_dest = gimple_assign_lhs (stmt);
3663 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3664 && is_pattern_stmt_p (stmt_info))
3665 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3666 if (TREE_CODE (scalar_dest) != ARRAY_REF
3667 && TREE_CODE (scalar_dest) != INDIRECT_REF
3668 && TREE_CODE (scalar_dest) != COMPONENT_REF
3669 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3670 && TREE_CODE (scalar_dest) != REALPART_EXPR
3671       && TREE_CODE (scalar_dest) != MEM_REF)
3672     return false;
3674 gcc_assert (gimple_assign_single_p (stmt));
3675 op = gimple_assign_rhs1 (stmt);
3676 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3678 if (vect_print_dump_info (REPORT_DETAILS))
3679       fprintf (vect_dump, "use not simple.");
3680       return false;
3683 elem_type = TREE_TYPE (vectype);
3684 vec_mode = TYPE_MODE (vectype);
3686 /* FORNOW. In some cases can vectorize even if data-type not supported
3687 (e.g. - array initialization with 0). */
3688   if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3689     return false;
3691   if (!STMT_VINFO_DATA_REF (stmt_info))
3692     return false;
3694 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3696 if (vect_print_dump_info (REPORT_DETAILS))
3697       fprintf (vect_dump, "negative step for store.");
3698       return false;
3701 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3703 strided_store = true;
3704 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3705 if (!slp && !PURE_SLP_STMT (stmt_info))
3707 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3708 if (vect_store_lanes_supported (vectype, group_size))
3709 store_lanes_p = true;
3710       else if (!vect_strided_store_supported (vectype, group_size))
3711         return false;
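      /* Illustration (assumed target behavior, not from the source): on
         targets with NEON-style vstN instructions
         vect_store_lanes_supported returns true and the whole group
         becomes one IFN_STORE_LANES call below; otherwise the group falls
         back to vect_permute_store_chain () with interleaving
         permutes.  */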
3714 if (first_stmt == stmt)
3716 /* STMT is the leader of the group. Check the operands of all the
3717 stmts of the group. */
3718 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3721 gcc_assert (gimple_assign_single_p (next_stmt));
3722 op = gimple_assign_rhs1 (next_stmt);
3723               if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3724                                        &def, &dt))
3726 if (vect_print_dump_info (REPORT_DETAILS))
3727                   fprintf (vect_dump, "use not simple.");
3728                   return false;
3730 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3735 if (!vec_stmt) /* transformation not required. */
3737 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3738       vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3739       return true;
3742   /* Transform.  */
3744   if (strided_store)
3746 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3747 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3749 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3752 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3754 /* We vectorize all the stmts of the interleaving group when we
3755 reach the last stmt in the group. */
3756 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3757           < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3758           && !slp)
3760           *vec_stmt = NULL;
3761           return true;
3764       if (slp)
3766           strided_store = false;
3767           /* VEC_NUM is the number of vect stmts to be created for this
3768              group.  */
3769 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3770 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3771 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3772 op = gimple_assign_rhs1 (first_stmt);
3774       else
3775         /* VEC_NUM is the number of vect stmts to be created for this
3776            group.  */
3777 vec_num = group_size;
3783 group_size = vec_num = 1;
3786 if (vect_print_dump_info (REPORT_DETAILS))
3787 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3789 dr_chain = VEC_alloc (tree, heap, group_size);
3790 oprnds = VEC_alloc (tree, heap, group_size);
3792 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3793 gcc_assert (alignment_support_scheme);
3794   /* Targets with store-lane instructions must not require explicit
3795      realignment.  */
3796 gcc_assert (!store_lanes_p
3797 || alignment_support_scheme == dr_aligned
3798 || alignment_support_scheme == dr_unaligned_supported);
3800   if (store_lanes_p)
3801     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3802   else
3803     aggr_type = vectype;
3805 /* In case the vectorization factor (VF) is bigger than the number
3806 of elements that we can fit in a vectype (nunits), we have to generate
3807 more than one vector stmt - i.e - we need to "unroll" the
3808 vector stmt by a factor VF/nunits. For more details see documentation in
3809 vect_get_vec_def_for_copy_stmt. */
3811   /* In case of interleaving (non-unit strided access):
3813         S1:  &base + 2 = x2
3814         S2:  &base = x0
3815         S3:  &base + 1 = x1
3816         S4:  &base + 3 = x3
3818 We create vectorized stores starting from base address (the access of the
3819 first stmt in the chain (S2 in the above example), when the last store stmt
3820 of the chain (S4) is reached:
3822    VS1: &base = vx2
3823    VS2: &base + vec_size*1 = vx0
3824 VS3: &base + vec_size*2 = vx1
3825 VS4: &base + vec_size*3 = vx3
3827 Then permutation statements are generated:
3829 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3830 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3833 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3834 (the order of the data-refs in the output of vect_permute_store_chain
3835 corresponds to the order of scalar stmts in the interleaving chain - see
3836 the documentation of vect_permute_store_chain()).
3838 In case of both multiple types and interleaving, above vector stores and
3839 permutation stmts are created for every copy. The result vector stmts are
3840 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3841    STMT_VINFO_RELATED_STMT for the next copies.  */
3844 prev_stmt_info = NULL;
3845 for (j = 0; j < ncopies; j++)
3854 /* Get vectorized arguments for SLP_NODE. */
3855 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3856 NULL, slp_node, -1);
3858 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3862 /* For interleaved stores we collect vectorized defs for all the
3863 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3864 used as an input to vect_permute_store_chain(), and OPRNDS as
3865 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3867 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3868 OPRNDS are of size 1. */
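              /* Worked example (illustrative, not from the original
                 source): with GROUP_SIZE == 4 and NCOPIES == 2, the
                 j == 0 pass fills DR_CHAIN and OPRNDS with the defs
                 vx0..vx3; the j == 1 pass advances each OPRNDS entry
                 through vect_get_vec_def_for_stmt_copy () and stores the
                 advanced defs back into DR_CHAIN for the next
                 permute-and-store round.  */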
3869 next_stmt = first_stmt;
3870 for (i = 0; i < group_size; i++)
3872 /* Since gaps are not supported for interleaved stores,
3873 GROUP_SIZE is the exact number of stmts in the chain.
3874 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3875 there is no interleaving, GROUP_SIZE is 1, and only one
3876 iteration of the loop will be executed. */
3877 gcc_assert (next_stmt
3878 && gimple_assign_single_p (next_stmt));
3879 op = gimple_assign_rhs1 (next_stmt);
3881                   vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3882                                                             NULL);
3883 VEC_quick_push(tree, dr_chain, vec_oprnd);
3884 VEC_quick_push(tree, oprnds, vec_oprnd);
3885 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3889           /* We should have caught mismatched types earlier.  */
3890 gcc_assert (useless_type_conversion_p (vectype,
3891 TREE_TYPE (vec_oprnd)));
3892 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3893 NULL_TREE, &dummy, gsi,
3894 &ptr_incr, false, &inv_p);
3895 gcc_assert (bb_vinfo || !inv_p);
3899 /* For interleaved stores we created vectorized defs for all the
3900 defs stored in OPRNDS in the previous iteration (previous copy).
3901 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3902              and OPRNDS as an input to vect_get_vec_def_for_stmt_copy () for the
3903              next copy.
3904 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3905 OPRNDS are of size 1. */
3906 for (i = 0; i < group_size; i++)
3908 op = VEC_index (tree, oprnds, i);
3909               vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3910                                   &dt);
3911 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3912 VEC_replace(tree, dr_chain, i, vec_oprnd);
3913 VEC_replace(tree, oprnds, i, vec_oprnd);
3915 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3916 TYPE_SIZE_UNIT (aggr_type));
3923 /* Combine all the vectors into an array. */
3924 vec_array = create_vector_array (vectype, vec_num);
3925 for (i = 0; i < vec_num; i++)
3927 vec_oprnd = VEC_index (tree, dr_chain, i);
3928 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3931           /* Emit:
3932                MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
3933 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3934 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3935 gimple_call_set_lhs (new_stmt, data_ref);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3937 mark_symbols_for_renaming (new_stmt);
3944 result_chain = VEC_alloc (tree, heap, group_size);
3946 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3950 next_stmt = first_stmt;
3951 for (i = 0; i < vec_num; i++)
3953 struct ptr_info_def *pi;
3955               if (i > 0)
3956                 /* Bump the vector pointer.  */
3957                 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3958                                                stmt, NULL_TREE);
3961 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3962 else if (strided_store)
3963 /* For strided stores vectorized defs are interleaved in
3964 vect_permute_store_chain(). */
3965 vec_oprnd = VEC_index (tree, result_chain, i);
3967 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3968 build_int_cst (reference_alias_ptr_type
3969 (DR_REF (first_dr)), 0));
3970 pi = get_ptr_info (dataref_ptr);
3971 pi->align = TYPE_ALIGN_UNIT (vectype);
3972 if (aligned_access_p (first_dr))
3974 else if (DR_MISALIGNMENT (first_dr) == -1)
3976 TREE_TYPE (data_ref)
3977 = build_aligned_type (TREE_TYPE (data_ref),
3978 TYPE_ALIGN (elem_type));
3979 pi->align = TYPE_ALIGN_UNIT (elem_type);
3984 TREE_TYPE (data_ref)
3985 = build_aligned_type (TREE_TYPE (data_ref),
3986 TYPE_ALIGN (elem_type));
3987 pi->misalign = DR_MISALIGNMENT (first_dr);
3990 /* Arguments are ready. Create the new vector stmt. */
3991 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3993 mark_symbols_for_renaming (new_stmt);
3998 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4009 prev_stmt_info = vinfo_for_stmt (new_stmt);
4013 VEC_free (tree, heap, dr_chain);
4014 VEC_free (tree, heap, oprnds);
4015   if (result_chain)
4016     VEC_free (tree, heap, result_chain);
4017   if (vec_oprnds)
4018     VEC_free (tree, heap, vec_oprnds);
4020   return true;
4023 /* Given a vector type VECTYPE and permutation SEL returns
4024 the VECTOR_CST mask that implements the permutation of the
4025 vector elements. If that is impossible to do, returns NULL. */
4027 static tree
4028 gen_perm_mask (tree vectype, unsigned char *sel)
4030 tree mask_elt_type, mask_type, mask_vec;
4033 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4035   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4036     return NULL;
4038   mask_elt_type
4039     = lang_hooks.types.type_for_size
4040         (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4041 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4043   mask_vec = NULL;
4044   for (i = nunits - 1; i >= 0; i--)
4045     mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4046                           mask_vec);
4047   mask_vec = build_vector (mask_type, mask_vec);
4049   return mask_vec;
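/* Illustration (assumed example, not from the original source): for V4SI
   and SEL = { 1, 0, 3, 2 } the resulting VECTOR_CST swaps each pair of
   elements, since element I of a VEC_PERM_EXPR result is element SEL[I]
   of the concatenated input vectors.  */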
4052 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4053    reversal of the vector elements.  If that is impossible to do,
4054    returns NULL.  */
4056 static tree
4057 perm_mask_for_reverse (tree vectype)
4062 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4063 sel = XALLOCAVEC (unsigned char, nunits);
4065 for (i = 0; i < nunits; ++i)
4066 sel[i] = nunits - 1 - i;
4068 return gen_perm_mask (vectype, sel);
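/* E.g. (illustrative): for a V4SI vector this requests
   SEL = { 3, 2, 1, 0 }, i.e. a mask that reverses the element order when
   the target supports the permutation; vectorizable_load uses it below
   for negative-step accesses.  */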
4071 /* Given a vector variable X and Y, that was generated for the scalar
4072 STMT, generate instructions to permute the vector elements of X and Y
4073 using permutation mask MASK_VEC, insert them at *GSI and return the
4074 permuted vector variable. */
4076 static tree
4077 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4078 gimple_stmt_iterator *gsi)
4080 tree vectype = TREE_TYPE (x);
4081   tree perm_dest, data_ref;
4082   gimple perm_stmt;
4084 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4085 data_ref = make_ssa_name (perm_dest, NULL);
4087 /* Generate the permute statement. */
4088   perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4089                                              x, y, mask_vec);
4090   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4092   return data_ref;
4095 /* vectorizable_load.
4097    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4098    can be vectorized.
4099 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4100 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4101 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4103 static bool
4104 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4105 slp_tree slp_node, slp_instance slp_node_instance)
4108 tree vec_dest = NULL;
4109 tree data_ref = NULL;
4110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4111 stmt_vec_info prev_stmt_info;
4112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4113 struct loop *loop = NULL;
4114 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4115 bool nested_in_vect_loop = false;
4116 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4117 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4120 enum machine_mode mode;
4121 gimple new_stmt = NULL;
4123 enum dr_alignment_support alignment_support_scheme;
4124 tree dataref_ptr = NULL_TREE;
4126 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4128 int i, j, group_size;
4129 tree msq = NULL_TREE, lsq;
4130 tree offset = NULL_TREE;
4131 tree realignment_token = NULL_TREE;
4133 VEC(tree,heap) *dr_chain = NULL;
4134 bool strided_load = false;
4135 bool load_lanes_p = false;
4139 bool compute_in_loop = false;
4140 struct loop *at_loop;
4142 bool slp = (slp_node != NULL);
4143 bool slp_perm = false;
4144 enum tree_code code;
4145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4148 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4149 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4150 int gather_scale = 1;
4151 enum vect_def_type gather_dt = vect_unknown_def_type;
4154   if (loop_vinfo)
4155       loop = LOOP_VINFO_LOOP (loop_vinfo);
4156       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4157       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4159   else
4160     vf = 1;
4162 /* Multiple types in SLP are handled by creating the appropriate number of
4163    vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4164    case of SLP.  */
4165   if (slp || PURE_SLP_STMT (stmt_info))
4166     ncopies = 1;
4167   else
4168     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4170 gcc_assert (ncopies >= 1);
4172 /* FORNOW. This restriction should be relaxed. */
4173 if (nested_in_vect_loop && ncopies > 1)
4175 if (vect_print_dump_info (REPORT_DETAILS))
4176       fprintf (vect_dump, "multiple types in nested loop.");
4177       return false;
4180   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4181     return false;
4183   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4184     return false;
4186 /* Is vectorizable load? */
4187   if (!is_gimple_assign (stmt))
4188     return false;
4190 scalar_dest = gimple_assign_lhs (stmt);
4191   if (TREE_CODE (scalar_dest) != SSA_NAME)
4192     return false;
4194 code = gimple_assign_rhs_code (stmt);
4195 if (code != ARRAY_REF
4196 && code != INDIRECT_REF
4197 && code != COMPONENT_REF
4198 && code != IMAGPART_EXPR
4199 && code != REALPART_EXPR
4200       && code != MEM_REF
4201       && TREE_CODE_CLASS (code) != tcc_declaration)
4202     return false;
4204   if (!STMT_VINFO_DATA_REF (stmt_info))
4205     return false;
4207 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4208 if (negative && ncopies > 1)
4210 if (vect_print_dump_info (REPORT_DETAILS))
4211       fprintf (vect_dump, "multiple types with negative step.");
4212       return false;
4215 elem_type = TREE_TYPE (vectype);
4216 mode = TYPE_MODE (vectype);
4218 /* FORNOW. In some cases we can vectorize even if the data-type is not
4219 supported (e.g., data copies). */
4220 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4222 if (vect_print_dump_info (REPORT_DETAILS))
4223 fprintf (vect_dump, "Aligned load, but unsupported type.");
4227 /* Check if the load is a part of an interleaving chain. */
4228 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4230 strided_load = true;
4232 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4234 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4235 if (!slp && !PURE_SLP_STMT (stmt_info))
4237 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4238 if (vect_load_lanes_supported (vectype, group_size))
4239 load_lanes_p = true;
4240 else if (!vect_strided_load_supported (vectype, group_size))
4247 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4248 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4249 if (alignment_support_scheme != dr_aligned
4250 && alignment_support_scheme != dr_unaligned_supported)
4252 if (vect_print_dump_info (REPORT_DETAILS))
4253 fprintf (vect_dump, "negative step but alignment required.");
4256 if (!perm_mask_for_reverse (vectype))
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "negative step and reversing not supported.");
4264 if (STMT_VINFO_GATHER_P (stmt_info))
4268 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4269 &gather_off, &gather_scale);
4270 gcc_assert (gather_decl);
4271 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4272 &def_stmt, &def, &gather_dt,
4273 &gather_off_vectype))
4275 if (vect_print_dump_info (REPORT_DETAILS))
4276 fprintf (vect_dump, "gather index use not simple.");
4281 if (!vec_stmt) /* transformation not required. */
4283 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4284 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4288 if (vect_print_dump_info (REPORT_DETAILS))
4289 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4293 if (STMT_VINFO_GATHER_P (stmt_info))
4295 tree vec_oprnd0 = NULL_TREE, op;
4296 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4297 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4298 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4299 edge pe = loop_preheader_edge (loop);
4302 enum { NARROW, NONE, WIDEN } modifier;
4303 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4305 if (nunits == gather_off_nunits)
4306 modifier = NONE;
4307 else if (nunits == gather_off_nunits / 2)
4309 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4310 modifier = WIDEN;
4312 for (i = 0; i < gather_off_nunits; ++i)
4313 sel[i] = i | nunits;
4315 perm_mask = gen_perm_mask (gather_off_vectype, sel);
4316 gcc_assert (perm_mask != NULL_TREE);
4318 else if (nunits == gather_off_nunits * 2)
4320 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4321 modifier = NARROW;
4323 for (i = 0; i < nunits; ++i)
4324 sel[i] = i < gather_off_nunits
4325 ? i : i + nunits - gather_off_nunits;
4327 perm_mask = gen_perm_mask (vectype, sel);
4328 gcc_assert (perm_mask != NULL_TREE);
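/* Worked example of the masks above (illustrative numbers only):
   with nunits == 4 and gather_off_nunits == 8 (modifier == WIDEN),
   sel[i] = i | 4 yields { 4, 5, 6, 7, 12, 13, 14, 15 }, which selects
   the upper half of the offset vector for the odd-numbered copies.
   With nunits == 8 and gather_off_nunits == 4 (modifier == NARROW),
   the mask is { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. it concatenates the
   low halves of two gather results into one full vector.  */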
4334 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4335 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4336 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4337 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4338 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4339 scaletype = TREE_VALUE (arglist);
4340 gcc_checking_assert (types_compatible_p (srctype, rettype)
4341 && types_compatible_p (srctype, masktype));
4343 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4345 ptr = fold_convert (ptrtype, gather_base);
4346 if (!is_gimple_min_invariant (ptr))
4348 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4349 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4350 gcc_assert (!new_bb);
4353 /* Currently we support only unconditional gather loads,
4354 so mask should be all ones. */
4355 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4356 mask = build_int_cst (TREE_TYPE (masktype), -1);
4357 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4361 for (j = 0; j < 6; ++j)
4362 tmp[j] = -1;
4363 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4364 mask = build_real (TREE_TYPE (masktype), r);
4368 mask = build_vector_from_val (masktype, mask);
4369 mask = vect_init_vector (stmt, mask, masktype, NULL);
4371 scale = build_int_cst (scaletype, gather_scale);
4373 prev_stmt_info = NULL;
4374 for (j = 0; j < ncopies; ++j)
4376 if (modifier == WIDEN && (j & 1))
4377 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4378 perm_mask, stmt, gsi);
4381 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4384 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4386 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4388 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4389 == TYPE_VECTOR_SUBPARTS (idxtype));
4390 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4391 add_referenced_var (var);
4392 var = make_ssa_name (var, NULL);
4393 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4395 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4396 op, NULL_TREE);
4397 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4402 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4404 if (!useless_type_conversion_p (vectype, rettype))
4406 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4407 == TYPE_VECTOR_SUBPARTS (rettype));
4408 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4409 add_referenced_var (var);
4410 op = make_ssa_name (var, new_stmt);
4411 gimple_call_set_lhs (new_stmt, op);
4412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4413 var = make_ssa_name (vec_dest, NULL);
4414 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4416 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4421 var = make_ssa_name (vec_dest, new_stmt);
4422 gimple_call_set_lhs (new_stmt, var);
4425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4427 if (modifier == NARROW)
4434 var = permute_vec_elements (prev_res, var,
4435 perm_mask, stmt, gsi);
4436 new_stmt = SSA_NAME_DEF_STMT (var);
4439 if (prev_stmt_info == NULL)
4440 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4442 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4443 prev_stmt_info = vinfo_for_stmt (new_stmt);
4450 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4452 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4453 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4454 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4456 /* Check if the chain of loads is already vectorized. */
4457 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4459 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4462 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4463 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4465 /* VEC_NUM is the number of vect stmts to be created for this group. */
4468 strided_load = false;
4469 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4470 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4474 vec_num = group_size;
4480 group_size = vec_num = 1;
4483 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4484 gcc_assert (alignment_support_scheme);
4485 /* Targets with load-lane instructions must not require explicit
4486 realignment. */
4487 gcc_assert (!load_lanes_p
4488 || alignment_support_scheme == dr_aligned
4489 || alignment_support_scheme == dr_unaligned_supported);
4491 /* In case the vectorization factor (VF) is bigger than the number
4492 of elements that we can fit in a vectype (nunits), we have to generate
4493 more than one vector stmt - i.e - we need to "unroll" the
4494 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4495 from one copy of the vector stmt to the next, in the field
4496 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4497 stages to find the correct vector defs to be used when vectorizing
4498 stmts that use the defs of the current stmt. The example below
4499 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4500 need to create 4 vectorized stmts):
4502 before vectorization:
4503 RELATED_STMT VEC_STMT
4504 S1: x = memref - -
4505 S2: z = x + 1 - -
4507 step 1: vectorize stmt S1:
4508 We first create the vector stmt VS1_0, and, as usual, record a
4509 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4510 Next, we create the vector stmt VS1_1, and record a pointer to
4511 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4512 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4513 stmts and pointers:
4514 RELATED_STMT VEC_STMT
4515 VS1_0: vx0 = memref0 VS1_1 -
4516 VS1_1: vx1 = memref1 VS1_2 -
4517 VS1_2: vx2 = memref2 VS1_3 -
4518 VS1_3: vx3 = memref3 - -
4519 S1: x = load - VS1_0
4520 S2: z = x + 1 - -
4522 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4523 information we recorded in the RELATED_STMT field is used to vectorize
4524 stmt S2. */
4526 /* In case of interleaving (non-unit strided access):
4533 Vectorized loads are created in the order of memory accesses
4534 starting from the access of the first stmt of the chain:
4536 VS1: vx0 = &base
4537 VS2: vx1 = &base + vec_size*1
4538 VS3: vx3 = &base + vec_size*2
4539 VS4: vx4 = &base + vec_size*3
4541 Then permutation statements are generated:
4543 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4544 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4547 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4548 (the order of the data-refs in the output of vect_permute_load_chain
4549 corresponds to the order of scalar stmts in the interleaving chain - see
4550 the documentation of vect_permute_load_chain()).
4551 The generation of permutation stmts and recording them in
4552 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4554 In case of both multiple types and interleaving, the vector loads and
4555 permutation stmts above are created for every copy. The result vector
4556 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4557 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4559 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4560 on a target that supports unaligned accesses (dr_unaligned_supported)
4561 we generate the following code:
4562 p = initial_addr;
4563 indx = 0;
4564 loop {
4565 p = p + indx * vectype_size;
4566 vec_dest = *(p);
4567 indx = indx + 1;
4568 }
4570 Otherwise, the data reference is potentially unaligned on a target that
4571 does not support unaligned accesses (dr_explicit_realign_optimized) -
4572 then generate the following code, in which the data in each iteration is
4573 obtained by two vector loads, one from the previous iteration, and one
4574 from the current iteration:
4576 msq_init = *(floor(p1))
4577 p2 = initial_addr + VS - 1;
4578 realignment_token = call target_builtin;
4579 indx = 0;
4580 loop {
4581 p2 = p2 + indx * vectype_size
4582 lsq = *(floor(p2))
4583 vec_dest = realign_load (msq, lsq, realignment_token)
4584 indx = indx + 1;
4585 msq = lsq;
4586 } */
4588 /* If the misalignment remains the same throughout the execution of the
4589 loop, we can create the init_addr and permutation mask at the loop
4590 preheader. Otherwise, it needs to be created inside the loop.
4591 This can only occur when vectorizing memory accesses in the inner-loop
4592 nested within an outer-loop that is being vectorized. */
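/* Sketch of the nested-loop case (hypothetical access, not from the
   original sources):

     for (i = 0; i < N; i++)      <-- outer loop being vectorized
       for (j = 0; j < M; j++)
         ... = a[3 * i + j];      <-- inner-loop data-ref

   Relative to the outer loop this access advances by DR_STEP == 12
   bytes per iteration for int elements, which is not a multiple of a
   16-byte vector size, so the misalignment changes with every outer
   iteration and the realignment data must be computed inside the
   loop.  */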
4594 if (loop && nested_in_vect_loop_p (loop, stmt)
4595 && (TREE_INT_CST_LOW (DR_STEP (dr))
4596 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4598 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4599 compute_in_loop = true;
4602 if ((alignment_support_scheme == dr_explicit_realign_optimized
4603 || alignment_support_scheme == dr_explicit_realign)
4604 && !compute_in_loop)
4606 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4607 alignment_support_scheme, NULL_TREE,
4608 &at_loop);
4609 if (alignment_support_scheme == dr_explicit_realign_optimized)
4611 phi = SSA_NAME_DEF_STMT (msq);
4612 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4619 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4622 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4624 aggr_type = vectype;
4626 prev_stmt_info = NULL;
4627 for (j = 0; j < ncopies; j++)
4629 /* 1. Create the vector or array pointer update chain. */
4631 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4632 offset, &dummy, gsi,
4633 &ptr_incr, false, &inv_p);
4635 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4636 TYPE_SIZE_UNIT (aggr_type));
4638 if (strided_load || slp_perm)
4639 dr_chain = VEC_alloc (tree, heap, vec_num);
4645 vec_array = create_vector_array (vectype, vec_num);
4647 /* Emit:
4648 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4649 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4650 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4651 gimple_call_set_lhs (new_stmt, vec_array);
4652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4653 mark_symbols_for_renaming (new_stmt);
4655 /* Extract each vector into an SSA_NAME. */
4656 for (i = 0; i < vec_num; i++)
4658 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4659 vec_array, i);
4660 VEC_quick_push (tree, dr_chain, new_temp);
4663 /* Record the mapping between SSA_NAMEs and statements. */
4664 vect_record_strided_load_vectors (stmt, dr_chain);
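/* Illustrative example (hypothetical, assuming a target with
   load-lane instructions such as ARM NEON's vld2): for a group of
   size 2 loading x = a[2*i] and y = a[2*i+1], a single LOAD_LANES
   call reads 2 * nunits consecutive elements and returns an array of
   two already de-interleaved vectors:

     vec_array = LOAD_LANES (a[2*i .. 2*i + 2*nunits - 1]);
     vx = vec_array[0];    // a[2*i],   a[2*i+2], ...
     vy = vec_array[1];    // a[2*i+1], a[2*i+3], ...  */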
4668 for (i = 0; i < vec_num; i++)
4671 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4672 stmt, NULL_TREE);
4674 /* 2. Create the vector-load in the loop. */
4675 switch (alignment_support_scheme)
4678 case dr_unaligned_supported:
4680 struct ptr_info_def *pi;
4682 = build2 (MEM_REF, vectype, dataref_ptr,
4683 build_int_cst (reference_alias_ptr_type
4684 (DR_REF (first_dr)), 0));
4685 pi = get_ptr_info (dataref_ptr);
4686 pi->align = TYPE_ALIGN_UNIT (vectype);
4687 if (alignment_support_scheme == dr_aligned)
4689 gcc_assert (aligned_access_p (first_dr));
4692 else if (DR_MISALIGNMENT (first_dr) == -1)
4694 TREE_TYPE (data_ref)
4695 = build_aligned_type (TREE_TYPE (data_ref),
4696 TYPE_ALIGN (elem_type));
4697 pi->align = TYPE_ALIGN_UNIT (elem_type);
4702 TREE_TYPE (data_ref)
4703 = build_aligned_type (TREE_TYPE (data_ref),
4704 TYPE_ALIGN (elem_type));
4705 pi->misalign = DR_MISALIGNMENT (first_dr);
4709 case dr_explicit_realign:
4714 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4716 if (compute_in_loop)
4717 msq = vect_setup_realignment (first_stmt, gsi,
4718 &realignment_token,
4719 dr_explicit_realign,
4720 dataref_ptr, NULL);
4722 new_stmt = gimple_build_assign_with_ops
4723 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4724 build_int_cst
4725 (TREE_TYPE (dataref_ptr),
4726 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4727 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4728 gimple_assign_set_lhs (new_stmt, ptr);
4729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4731 = build2 (MEM_REF, vectype, ptr,
4732 build_int_cst (reference_alias_ptr_type
4733 (DR_REF (first_dr)), 0));
4734 vec_dest = vect_create_destination_var (scalar_dest,
4736 new_stmt = gimple_build_assign (vec_dest, data_ref);
4737 new_temp = make_ssa_name (vec_dest, new_stmt);
4738 gimple_assign_set_lhs (new_stmt, new_temp);
4739 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4740 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4744 bump = size_binop (MULT_EXPR, vs_minus_1,
4745 TYPE_SIZE_UNIT (elem_type));
4746 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4747 new_stmt = gimple_build_assign_with_ops
4748 (BIT_AND_EXPR, NULL_TREE, ptr,
4749 build_int_cst
4750 (TREE_TYPE (dataref_ptr),
4751 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4752 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4753 gimple_assign_set_lhs (new_stmt, ptr);
4754 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4756 = build2 (MEM_REF, vectype, ptr,
4757 build_int_cst (reference_alias_ptr_type
4758 (DR_REF (first_dr)), 0));
4761 case dr_explicit_realign_optimized:
4762 new_stmt = gimple_build_assign_with_ops
4763 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4764 build_int_cst
4765 (TREE_TYPE (dataref_ptr),
4766 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4767 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4769 gimple_assign_set_lhs (new_stmt, new_temp);
4770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4772 = build2 (MEM_REF, vectype, new_temp,
4773 build_int_cst (reference_alias_ptr_type
4774 (DR_REF (first_dr)), 0));
4779 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4780 new_stmt = gimple_build_assign (vec_dest, data_ref);
4781 new_temp = make_ssa_name (vec_dest, new_stmt);
4782 gimple_assign_set_lhs (new_stmt, new_temp);
4783 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4784 mark_symbols_for_renaming (new_stmt);
4786 /* 3. Handle explicit realignment if necessary/supported.
4787 Create in loop:
4788 vec_dest = realign_load (msq, lsq, realignment_token) */
4789 if (alignment_support_scheme == dr_explicit_realign_optimized
4790 || alignment_support_scheme == dr_explicit_realign)
4792 lsq = gimple_assign_lhs (new_stmt);
4793 if (!realignment_token)
4794 realignment_token = dataref_ptr;
4795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4796 new_stmt
4797 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4798 vec_dest, msq, lsq,
4799 realignment_token);
4800 new_temp = make_ssa_name (vec_dest, new_stmt);
4801 gimple_assign_set_lhs (new_stmt, new_temp);
4802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4804 if (alignment_support_scheme == dr_explicit_realign_optimized)
4807 if (i == vec_num - 1 && j == ncopies - 1)
4808 add_phi_arg (phi, lsq,
4809 loop_latch_edge (containing_loop),
4810 UNKNOWN_LOCATION);
4815 /* 4. Handle invariant-load. */
4816 if (inv_p && !bb_vinfo)
4819 gimple_stmt_iterator gsi2 = *gsi;
4820 gcc_assert (!strided_load);
4823 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4826 tem = fold_convert (TREE_TYPE (vectype), tem);
4827 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4828 NULL_TREE, true, GSI_SAME_STMT);
4831 vec_inv = build_vector_from_val (vectype, tem);
4832 new_temp = vect_init_vector (stmt, vec_inv,
4833 vectype, &gsi2);
4834 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4839 tree perm_mask = perm_mask_for_reverse (vectype);
4840 new_temp = permute_vec_elements (new_temp, new_temp,
4841 perm_mask, stmt, gsi);
4842 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4845 /* Collect vector loads and later create their permutation in
4846 vect_transform_strided_load (). */
4847 if (strided_load || slp_perm)
4848 VEC_quick_push (tree, dr_chain, new_temp);
4850 /* Store vector loads in the corresponding SLP_NODE. */
4851 if (slp && !slp_perm)
4852 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4857 if (slp && !slp_perm)
4862 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4863 slp_node_instance, false))
4865 VEC_free (tree, heap, dr_chain);
4874 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4875 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4880 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4883 prev_stmt_info = vinfo_for_stmt (new_stmt);
4887 VEC_free (tree, heap, dr_chain);
4893 /* Function vect_is_simple_cond.
4895 Input:
4896 LOOP - the loop that is being vectorized.
4897 COND - Condition that is checked for simple use.
4899 Output:
4900 *COMP_VECTYPE - the vector type for the comparison.
4902 Returns whether a COND can be vectorized. Checks whether
4903 condition operands are supportable using vect_is_simple_use. */
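/* For example (illustrative only): in x = a < b ? c : d the condition
   a < b is simple if each of a and b is either a constant
   (INTEGER_CST, REAL_CST or FIXED_CST) or an SSA_NAME whose
   definition is vectorizable; *COMP_VECTYPE is then taken from the
   vectype of the SSA_NAME operands' defining stmts.  */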
4906 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4911 enum vect_def_type dt;
4912 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4914 if (!COMPARISON_CLASS_P (cond))
4917 lhs = TREE_OPERAND (cond, 0);
4918 rhs = TREE_OPERAND (cond, 1);
4920 if (TREE_CODE (lhs) == SSA_NAME)
4922 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4923 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4927 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4928 && TREE_CODE (lhs) != FIXED_CST)
4931 if (TREE_CODE (rhs) == SSA_NAME)
4933 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4934 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4938 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4939 && TREE_CODE (rhs) != FIXED_CST)
4942 *comp_vectype = vectype1 ? vectype1 : vectype2;
4946 /* vectorizable_condition.
4948 Check if STMT is a conditional modify expression that can be vectorized.
4949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4950 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4953 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4954 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4955 the else clause if it is 2).
4957 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
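/* Sketch of the transformation (illustrative only): the scalar stmt

     x = a < b ? c : d;

   becomes, for each vector copy,

     vec_compare = a_v < b_v;
     x_v = VEC_COND_EXPR <vec_compare, c_v, d_v>;

   where a_v, b_v, c_v and d_v are the vector defs of the four
   operands.  */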
4960 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4961 gimple *vec_stmt, tree reduc_def, int reduc_index,
4964 tree scalar_dest = NULL_TREE;
4965 tree vec_dest = NULL_TREE;
4966 tree cond_expr, then_clause, else_clause;
4967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4970 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4971 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4972 tree vec_compare, vec_cond_expr;
4974 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4976 enum vect_def_type dt, dts[4];
4977 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4979 enum tree_code code;
4980 stmt_vec_info prev_stmt_info = NULL;
4982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4983 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4984 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4986 if (slp_node || PURE_SLP_STMT (stmt_info))
4989 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4991 gcc_assert (ncopies >= 1);
4992 if (reduc_index && ncopies > 1)
4993 return false; /* FORNOW */
4995 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4998 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5001 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5002 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5006 /* FORNOW: not yet supported. */
5007 if (STMT_VINFO_LIVE_P (stmt_info))
5009 if (vect_print_dump_info (REPORT_DETAILS))
5010 fprintf (vect_dump, "value used after loop.");
5014 /* Is vectorizable conditional operation? */
5015 if (!is_gimple_assign (stmt))
5018 code = gimple_assign_rhs_code (stmt);
5020 if (code != COND_EXPR)
5023 cond_expr = gimple_assign_rhs1 (stmt);
5024 then_clause = gimple_assign_rhs2 (stmt);
5025 else_clause = gimple_assign_rhs3 (stmt);
5027 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
5031 if (TREE_CODE (then_clause) == SSA_NAME)
5033 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5034 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
5035 &then_def_stmt, &def, &dt))
5038 else if (TREE_CODE (then_clause) != INTEGER_CST
5039 && TREE_CODE (then_clause) != REAL_CST
5040 && TREE_CODE (then_clause) != FIXED_CST)
5043 if (TREE_CODE (else_clause) == SSA_NAME)
5045 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5046 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
5047 &else_def_stmt, &def, &dt))
5050 else if (TREE_CODE (else_clause) != INTEGER_CST
5051 && TREE_CODE (else_clause) != REAL_CST
5052 && TREE_CODE (else_clause) != FIXED_CST)
5057 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5058 return expand_vec_cond_expr_p (vectype, comp_vectype);
5065 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5066 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5067 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5068 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5072 scalar_dest = gimple_assign_lhs (stmt);
5073 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5075 /* Handle cond expr. */
5076 for (j = 0; j < ncopies; j++)
5078 gimple new_stmt = NULL;
5083 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5084 VEC (slp_void_p, heap) *vec_defs;
5086 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5087 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5088 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5089 VEC_safe_push (tree, heap, ops, then_clause);
5090 VEC_safe_push (tree, heap, ops, else_clause);
5091 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5092 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5093 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5094 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5095 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5097 VEC_free (tree, heap, ops);
5098 VEC_free (slp_void_p, heap, vec_defs);
5104 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5106 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5107 NULL, &gtemp, &def, &dts[0]);
5110 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5112 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5113 NULL, &gtemp, &def, &dts[1]);
5114 if (reduc_index == 1)
5115 vec_then_clause = reduc_def;
5118 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5120 vect_is_simple_use (then_clause, loop_vinfo,
5121 NULL, &gtemp, &def, &dts[2]);
5123 if (reduc_index == 2)
5124 vec_else_clause = reduc_def;
5127 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5129 vect_is_simple_use (else_clause, loop_vinfo,
5130 NULL, &gtemp, &def, &dts[3]);
5136 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5137 VEC_pop (tree, vec_oprnds0));
5138 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5139 VEC_pop (tree, vec_oprnds1));
5140 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5141 VEC_pop (tree, vec_oprnds2));
5142 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5143 VEC_pop (tree, vec_oprnds3));
5148 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5149 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5150 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5151 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5154 /* Arguments are ready. Create the new vector stmt. */
5155 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5157 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5158 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5159 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5161 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5162 vec_cond_lhs, vec_cond_rhs);
5163 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5164 vec_compare, vec_then_clause, vec_else_clause);
5166 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5167 new_temp = make_ssa_name (vec_dest, new_stmt);
5168 gimple_assign_set_lhs (new_stmt, new_temp);
5169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5171 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5178 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5180 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5182 prev_stmt_info = vinfo_for_stmt (new_stmt);
5185 VEC_free (tree, heap, vec_oprnds0);
5186 VEC_free (tree, heap, vec_oprnds1);
5187 VEC_free (tree, heap, vec_oprnds2);
5188 VEC_free (tree, heap, vec_oprnds3);
5194 /* Make sure the statement is vectorizable. */
5197 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5201 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5203 tree scalar_type, vectype;
5204 gimple pattern_stmt, pattern_def_stmt;
5206 if (vect_print_dump_info (REPORT_DETAILS))
5208 fprintf (vect_dump, "==> examining statement: ");
5209 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5212 if (gimple_has_volatile_ops (stmt))
5214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5215 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5220 /* Skip stmts that do not need to be vectorized. In loops this is expected
5222 - the COND_EXPR which is the loop exit condition
5223 - any LABEL_EXPRs in the loop
5224 - computations that are used only for array indexing or loop control.
5225 In basic blocks we only analyze statements that are a part of some SLP
5226 instance, therefore, all the statements are relevant.
5228 Pattern statement needs to be analyzed instead of the original statement
5229 if the original statement is not relevant. Otherwise, we analyze both
5230 statements. */
5232 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5233 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5234 && !STMT_VINFO_LIVE_P (stmt_info))
5236 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5238 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5239 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5241 /* Analyze PATTERN_STMT instead of the original stmt. */
5242 stmt = pattern_stmt;
5243 stmt_info = vinfo_for_stmt (pattern_stmt);
5244 if (vect_print_dump_info (REPORT_DETAILS))
5246 fprintf (vect_dump, "==> examining pattern statement: ");
5247 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5252 if (vect_print_dump_info (REPORT_DETAILS))
5253 fprintf (vect_dump, "irrelevant.");
5258 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5260 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5261 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5263 /* Analyze PATTERN_STMT too. */
5264 if (vect_print_dump_info (REPORT_DETAILS))
5266 fprintf (vect_dump, "==> examining pattern statement: ");
5267 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5270 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5274 if (is_pattern_stmt_p (stmt_info)
5275 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5276 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5277 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5279 /* Analyze def stmt of STMT if it's a pattern stmt. */
5280 if (vect_print_dump_info (REPORT_DETAILS))
5282 fprintf (vect_dump, "==> examining pattern def statement: ");
5283 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5286 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5291 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5293 case vect_internal_def:
5296 case vect_reduction_def:
5297 case vect_nested_cycle:
5298 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5299 || relevance == vect_used_in_outer_by_reduction
5300 || relevance == vect_unused_in_scope));
5303 case vect_induction_def:
5304 case vect_constant_def:
5305 case vect_external_def:
5306 case vect_unknown_def_type:
5313 gcc_assert (PURE_SLP_STMT (stmt_info));
5315 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5316 if (vect_print_dump_info (REPORT_DETAILS))
5318 fprintf (vect_dump, "get vectype for scalar type: ");
5319 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5322 vectype = get_vectype_for_scalar_type (scalar_type);
5325 if (vect_print_dump_info (REPORT_DETAILS))
5327 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5328 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5333 if (vect_print_dump_info (REPORT_DETAILS))
5335 fprintf (vect_dump, "vectype: ");
5336 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5339 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5342 if (STMT_VINFO_RELEVANT_P (stmt_info))
5344 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5345 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5346 *need_to_vectorize = true;
5351 && (STMT_VINFO_RELEVANT_P (stmt_info)
5352 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5353 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5354 || vectorizable_shift (stmt, NULL, NULL, NULL)
5355 || vectorizable_operation (stmt, NULL, NULL, NULL)
5356 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5357 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5358 || vectorizable_call (stmt, NULL, NULL, NULL)
5359 || vectorizable_store (stmt, NULL, NULL, NULL)
5360 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5361 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5365 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5366 || vectorizable_shift (stmt, NULL, NULL, node)
5367 || vectorizable_operation (stmt, NULL, NULL, node)
5368 || vectorizable_assignment (stmt, NULL, NULL, node)
5369 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5370 || vectorizable_call (stmt, NULL, NULL, node)
5371 || vectorizable_store (stmt, NULL, NULL, node)
5372 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5377 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5379 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5380 fprintf (vect_dump, "supported: ");
5381 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5390 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5391 need extra handling, except for vectorizable reductions. */
5392 if (STMT_VINFO_LIVE_P (stmt_info)
5393 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5394 ok = vectorizable_live_operation (stmt, NULL, NULL);
5398 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5400 fprintf (vect_dump, "not vectorized: live stmt not ");
5401 fprintf (vect_dump, "supported: ");
5402 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5412 /* Function vect_transform_stmt.
5414 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5417 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5418 bool *strided_store, slp_tree slp_node,
5419 slp_instance slp_node_instance)
5421 bool is_store = false;
5422 gimple vec_stmt = NULL;
5423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5426 switch (STMT_VINFO_TYPE (stmt_info))
5428 case type_demotion_vec_info_type:
5429 case type_promotion_vec_info_type:
5430 case type_conversion_vec_info_type:
5431 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5435 case induc_vec_info_type:
5436 gcc_assert (!slp_node);
5437 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5441 case shift_vec_info_type:
5442 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5446 case op_vec_info_type:
5447 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5451 case assignment_vec_info_type:
5452 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5456 case load_vec_info_type:
5457 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5462 case store_vec_info_type:
5463 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5465 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5467 /* In case of interleaving, the whole chain is vectorized when the
5468 last store in the chain is reached. Store stmts before the last
5469 one are skipped, and their vec_stmt_info shouldn't be freed
5470 meanwhile. */
5471 *strided_store = true;
5472 if (STMT_VINFO_VEC_STMT (stmt_info))
5479 case condition_vec_info_type:
5480 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5484 case call_vec_info_type:
5485 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5486 stmt = gsi_stmt (*gsi);
5489 case reduc_vec_info_type:
5490 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5495 if (!STMT_VINFO_LIVE_P (stmt_info))
5497 if (vect_print_dump_info (REPORT_DETAILS))
5498 fprintf (vect_dump, "stmt not supported.");
5503 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5504 is being vectorized, but outside the immediately enclosing loop. */
5506 && STMT_VINFO_LOOP_VINFO (stmt_info)
5507 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5508 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5509 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5510 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5511 || STMT_VINFO_RELEVANT (stmt_info) ==
5512 vect_used_in_outer_by_reduction))
5514 struct loop *innerloop = LOOP_VINFO_LOOP (
5515 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5516 imm_use_iterator imm_iter;
5517 use_operand_p use_p;
5521 if (vect_print_dump_info (REPORT_DETAILS))
5522 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5524 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5525 (to be used when vectorizing outer-loop stmts that use the DEF of
5526 STMT). */
5527 if (gimple_code (stmt) == GIMPLE_PHI)
5528 scalar_dest = PHI_RESULT (stmt);
5530 scalar_dest = gimple_assign_lhs (stmt);
5532 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5534 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5536 exit_phi = USE_STMT (use_p);
5537 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5542 /* Handle stmts whose DEF is used outside the loop-nest that is
5543 being vectorized. */
5544 if (STMT_VINFO_LIVE_P (stmt_info)
5545 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5547 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5552 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5558 /* Remove a group of stores (for SLP or interleaving), free their
5559 stmt_vec_info. */
5562 vect_remove_stores (gimple first_stmt)
5564 gimple next = first_stmt;
5566 gimple_stmt_iterator next_si;
5570 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5572 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5573 if (is_pattern_stmt_p (stmt_info))
5574 next = STMT_VINFO_RELATED_STMT (stmt_info);
5575 /* Free the attached stmt_vec_info and remove the stmt. */
5576 next_si = gsi_for_stmt (next);
5577 gsi_remove (&next_si, true);
5578 free_stmt_vec_info (next);
5584 /* Function new_stmt_vec_info.
5586 Create and initialize a new stmt_vec_info struct for STMT. */
5589 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5590 bb_vec_info bb_vinfo)
5593 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5595 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5596 STMT_VINFO_STMT (res) = stmt;
5597 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5598 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5599 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5600 STMT_VINFO_LIVE_P (res) = false;
5601 STMT_VINFO_VECTYPE (res) = NULL;
5602 STMT_VINFO_VEC_STMT (res) = NULL;
5603 STMT_VINFO_VECTORIZABLE (res) = true;
5604 STMT_VINFO_IN_PATTERN_P (res) = false;
5605 STMT_VINFO_RELATED_STMT (res) = NULL;
5606 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5607 STMT_VINFO_DATA_REF (res) = NULL;
5609 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5610 STMT_VINFO_DR_OFFSET (res) = NULL;
5611 STMT_VINFO_DR_INIT (res) = NULL;
5612 STMT_VINFO_DR_STEP (res) = NULL;
5613 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5615 if (gimple_code (stmt) == GIMPLE_PHI
5616 && is_loop_header_bb_p (gimple_bb (stmt)))
5617 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5619 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5621 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5622 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5623 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5624 STMT_SLP_TYPE (res) = loop_vect;
5625 GROUP_FIRST_ELEMENT (res) = NULL;
5626 GROUP_NEXT_ELEMENT (res) = NULL;
5627 GROUP_SIZE (res) = 0;
5628 GROUP_STORE_COUNT (res) = 0;
5629 GROUP_GAP (res) = 0;
5630 GROUP_SAME_DR_STMT (res) = NULL;
5631 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5637 /* Create a hash table for stmt_vec_info. */
5640 init_stmt_vec_info_vec (void)
5642 gcc_assert (!stmt_vec_info_vec);
5643 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5647 /* Free hash table for stmt_vec_info. */
5650 free_stmt_vec_info_vec (void)
5652 gcc_assert (stmt_vec_info_vec);
5653 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5657 /* Free stmt vectorization related info. */
5660 free_stmt_vec_info (gimple stmt)
5662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5667 /* Check if this statement has a related "pattern stmt"
5668 (introduced by the vectorizer during the pattern recognition
5669 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5670 too. */
5671 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5673 stmt_vec_info patt_info
5674 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5677 if (STMT_VINFO_PATTERN_DEF_STMT (patt_info))
5678 free_stmt_vec_info (STMT_VINFO_PATTERN_DEF_STMT (patt_info));
5679 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5683 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5684 set_vinfo_for_stmt (stmt, NULL);
5689 /* Function get_vectype_for_scalar_type_and_size.
5691 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5692 by the target. */
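/* E.g. (illustrative numbers): for SCALAR_TYPE int (4 bytes) and
   SIZE 16 this computes nunits == 16 / 4 == 4 and returns a V4SI-mode
   vector type; with SIZE == 0 the target's preferred SIMD mode is
   used instead.  */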
5695 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5697 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5698 enum machine_mode simd_mode;
5699 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5706 /* We can't build a vector type of elements with alignment bigger than
5707 their size. */
5708 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5711 /* For vector types of elements whose mode precision doesn't
5712 match their type's precision we use an element type of mode
5713 precision. The vectorization routines will have to make sure
5714 they support the proper result truncation/extension. */
5715 if (INTEGRAL_TYPE_P (scalar_type)
5716 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5717 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5718 TYPE_UNSIGNED (scalar_type));
5720 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5721 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5724 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5725 When the component mode passes the above test simply use a type
5726 corresponding to that mode. The theory is that any use that
5727 would cause problems with this will disable vectorization anyway. */
5728 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5729 && !INTEGRAL_TYPE_P (scalar_type)
5730 && !POINTER_TYPE_P (scalar_type))
5731 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5733 /* If no size was supplied use the mode the target prefers. Otherwise
5734 lookup a vector mode of the specified size. */
5736 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5738 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5739 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5743 vectype = build_vector_type (scalar_type, nunits);
5744 if (vect_print_dump_info (REPORT_DETAILS))
5746 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5747 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5753 if (vect_print_dump_info (REPORT_DETAILS))
5755 fprintf (vect_dump, "vectype: ");
5756 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5759 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5760 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5762 if (vect_print_dump_info (REPORT_DETAILS))
5763 fprintf (vect_dump, "mode not supported by target.");
5770 unsigned int current_vector_size;
5772 /* Function get_vectype_for_scalar_type.
5774 Returns the vector type corresponding to SCALAR_TYPE as supported
5778 get_vectype_for_scalar_type (tree scalar_type)
5781 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5782 current_vector_size);
5784 && current_vector_size == 0)
5785 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5789 /* Function get_same_sized_vectype
5791 Returns a vector type corresponding to SCALAR_TYPE that has the same
5792 size as VECTOR_TYPE, if supported by the target. */
5795 get_same_sized_vectype (tree scalar_type, tree vector_type)
5797 return get_vectype_for_scalar_type_and_size
5798 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5801 /* Function vect_is_simple_use.
5803 Input:
5804 LOOP_VINFO - the vect info of the loop that is being vectorized.
5805 BB_VINFO - the vect info of the basic block that is being vectorized.
5806 OPERAND - operand of a stmt in the loop or bb.
5807 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5809 Returns whether a stmt with OPERAND can be vectorized.
5810 For loops, supportable operands are constants, loop invariants, and operands
5811 that are defined by the current iteration of the loop. Unsupportable
5812 operands are those that are defined by a previous iteration of the loop (as
5813 is the case in reduction/induction computations).
5814 For basic blocks, supportable operands are constants and bb invariants.
5815 For now, operands defined outside the basic block are not supported. */
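/* For example (illustrative only), when vectorizing the loop

     for (i = 0; i < n; i++)
       a[i] = 5 * b[i] + x;

   the operand 5 is vect_constant_def, the loop-invariant x is
   vect_external_def, and the value loaded from b[i], defined by a
   stmt inside the loop, is vect_internal_def.  */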
5818 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5819 bb_vec_info bb_vinfo, gimple *def_stmt,
5820 tree *def, enum vect_def_type *dt)
5823 stmt_vec_info stmt_vinfo;
5824 struct loop *loop = NULL;
5827 loop = LOOP_VINFO_LOOP (loop_vinfo);
5832 if (vect_print_dump_info (REPORT_DETAILS))
5834 fprintf (vect_dump, "vect_is_simple_use: operand ");
5835 print_generic_expr (vect_dump, operand, TDF_SLIM);
5838 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5840 *dt = vect_constant_def;
5844 if (is_gimple_min_invariant (operand))
5847 *dt = vect_external_def;
5851 if (TREE_CODE (operand) == PAREN_EXPR)
5853 if (vect_print_dump_info (REPORT_DETAILS))
5854 fprintf (vect_dump, "non-associatable copy.");
5855 operand = TREE_OPERAND (operand, 0);
5858 if (TREE_CODE (operand) != SSA_NAME)
5860 if (vect_print_dump_info (REPORT_DETAILS))
5861 fprintf (vect_dump, "not ssa-name.");
5865 *def_stmt = SSA_NAME_DEF_STMT (operand);
5866 if (*def_stmt == NULL)
5868 if (vect_print_dump_info (REPORT_DETAILS))
5869 fprintf (vect_dump, "no def_stmt.");
5873 if (vect_print_dump_info (REPORT_DETAILS))
5875 fprintf (vect_dump, "def_stmt: ");
5876 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5879 /* Empty stmt is expected only in case of a function argument.
5880 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5881 if (gimple_nop_p (*def_stmt))
5884 *dt = vect_external_def;
5888 bb = gimple_bb (*def_stmt);
5890 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5891 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5892 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5893 *dt = vect_external_def;
5896 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5897 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5900 if (*dt == vect_unknown_def_type)
5902 if (vect_print_dump_info (REPORT_DETAILS))
5903 fprintf (vect_dump, "Unsupported pattern.");
5907 if (vect_print_dump_info (REPORT_DETAILS))
5908 fprintf (vect_dump, "type of def: %d.",*dt);
5910 switch (gimple_code (*def_stmt))
5913 *def = gimple_phi_result (*def_stmt);
5917 *def = gimple_assign_lhs (*def_stmt);
5921 *def = gimple_call_lhs (*def_stmt);
5926 if (vect_print_dump_info (REPORT_DETAILS))
5927 fprintf (vect_dump, "unsupported defining stmt: ");
5934 /* Function vect_is_simple_use_1.
5936 Same as vect_is_simple_use but also determines the vector operand
5937 type of OPERAND and stores it to *VECTYPE. If the definition of
5938 OPERAND is vect_uninitialized_def, vect_constant_def or
5939 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5940 is responsible for computing the best suited vector type for the
5941 scalar operand. */
5944 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5945 bb_vec_info bb_vinfo, gimple *def_stmt,
5946 tree *def, enum vect_def_type *dt, tree *vectype)
5948 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5951 /* Now get a vector type if the def is internal, otherwise supply
5952 NULL_TREE and leave it up to the caller to figure out a proper
5953 type for the use stmt. */
5954 if (*dt == vect_internal_def
5955 || *dt == vect_induction_def
5956 || *dt == vect_reduction_def
5957 || *dt == vect_double_reduction_def
5958 || *dt == vect_nested_cycle)
5960 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5962 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5963 && !STMT_VINFO_RELEVANT (stmt_info)
5964 && !STMT_VINFO_LIVE_P (stmt_info))
5965 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5967 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5968 gcc_assert (*vectype != NULL_TREE);
5970 else if (*dt == vect_uninitialized_def
5971 || *dt == vect_constant_def
5972 || *dt == vect_external_def)
5973 *vectype = NULL_TREE;
5981 /* Function supportable_widening_operation
5983 Check whether an operation represented by the code CODE is a
5984 widening operation that is supported by the target platform in
5985 vector form (i.e., when operating on arguments of type VECTYPE_IN
5986 producing a result of type VECTYPE_OUT).
5988 Widening operations we currently support are NOP (CONVERT), FLOAT
5989 and WIDEN_MULT. This function checks if these operations are supported
5990 by the target platform either directly (via vector tree-codes), or via
5991 target builtins.
5993 Output:
5994 - CODE1 and CODE2 are codes of vector operations to be used when
5995 vectorizing the operation, if available.
5996 - DECL1 and DECL2 are decls of target builtin functions to be used
5997 when vectorizing the operation, if available. In this case,
5998 CODE1 and CODE2 are CALL_EXPR.
5999 - MULTI_STEP_CVT determines the number of required intermediate steps in
6000 case of multi-step conversion (like char->short->int - in that case
6001 MULTI_STEP_CVT will be 1).
6002 - INTERM_TYPES contains the intermediate type required to perform the
6003 widening operation (short in the above example). */
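/* Worked example (illustrative only): widening chars to ints on a
   target providing VEC_UNPACK_{LO,HI}_EXPR for V16QI -> V8HI and
   V8HI -> V4SI.  Going from V16QI all the way to V4SI needs one
   intermediate step (*MULTI_STEP_CVT == 1) through the short vector
   type V8HI, which is recorded in *INTERM_TYPES; each step applies
   the LO/HI pair to produce two wider vectors from each input
   vector.  */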
6006 supportable_widening_operation (enum tree_code code, gimple stmt,
6007 tree vectype_out, tree vectype_in,
6008 tree *decl1, tree *decl2,
6009 enum tree_code *code1, enum tree_code *code2,
6010 int *multi_step_cvt,
6011 VEC (tree, heap) **interm_types)
6013 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6014 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6015 struct loop *vect_loop = NULL;
6017 enum machine_mode vec_mode;
6018 enum insn_code icode1, icode2;
6019 optab optab1, optab2;
6020 tree vectype = vectype_in;
6021 tree wide_vectype = vectype_out;
6022 enum tree_code c1, c2;
6024 tree prev_type, intermediate_type;
6025 enum machine_mode intermediate_mode, prev_mode;
6026 optab optab3, optab4;
6028 *multi_step_cvt = 0;
6030 vect_loop = LOOP_VINFO_LOOP (loop_info);
6032 /* The result of a vectorized widening operation usually requires two vectors
6033 (because the widened results do not fit into one vector). The generated
6034 vector results would normally be expected to be generated in the same
6035 order as in the original scalar computation, i.e. if 8 results are
6036 generated in each vector iteration, they are to be organized as follows:
6037 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6039 However, in the special case that the result of the widening operation is
6040 used in a reduction computation only, the order doesn't matter (because
6041 when vectorizing a reduction we change the order of the computation).
6042 Some targets can take advantage of this and generate more efficient code.
6043 For example, targets like Altivec, that support widen_mult using a sequence
6044 of {mult_even,mult_odd} generate the following vectors:
6045 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6047 When vectorizing outer-loops, we execute the inner-loop sequentially
6048 (each vectorized inner-loop iteration contributes to VF outer-loop
6049 iterations in parallel). We therefore don't allow changing the order
6050 of the computation in the inner-loop during outer-loop vectorization. */
6053 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6054 && !nested_in_vect_loop_p (vect_loop, stmt))
6060 && code == WIDEN_MULT_EXPR
6061 && targetm.vectorize.builtin_mul_widen_even
6062 && targetm.vectorize.builtin_mul_widen_even (vectype)
6063 && targetm.vectorize.builtin_mul_widen_odd
6064 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6066 if (vect_print_dump_info (REPORT_DETAILS))
6067 fprintf (vect_dump, "Unordered widening operation detected.");
6069 *code1 = *code2 = CALL_EXPR;
6070 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6071 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6077 case WIDEN_MULT_EXPR:
6078 c1 = VEC_WIDEN_MULT_LO_EXPR;
6079 c2 = VEC_WIDEN_MULT_HI_EXPR;
6082 case WIDEN_LSHIFT_EXPR:
6083 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6084 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6088 c1 = VEC_UNPACK_LO_EXPR;
6089 c2 = VEC_UNPACK_HI_EXPR;
6093 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6094 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6097 case FIX_TRUNC_EXPR:
6098 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6099 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6100 computing the operation. */
6107 if (BYTES_BIG_ENDIAN)
6109 enum tree_code ctmp = c1;
6110 c1 = c2;
6111 c2 = ctmp;
6114 if (code == FIX_TRUNC_EXPR)
6116 /* The signedness is determined from output operand. */
6117 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6118 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6122 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6123 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6126 if (!optab1 || !optab2)
6129 vec_mode = TYPE_MODE (vectype);
6130 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6131 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6137 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6138 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6141 /* Check if it's a multi-step conversion that can be done using intermediate
6142 types. */
6144 prev_type = vectype;
6145 prev_mode = vec_mode;
6147 if (!CONVERT_EXPR_CODE_P (code))
6150 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6151 intermediate steps in the promotion sequence. We try
6152 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6153 not. */
6154 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6155 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6157 intermediate_mode = insn_data[icode1].operand[0].mode;
6159 = lang_hooks.types.type_for_mode (intermediate_mode,
6160 TYPE_UNSIGNED (prev_type));
6161 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6162 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6164 if (!optab3 || !optab4
6165 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6166 || insn_data[icode1].operand[0].mode != intermediate_mode
6167 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6168 || insn_data[icode2].operand[0].mode != intermediate_mode
6169 || ((icode1 = optab_handler (optab3, intermediate_mode))
6170 == CODE_FOR_nothing)
6171 || ((icode2 = optab_handler (optab4, intermediate_mode))
6172 == CODE_FOR_nothing))
6175 VEC_quick_push (tree, *interm_types, intermediate_type);
6176 (*multi_step_cvt)++;
6178 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6179 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6182 prev_type = intermediate_type;
6183 prev_mode = intermediate_mode;
6186 VEC_free (tree, heap, *interm_types);
6191 /* Function supportable_narrowing_operation
6193 Check whether an operation represented by the code CODE is a
6194 narrowing operation that is supported by the target platform in
6195 vector form (i.e., when operating on arguments of type VECTYPE_IN
6196 and producing a result of type VECTYPE_OUT).
6198 Narrowing operations we currently support are NOP (CONVERT) and
6199 FIX_TRUNC. This function checks if these operations are supported by
6200 the target platform directly via vector tree-codes.
6202 Output:
6203 - CODE1 is the code of a vector operation to be used when
6204 vectorizing the operation, if available.
6205 - MULTI_STEP_CVT determines the number of required intermediate steps in
6206 case of multi-step conversion (like int->short->char - in that case
6207 MULTI_STEP_CVT will be 1).
6208 - INTERM_TYPES contains the intermediate type required to perform the
6209 narrowing operation (short in the above example). */
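/* Worked example (illustrative only): narrowing ints to chars with
   VEC_PACK_TRUNC_EXPR.  A V4SI -> V16QI conversion goes through V8HI
   (one intermediate step, *MULTI_STEP_CVT == 1): each step packs two
   input vectors into one narrower vector, so four V4SI vectors become
   two V8HI vectors and finally one V16QI vector.  */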
6212 supportable_narrowing_operation (enum tree_code code,
6213 tree vectype_out, tree vectype_in,
6214 enum tree_code *code1, int *multi_step_cvt,
6215 VEC (tree, heap) **interm_types)
6217 enum machine_mode vec_mode;
6218 enum insn_code icode1;
6219 optab optab1, interm_optab;
6220 tree vectype = vectype_in;
6221 tree narrow_vectype = vectype_out;
6223 tree intermediate_type;
6224 enum machine_mode intermediate_mode, prev_mode;
6228 *multi_step_cvt = 0;
6232 c1 = VEC_PACK_TRUNC_EXPR;
6235 case FIX_TRUNC_EXPR:
6236 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6240 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6241 tree code and optabs used for computing the operation. */
6248 if (code == FIX_TRUNC_EXPR)
6249 /* The signedness is determined from output operand. */
6250 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6252 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6257 vec_mode = TYPE_MODE (vectype);
6258 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6263 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6266 /* Check if it's a multi-step conversion that can be done using intermediate
6267 types. */
6268 prev_mode = vec_mode;
6269 if (code == FIX_TRUNC_EXPR)
6270 uns = TYPE_UNSIGNED (vectype_out);
6272 uns = TYPE_UNSIGNED (vectype);
6274 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6275 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6276 costly than signed. */
6277 if (code == FIX_TRUNC_EXPR && uns)
6279 enum insn_code icode2;
6282 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6284 = optab_for_tree_code (c1, intermediate_type, optab_default);
6285 if (interm_optab != NULL
6286 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6287 && insn_data[icode1].operand[0].mode
6288 == insn_data[icode2].operand[0].mode)
6291 optab1 = interm_optab;
6296 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6297 intermediate steps in the narrowing sequence. We try
6298 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6299 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6300 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6302 intermediate_mode = insn_data[icode1].operand[0].mode;
6304 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6306 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6309 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6310 || insn_data[icode1].operand[0].mode != intermediate_mode
6311 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6312 == CODE_FOR_nothing))
6315 VEC_quick_push (tree, *interm_types, intermediate_type);
6316 (*multi_step_cvt)++;
6318 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6321 prev_mode = intermediate_mode;
6322 optab1 = interm_optab;
6325 VEC_free (tree, heap, *interm_types);