1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
/* NOTE(review): the storage-class/return-type line (original line ~47,
   presumably `static tree`) and the closing of the call/function are
   elided from this listing — confirm against the full source.  */
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
/* Builds the array type ELEM_TYPE[NELEMS] and creates a temporary of it.  */
50   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
54 /* ARRAY is an array of vectors created by create_vector_array.
55    Return an SSA_NAME for the vector in index N.  The reference
56    is part of the vectorization of STMT and the vector is associated
57    with scalar destination SCALAR_DEST. */
/* NOTE(review): the function's return-type line, the declaration of
   `new_stmt', and the trailing `return vect_name;' are elided from this
   listing.  */
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 		   tree array, unsigned HOST_WIDE_INT n)
63   tree vect_type, vect, vect_name, array_ref;
/* The element type of the vector array is the vector type to load.  */
66   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67   vect_type = TREE_TYPE (TREE_TYPE (array));
68   vect = vect_create_destination_var (scalar_dest, vect_type);
/* Build ARRAY[N] as the RHS of the new load statement.  */
69   array_ref = build4 (ARRAY_REF, vect_type, array,
70 		      build_int_cst (size_type_node, n),
71 		      NULL_TREE, NULL_TREE);
73   new_stmt = gimple_build_assign (vect, array_ref);
74   vect_name = make_ssa_name (vect, new_stmt);
75   gimple_assign_set_lhs (new_stmt, vect_name);
76   vect_finish_stmt_generation (stmt, new_stmt, gsi);
77   mark_symbols_for_renaming (new_stmt);
82 /* ARRAY is an array of vectors created by create_vector_array.
83    Emit code to store SSA_NAME VECT in index N of the array.
84    The store is part of the vectorization of STMT. */
/* NOTE(review): the return-type line (presumably `static void`) and local
   declarations (`array_ref', `new_stmt') are elided from this listing.  */
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 		    tree array, unsigned HOST_WIDE_INT n)
/* Build ARRAY[N] as the LHS of the new store statement.  */
93   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 		      build_int_cst (size_type_node, n),
95 		      NULL_TREE, NULL_TREE);
97   new_stmt = gimple_build_assign (array_ref, vect);
98   vect_finish_stmt_generation (stmt, new_stmt, gsi);
99   mark_symbols_for_renaming (new_stmt);
102 /* PTR is a pointer to an array of type TYPE.  Return a representation
103    of *PTR.  The memory reference replaces those in FIRST_DR
/* NOTE(review): the rest of this comment, the return-type line, and the
   trailing `return mem_ref;' are elided from this listing.  */
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109   struct ptr_info_def *pi;
110   tree mem_ref, alias_ptr_type;
/* Alias the MEM_REF the same way as the reference it replaces.  */
112   alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114   /* Arrays have the same alignment as their type. */
115   pi = get_ptr_info (ptr);
116   pi->align = TYPE_ALIGN_UNIT (type);
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125    Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): the return-type line (presumably `static void`), several
   local declarations (use_p, use_stmt, lhs, pattern_stmt, found), and
   various braces/early-exit lines are elided from this listing.  */
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 		    enum vect_relevant relevant, bool live_p,
130 		    bool used_in_pattern)
132   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Remember the incoming relevance/liveness so we can detect below whether
   this call actually changed anything.  */
133   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
137   if (vect_print_dump_info (REPORT_DETAILS))
138     fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140   /* If this stmt is an original stmt in a pattern, we might need to mark its
141      related pattern stmt instead of the original stmt.  However, such stmts
142      may have their own uses that are not in any pattern, in such cases the
143      stmt itself should be marked. */
144   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
147       if (!used_in_pattern)
149           imm_use_iterator imm_iter;
/* The def of interest is the LHS of the assignment or call.  */
154           if (is_gimple_assign (stmt))
155             lhs = gimple_assign_lhs (stmt);
157             lhs = gimple_call_lhs (stmt);
159           /* This use is out of pattern use, if LHS has other uses that are
160              pattern uses, we should mark the stmt itself, and not the pattern
162           if (TREE_CODE (lhs) == SSA_NAME)
163             FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
/* Debug uses never make a stmt relevant.  */
165                 if (is_gimple_debug (USE_STMT (use_p)))
167                 use_stmt = USE_STMT (use_p);
169                 if (vinfo_for_stmt (use_stmt)
170                     && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
180           /* This is the last stmt in a sequence that was detected as a
181              pattern that can potentially be vectorized.  Don't mark the stmt
182              as relevant/live because it's not going to be vectorized.
183              Instead mark the pattern-stmt that replaces it. */
185           pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
187           if (vect_print_dump_info (REPORT_DETAILS))
188             fprintf (vect_dump, "last stmt in pattern. don't mark"
/* From here on operate on the pattern stmt's vec_info instead.  */
190           stmt_info = vinfo_for_stmt (pattern_stmt);
191           gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192           save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193           save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Merge in the new liveness/relevance; relevance only ever increases.  */
198   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200     STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* If nothing changed, the stmt was already marked; don't re-queue it.  */
202   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
205       if (vect_print_dump_info (REPORT_DETAILS))
206         fprintf (vect_dump, "already marked relevant/live.");
210   VEC_safe_push (gimple, heap, *worklist, stmt);
214 /* Function vect_stmt_relevant_p.
216    Return true if STMT in loop that is represented by LOOP_VINFO is
217    "relevant for vectorization".
219    A stmt is considered "relevant for vectorization" if:
220    - it has uses outside the loop.
221    - it has vdefs (it alters memory).
222    - control stmts in the loop (except for the exit condition).
224    CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): the return-type line (presumably `static bool`), the
   declarations of def_p/op_iter/use_p, the `*live_p = false;' init, and
   several closing braces are elided from this listing.  */
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 		      enum vect_relevant *relevant, bool *live_p)
230   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232   imm_use_iterator imm_iter;
236   *relevant = vect_unused_in_scope;
239   /* cond stmt other than loop exit cond. */
240   if (is_ctrl_stmt (stmt)
241       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242          != loop_exit_ctrl_vec_info_type)
243     *relevant = vect_used_in_scope;
245   /* changing memory. */
246   if (gimple_code (stmt) != GIMPLE_PHI)
247     if (gimple_vdef (stmt))
249         if (vect_print_dump_info (REPORT_DETAILS))
250           fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251         *relevant = vect_used_in_scope;
254   /* uses outside the loop. */
255   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
257       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
259           basic_block bb = gimple_bb (USE_STMT (use_p));
260           if (!flow_bb_inside_loop_p (loop, bb))
262               if (vect_print_dump_info (REPORT_DETAILS))
263                 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
/* Debug uses outside the loop do not count as real uses.  */
265               if (is_gimple_debug (USE_STMT (use_p)))
268               /* We expect all such uses to be in the loop exit phis
269                  (because of loop closed form) */
270               gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271               gcc_assert (bb == single_exit (loop)->dest);
278   return (*live_p || *relevant);
282 /* Function exist_non_indexing_operands_for_use_p.
284    USE is one of the uses attached to STMT.  Check if USE is
285    used in STMT for anything other than indexing an array. */
/* NOTE(review): the return-type line (presumably `static bool`), the
   declaration of `operand', and the `return true/false' lines are elided
   from this listing.  */
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
291   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
293   /* USE corresponds to some operand in STMT.  If there is no data
294      reference in STMT, then any operand that corresponds to USE
295      is not indexing an array. */
296   if (!STMT_VINFO_DATA_REF (stmt_info))
299   /* STMT has a data_ref. FORNOW this means that its of one of
303      (This should have been verified in analyze_data_refs).
305      'var' in the second case corresponds to a def, not a use,
306      so USE cannot correspond to any operands that are not used
309      Therefore, all we need to check is if STMT falls into the
310      first case, and whether var corresponds to USE. */
/* Only simple copies of the form "lhs = var" can index via VAR.  */
312   if (!gimple_assign_copy_p (stmt))
314   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
316   operand = gimple_assign_rhs1 (stmt);
317   if (TREE_CODE (operand) != SSA_NAME)
328    Function process_use.
331    - a USE in STMT in a loop represented by LOOP_VINFO
332    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333      that defined USE.  This is done by calling mark_relevant and passing it
334      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
337    Generally, LIVE_P and RELEVANT are used to define the liveness and
338    relevance info of the DEF_STMT of this USE:
339        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
340        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
342    - case 1: If USE is used only for address computations (e.g. array indexing),
343      which does not need to be directly vectorized, then the liveness/relevance
344      of the respective DEF_STMT is left unchanged.
345    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
346      skip DEF_STMT cause it had already been processed.
347    - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
348      be modified accordingly.
350    Return true if everything is as expected.  Return false otherwise. */
/* NOTE(review): the return-type line (presumably `static bool`), the
   declarations of `def_stmt'/`def', several `return true/false' lines, the
   `switch (relevant)' headers for cases 3a/3b, and various braces are
   elided from this listing.  */
353 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
354 	     enum vect_relevant relevant, VEC(gimple,heap) **worklist)
356   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
357   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
358   stmt_vec_info dstmt_vinfo;
359   basic_block bb, def_bb;
362   enum vect_def_type dt;
364   /* case 1: we are only interested in uses that need to be vectorized.  Uses
365      that are used for address computation are not considered relevant. */
366   if (!exist_non_indexing_operands_for_use_p (use, stmt))
369   if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
371       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
372         fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* A use with no defining stmt (e.g. a default def) needs no marking.  */
376   if (!def_stmt || gimple_nop_p (def_stmt))
379   def_bb = gimple_bb (def_stmt);
380   if (!flow_bb_inside_loop_p (loop, def_bb))
382       if (vect_print_dump_info (REPORT_DETAILS))
383         fprintf (vect_dump, "def_stmt is out of loop.");
387   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
388      DEF_STMT must have already been processed, because this should be the
389      only way that STMT, which is a reduction-phi, was put in the worklist,
390      as there should be no other uses for DEF_STMT in the loop.  So we just
391      check that everything is as expected, and we are done. */
392   dstmt_vinfo = vinfo_for_stmt (def_stmt);
393   bb = gimple_bb (stmt);
394   if (gimple_code (stmt) == GIMPLE_PHI
395       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
396       && gimple_code (def_stmt) != GIMPLE_PHI
397       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
398       && bb->loop_father == def_bb->loop_father)
400       if (vect_print_dump_info (REPORT_DETAILS))
401         fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If DEF_STMT was replaced by a pattern stmt, check the pattern stmt.  */
402       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
403 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
404       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
405       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
406 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410   /* case 3a: outer-loop stmt defining an inner-loop stmt:
411 	outer-loop-header-bb:
417   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
419       if (vect_print_dump_info (REPORT_DETAILS))
420         fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
/* Translate the outer-loop relevance into the inner-loop equivalent.  */
424 	case vect_unused_in_scope:
425 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
426 		      vect_used_in_scope : vect_unused_in_scope;
429 	case vect_used_in_outer_by_reduction:
430           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
431 	  relevant = vect_used_by_reduction;
434 	case vect_used_in_outer:
435           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
436 	  relevant = vect_used_in_scope;
439 	case vect_used_in_scope:
447   /* case 3b: inner-loop stmt defining an outer-loop stmt:
448 	outer-loop-header-bb:
452 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
454   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
456       if (vect_print_dump_info (REPORT_DETAILS))
457         fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
/* Translate the inner-loop relevance into the outer-loop equivalent.  */
461 	case vect_unused_in_scope:
462 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
463             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
464                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
467 	case vect_used_by_reduction:
468 	  relevant = vect_used_in_outer_by_reduction;
471 	case vect_used_in_scope:
472 	  relevant = vect_used_in_outer;
480   vect_mark_relevant (worklist, def_stmt, relevant, live_p,
481                       is_pattern_stmt_p (stmt_vinfo));
486 /* Function vect_mark_stmts_to_be_vectorized.
488    Not all stmts in the loop need to be vectorized.  For example:
497    Stmt 1 and 3 do not need to be vectorized, because loop control and
498    addressing of vectorized data-refs are handled differently.
500    This pass detects such stmts. */
/* NOTE(review): the return-type line (presumably `static bool`), several
   local declarations (bb, phi, stmt, i, live_p, use_p, iter, fail paths
   returning false, the final `return true;'), and many braces are elided
   from this listing.  */
503 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
505   VEC(gimple,heap) *worklist;
506   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
507   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
508   unsigned int nbbs = loop->num_nodes;
509   gimple_stmt_iterator si;
512   stmt_vec_info stmt_vinfo;
516   enum vect_relevant relevant, tmp_relevant;
517   enum vect_def_type def_type;
519   if (vect_print_dump_info (REPORT_DETAILS))
520     fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
522   worklist = VEC_alloc (gimple, heap, 64);
524   /* 1. Init worklist. */
525   for (i = 0; i < nbbs; i++)
528       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
531 	  if (vect_print_dump_info (REPORT_DETAILS))
533 	      fprintf (vect_dump, "init: phi relevant? ");
534 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
537 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
538 	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
540       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
542 	  stmt = gsi_stmt (si);
543 	  if (vect_print_dump_info (REPORT_DETAILS))
545 	      fprintf (vect_dump, "init: stmt relevant? ");
546 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
549 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
550 	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
554   /* 2. Process_worklist */
555   while (VEC_length (gimple, worklist) > 0)
560       stmt = VEC_pop (gimple, worklist);
561       if (vect_print_dump_info (REPORT_DETAILS))
563           fprintf (vect_dump, "worklist: examine stmt: ");
564           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
567       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
568 	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
569 	 liveness and relevance properties of STMT. */
570       stmt_vinfo = vinfo_for_stmt (stmt);
571       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
572       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
574       /* Generally, the liveness and relevance properties of STMT are
575 	 propagated as is to the DEF_STMTs of its USEs:
576 	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
577 	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
579 	 One exception is when STMT has been identified as defining a reduction
580 	 variable; in this case we set the liveness/relevance as follows:
582 	   relevant = vect_used_by_reduction
583 	 This is because we distinguish between two kinds of relevant stmts -
584 	 those that are used by a reduction computation, and those that are
585 	 (also) used by a regular computation.  This allows us later on to
586 	 identify stmts that are used solely by a reduction, and therefore the
587 	 order of the results that they produce does not have to be kept. */
589       def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
590       tmp_relevant = relevant;
/* Validate/adjust RELEVANT according to the kind of def STMT produces;
   unsupported combinations abort the whole analysis.  */
593         case vect_reduction_def:
594 	  switch (tmp_relevant)
596 	    case vect_unused_in_scope:
597 	      relevant = vect_used_by_reduction;
600 	    case vect_used_by_reduction:
601 	      if (gimple_code (stmt) == GIMPLE_PHI)
606 	      if (vect_print_dump_info (REPORT_DETAILS))
607 	        fprintf (vect_dump, "unsupported use of reduction.");
609 	      VEC_free (gimple, heap, worklist);
616         case vect_nested_cycle:
617 	  if (tmp_relevant != vect_unused_in_scope
618 	      && tmp_relevant != vect_used_in_outer_by_reduction
619 	      && tmp_relevant != vect_used_in_outer)
621 	      if (vect_print_dump_info (REPORT_DETAILS))
622 	        fprintf (vect_dump, "unsupported use of nested cycle.");
624 	      VEC_free (gimple, heap, worklist);
631         case vect_double_reduction_def:
632 	  if (tmp_relevant != vect_unused_in_scope
633 	      && tmp_relevant != vect_used_by_reduction)
635 	      if (vect_print_dump_info (REPORT_DETAILS))
636 	        fprintf (vect_dump, "unsupported use of double reduction.");
638 	      VEC_free (gimple, heap, worklist);
649       if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
651           /* Pattern statements are not inserted into the code, so
652              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
653              have to scan the RHS or function arguments instead. */
654           if (is_gimple_assign (stmt))
656               enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
657               tree op = gimple_assign_rhs1 (stmt);
/* A COND_EXPR's comparison operands are not gimple ops; walk them by hand.  */
660               if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
662                   if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
663                                     live_p, relevant, &worklist)
664                       || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
665                                        live_p, relevant, &worklist))
667                       VEC_free (gimple, heap, worklist);
672               for (; i < gimple_num_ops (stmt); i++)
674                   op = gimple_op (stmt, i);
675                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
678                       VEC_free (gimple, heap, worklist);
683           else if (is_gimple_call (stmt))
685               for (i = 0; i < gimple_call_num_args (stmt); i++)
687                   tree arg = gimple_call_arg (stmt, i);
688                   if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
691                       VEC_free (gimple, heap, worklist);
/* Non-pattern stmt: the standard operand iterator sees all uses.  */
698         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
700             tree op = USE_FROM_PTR (use_p);
701             if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
704                 VEC_free (gimple, heap, worklist);
708     } /* while worklist */
710   VEC_free (gimple, heap, worklist);
715 /* Get cost by calling cost target builtin. */
/* Thin wrapper around the target hook; passes a dummy vectype and (in the
   elided tail, presumably) a zero misalignment.  */
718 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
720   tree dummy_type = NULL;
723   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
728 /* Get cost for STMT. */
/* NOTE(review): the return-type line (presumably `int`) and the default /
   gcc_unreachable tail of the switch are elided from this listing.  */
731 cost_for_stmt (gimple stmt)
733   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Map the vec_info stmt classification to a scalar cost kind.  */
735   switch (STMT_VINFO_TYPE (stmt_info))
737     case load_vec_info_type:
738       return vect_get_stmt_cost (scalar_load);
739     case store_vec_info_type:
740       return vect_get_stmt_cost (scalar_store);
741     case op_vec_info_type:
742     case condition_vec_info_type:
743     case assignment_vec_info_type:
744     case reduc_vec_info_type:
745     case induc_vec_info_type:
746     case type_promotion_vec_info_type:
747     case type_demotion_vec_info_type:
748     case type_conversion_vec_info_type:
749     case call_vec_info_type:
750       return vect_get_stmt_cost (scalar_stmt);
751     case undef_vec_info_type:
757 /* Function vect_model_simple_cost.
759    Models cost for simple operations, i.e. those that only emit ncopies of a
760    single op.  Right now, this does not account for multiple insns that could
761    be generated for the single vector op.  We will handle that shortly. */
/* NOTE(review): the return-type line (presumably `void`), the declaration
   of `i', and the early `return' for pure SLP stmts are elided from this
   listing.  */
764 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
765 			enum vect_def_type *dt, slp_tree slp_node)
768   int inside_cost = 0, outside_cost = 0;
770   /* The SLP costs were already calculated during SLP tree build. */
771   if (PURE_SLP_STMT (stmt_info))
774   inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
776   /* FORNOW: Assuming maximum 2 args per stmts. */
777   for (i = 0; i < 2; i++)
/* Invariant/constant operands need a one-time vector setup outside the loop.  */
779       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
780 	outside_cost += vect_get_stmt_cost (vector_stmt);
783   if (vect_print_dump_info (REPORT_COST))
784     fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
785              "outside_cost = %d .", inside_cost, outside_cost);
787   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
788   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
789   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
793 /* Function vect_cost_strided_group_size.
795    For strided load or store, return the group_size only if it is the first
796    load or store of a group, else return 1.  This ensures that group size is
797    only returned once per group. */
/* NOTE(review): the return-type line (presumably `static int`) and the
   trailing `return 1;' are elided from this listing.  */
800 vect_cost_strided_group_size (stmt_vec_info stmt_info)
802   gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
804   if (first_stmt == STMT_VINFO_STMT (stmt_info))
805     return GROUP_SIZE (stmt_info);
811 /* Function vect_model_store_cost.
813    Models cost for stores.  In the case of strided accesses, one access
814    has the overhead of the strided access attributed to it. */
/* NOTE(review): the return-type line (presumably `void`), the trailing
   `slp_tree slp_node' parameter line, the declarations of first_stmt /
   group_size, the SLP vs. non-SLP branch structure, and the early `return'
   for pure SLP stmts are elided from this listing.  */
817 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
818 		       bool store_lanes_p, enum vect_def_type dt,
822   unsigned int inside_cost = 0, outside_cost = 0;
823   struct data_reference *first_dr;
826   /* The SLP costs were already calculated during SLP tree build.  */
827   if (PURE_SLP_STMT (stmt_info))
/* An invariant/constant stored value must first be broadcast to a vector.  */
830   if (dt == vect_constant_def || dt == vect_external_def)
831     outside_cost = vect_get_stmt_cost (scalar_to_vec);
833   /* Strided access?  */
834   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
838           first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
843           first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
844           group_size = vect_cost_strided_group_size (stmt_info);
847       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
849   /* Not a strided access.  */
853       first_dr = STMT_VINFO_DATA_REF (stmt_info);
856   /* We assume that the cost of a single store-lanes instruction is
857      equivalent to the cost of GROUP_SIZE separate stores.  If a strided
858      access is instead being provided by a permute-and-store operation,
859      include the cost of the permutes.  */
860   if (!store_lanes_p && group_size > 1)
862       /* Uses a high and low interleave operation for each needed permute.  */
863       inside_cost = ncopies * exact_log2(group_size) * group_size
864         * vect_get_stmt_cost (vector_stmt);
866       if (vect_print_dump_info (REPORT_COST))
867         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
872   /* Costs of the stores.  */
873   vect_get_store_cost (first_dr, ncopies, &inside_cost);
875   if (vect_print_dump_info (REPORT_COST))
876     fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
877              "outside_cost = %d .", inside_cost, outside_cost);
879   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
880   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
881   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
885 /* Calculate cost of DR's memory access. */
/* Adds the per-copy store cost for data reference DR to *INSIDE_COST,
   according to how well DR's alignment is supported by the target.
   NOTE(review): the return-type line (presumably `void`), the
   `case dr_aligned:' label, the closing of the unaligned case, and the
   default/gcc_unreachable tail are elided from this listing.  */
887 vect_get_store_cost (struct data_reference *dr, int ncopies,
888 		     unsigned int *inside_cost)
890   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
892   switch (alignment_support_scheme)
896         *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
898         if (vect_print_dump_info (REPORT_COST))
899           fprintf (vect_dump, "vect_model_store_cost: aligned.");
904     case dr_unaligned_supported:
906         gimple stmt = DR_STMT (dr);
907         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
908         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
910         /* Here, we assign an additional cost for the unaligned store.  */
911         *inside_cost += ncopies
912           * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
913                                  vectype, DR_MISALIGNMENT (dr));
915         if (vect_print_dump_info (REPORT_COST))
916           fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
928 /* Function vect_model_load_cost.
930    Models cost for loads.  In the case of strided accesses, the last access
931    has the overhead of the strided access attributed to it.  Since unaligned
932    accesses are supported for loads, we also account for the costs of the
933    access scheme chosen. */
/* NOTE(review): the return-type line (presumably `void`), the trailing
   `slp_tree slp_node' parameter line, the declarations of first_stmt /
   group_size, the non-strided else-branch body, and the early `return'
   for pure SLP stmts are elided from this listing.  */
936 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
941   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
942   unsigned int inside_cost = 0, outside_cost = 0;
944   /* The SLP costs were already calculated during SLP tree build.  */
945   if (PURE_SLP_STMT (stmt_info))
948   /* Strided accesses?  */
949   first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
950   if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
952       group_size = vect_cost_strided_group_size (stmt_info);
953       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
955   /* Not a strided access.  */
962   /* We assume that the cost of a single load-lanes instruction is
963      equivalent to the cost of GROUP_SIZE separate loads.  If a strided
964      access is instead being provided by a load-and-permute operation,
965      include the cost of the permutes.  */
966   if (!load_lanes_p && group_size > 1)
968       /* Uses an even and odd extract operations for each needed permute.  */
969       inside_cost = ncopies * exact_log2(group_size) * group_size
970 	* vect_get_stmt_cost (vector_stmt);
972       if (vect_print_dump_info (REPORT_COST))
973         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
977   /* The loads themselves.  */
978   vect_get_load_cost (first_dr, ncopies,
979 	 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
981 	 &inside_cost, &outside_cost);
983   if (vect_print_dump_info (REPORT_COST))
984     fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
985              "outside_cost = %d .", inside_cost, outside_cost);
987   /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
988   stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
989   stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
993 /* Calculate cost of DR's memory access. */
/* Adds the per-copy load cost for data reference DR to *INSIDE_COST and,
   for software-pipelined realignment, possibly to *OUTSIDE_COST, based on
   the alignment scheme chosen for DR.
   NOTE(review): the return-type line (presumably `void`), the
   `case dr_aligned:' label, several dump strings, `break's, and the
   default/gcc_unreachable tail are elided from this listing.  */
995 vect_get_load_cost (struct data_reference *dr, int ncopies,
996 		    bool add_realign_cost, unsigned int *inside_cost,
997 		    unsigned int *outside_cost)
999   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1001   switch (alignment_support_scheme)
1005         *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1007         if (vect_print_dump_info (REPORT_COST))
1008           fprintf (vect_dump, "vect_model_load_cost: aligned.");
1012     case dr_unaligned_supported:
1014         gimple stmt = DR_STMT (dr);
1015         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1016         tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1018         /* Here, we assign an additional cost for the unaligned load.  */
1019         *inside_cost += ncopies
1020           * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1021                                            vectype, DR_MISALIGNMENT (dr));
1022         if (vect_print_dump_info (REPORT_COST))
1023           fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1028     case dr_explicit_realign:
/* Two loads plus a realignment op per copy.  */
1030         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1031                                    + vect_get_stmt_cost (vector_stmt));
1033         /* FIXME: If the misalignment remains fixed across the iterations of
1034            the containing loop, the following cost should be added to the
1036         if (targetm.vectorize.builtin_mask_for_load)
1037           *inside_cost += vect_get_stmt_cost (vector_stmt);
1041     case dr_explicit_realign_optimized:
1043         if (vect_print_dump_info (REPORT_COST))
1044           fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1047         /* Unaligned software pipeline has a load of an address, an initial
1048            load, and possibly a mask operation to "prime" the loop.  However,
1049            if this is an access in a group of loads, which provide strided
1050            access, then the above cost should only be considered for one
1051            access in the group.  Inside the loop, there is a load op
1052            and a realignment op.  */
1054         if (add_realign_cost)
1056             *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1057             if (targetm.vectorize.builtin_mask_for_load)
1058               *outside_cost += vect_get_stmt_cost (vector_stmt);
1061         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1062                                    + vect_get_stmt_cost (vector_stmt));
1072 /* Function vect_init_vector.
1074    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1075    the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
1076    is not NULL.  Otherwise, place the initialization at the loop preheader.
1077    Return the DEF of INIT_STMT.
1078    It will be used in the vectorization of STMT.  */
/* NOTE(review): the return-type line (presumably `tree`), the declarations
   of new_var/init_stmt/new_temp/vec_oprnd, the `if (gsi) ... else' branch
   structure, the nested-loop handling under nested_in_vect_loop_p, and the
   final `return vec_oprnd;' are elided from this listing.  */
1081 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1082 		  gimple_stmt_iterator *gsi)
1084   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
/* Create a fresh vector temporary and the assignment that initializes it.  */
1092   new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1093   add_referenced_var (new_var);
1094   init_stmt = gimple_build_assign (new_var, vector_var);
1095   new_temp = make_ssa_name (new_var, init_stmt);
1096   gimple_assign_set_lhs (init_stmt, new_temp);
1099     vect_finish_stmt_generation (stmt, init_stmt, gsi);
1102       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1106           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1108           if (nested_in_vect_loop_p (loop, stmt))
/* Loop vectorization: hoist the init to the preheader edge; the insertion
   must not split the edge (no new basic block expected).  */
1111           pe = loop_preheader_edge (loop);
1112           new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1113           gcc_assert (!new_bb);
/* Basic-block (SLP) vectorization: insert at the start of the region.  */
1117           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1119           gimple_stmt_iterator gsi_bb_start;
1121           gcc_assert (bb_vinfo);
1122           bb = BB_VINFO_BB (bb_vinfo);
1123           gsi_bb_start = gsi_after_labels (bb);
1124           gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1128   if (vect_print_dump_info (REPORT_DETAILS))
1130       fprintf (vect_dump, "created new init_stmt: ");
1131       print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1134   vec_oprnd = gimple_assign_lhs (init_stmt);
1139 /* Function vect_get_vec_def_for_operand.
1141 OP is an operand in STMT. This function returns a (vector) def that will be
1142 used in the vectorized stmt for STMT.
1144 In the case that OP is an SSA_NAME which is defined in the loop, then
1145 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1147 In case OP is an invariant or constant, a new stmt that creates a vector def
1148 needs to be introduced. */
/* NOTE(review): this is an elided excerpt -- the fused leading numbers are
   original file line numbers and they jump, so declarations, braces, the
   switch header, and return statements between the visible lines are not
   shown here.  Comments below describe only what the visible lines do.  */
1151 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1156 stmt_vec_info def_stmt_info = NULL;
1157 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1158 unsigned int nunits;
1159 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1165 enum vect_def_type dt;
1169 if (vect_print_dump_info (REPORT_DETAILS))
1171 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1172 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP (constant / external / internal / reduction / induction);
   the classification is asserted to succeed and drives the case analysis
   below.  */
1175 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1177 gcc_assert (is_simple_use);
1178 if (vect_print_dump_info (REPORT_DETAILS))
1182 fprintf (vect_dump, "def = ");
1183 print_generic_expr (vect_dump, def, TDF_SLIM);
1187 fprintf (vect_dump, " def_stmt = ");
1188 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1194 /* Case 1: operand is a constant. */
1195 case vect_constant_def:
1197 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1198 gcc_assert (vector_type);
1199 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1204 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1205 if (vect_print_dump_info (REPORT_DETAILS))
1206 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Splat the scalar constant into a vector constant, converting it to the
   vector's element type first.  */
1208 vec_cst = build_vector_from_val (vector_type,
1209 fold_convert (TREE_TYPE (vector_type),
1211 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1214 /* Case 2: operand is defined outside the loop - loop invariant. */
1215 case vect_external_def:
1217 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1218 gcc_assert (vector_type);
1219 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1224 /* Create 'vec_inv = {inv,inv,..,inv}' */
1225 if (vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "Create vector_inv.");
/* Build a CONSTRUCTOR list with NUNITS copies of the invariant DEF.  */
1228 for (i = nunits - 1; i >= 0; --i)
1230 t = tree_cons (NULL_TREE, def, t);
1233 /* FIXME: use build_constructor directly. */
1234 vec_inv = build_constructor_from_list (vector_type, t);
1235 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1238 /* Case 3: operand is defined inside the loop. */
1239 case vect_internal_def:
1242 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1244 /* Get the def from the vectorized stmt. */
1245 def_stmt_info = vinfo_for_stmt (def_stmt);
1247 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1248 /* Get vectorized pattern statement. */
/* If the defining stmt was replaced by a pattern stmt and is itself not
   relevant, the vector def lives on the related pattern stmt instead.  */
1250 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1251 && !STMT_VINFO_RELEVANT (def_stmt_info))
1252 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1253 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1254 gcc_assert (vec_stmt);
/* Extract the vector def: PHI result, call lhs, or assign lhs depending
   on the kind of the vectorized stmt.  */
1255 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1256 vec_oprnd = PHI_RESULT (vec_stmt);
1257 else if (is_gimple_call (vec_stmt))
1258 vec_oprnd = gimple_call_lhs (vec_stmt);
1260 vec_oprnd = gimple_assign_lhs (vec_stmt);
1264 /* Case 4: operand is defined by a loop header phi - reduction */
1265 case vect_reduction_def:
1266 case vect_double_reduction_def:
1267 case vect_nested_cycle:
1271 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1272 loop = (gimple_bb (def_stmt))->loop_father;
1274 /* Get the def before the loop */
1275 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1276 return get_initial_def_for_reduction (stmt, op, scalar_def);
1279 /* Case 5: operand is defined by loop-header phi - induction. */
1280 case vect_induction_def:
1282 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1284 /* Get the def from the vectorized stmt. */
1285 def_stmt_info = vinfo_for_stmt (def_stmt);
1286 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1287 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1288 vec_oprnd = PHI_RESULT (vec_stmt);
1290 vec_oprnd = gimple_get_lhs (vec_stmt);
1300 /* Function vect_get_vec_def_for_stmt_copy
1302 Return a vector-def for an operand. This function is used when the
1303 vectorized stmt to be created (by the caller to this function) is a "copy"
1304 created in case the vectorized result cannot fit in one vector, and several
1305 copies of the vector-stmt are required. In this case the vector-def is
1306 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1307 of the stmt that defines VEC_OPRND.
1308 DT is the type of the vector def VEC_OPRND.
1311 In case the vectorization factor (VF) is bigger than the number
1312 of elements that can fit in a vectype (nunits), we have to generate
1313 more than one vector stmt to vectorize the scalar stmt. This situation
1314 arises when there are multiple data-types operated upon in the loop; the
1315 smallest data-type determines the VF, and as a result, when vectorizing
1316 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1317 vector stmt (each computing a vector of 'nunits' results, and together
1318 computing 'VF' results in each iteration). This function is called when
1319 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1320 which VF=16 and nunits=4, so the number of copies required is 4):
1322 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1324 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1325 VS1.1: vx.1 = memref1 VS1.2
1326 VS1.2: vx.2 = memref2 VS1.3
1327 VS1.3: vx.3 = memref3
1329 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1330 VSnew.1: vz1 = vx.1 + ... VSnew.2
1331 VSnew.2: vz2 = vx.2 + ... VSnew.3
1332 VSnew.3: vz3 = vx.3 + ...
1334 The vectorization of S1 is explained in vectorizable_load.
1335 The vectorization of S2:
1336 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1337 the function 'vect_get_vec_def_for_operand' is called to
1338 get the relevant vector-def for each operand of S2. For operand x it
1339 returns the vector-def 'vx.0'.
1341 To create the remaining copies of the vector-stmt (VSnew.j), this
1342 function is called to get the relevant vector-def for each operand. It is
1343 obtained from the respective VS1.j stmt, which is recorded in the
1344 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1346 For example, to obtain the vector-def 'vx.1' in order to create the
1347 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1348 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1349 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1350 and return its def ('vx.1').
1351 Overall, to create the above sequence this function will be called 3 times:
1352 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1353 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1354 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1357 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1359 gimple vec_stmt_for_operand;
1360 stmt_vec_info def_stmt_info;
1362 /* Do nothing; can reuse same def. */
/* Invariant/constant defs are loop-independent, so the same splatted
   vector can be reused by every copy.  */
1363 if (dt == vect_external_def || dt == vect_constant_def )
/* Walk to the defining vector stmt and hop to its next copy via
   STMT_VINFO_RELATED_STMT.  */
1366 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1367 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1368 gcc_assert (def_stmt_info);
1369 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1370 gcc_assert (vec_stmt_for_operand);
/* NOTE(review): this assignment appears redundant -- both arms of the
   following if/else overwrite vec_oprnd before it is used.  Harmless,
   but a candidate for removal.  */
1371 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1372 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1373 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1375 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1380 /* Get vectorized definitions for the operands to create a copy of an original
1381 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* DT[0]/DT[1] classify the two operands; VEC_OPRNDS0 is required,
   VEC_OPRNDS1 is optional (may be NULL or empty).  Each vector is updated
   in place: the previous copy's def is popped and the next copy's def is
   pushed.  */
1384 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1385 VEC(tree,heap) **vec_oprnds0,
1386 VEC(tree,heap) **vec_oprnds1)
1388 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1390 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1391 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* Second operand is processed only when the caller actually has one.  */
1393 if (vec_oprnds1 && *vec_oprnds1)
1395 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1396 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1397 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1402 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
/* NOTE(review): the original comment continues on elided lines.  Visible
   behavior: with an SLP node, delegate to vect_get_slp_defs; otherwise
   allocate a one-element vector per operand and fill it via
   vect_get_vec_def_for_operand.  OP1 handling is presumably guarded by an
   elided 'if (op1)' -- TODO confirm against the full source.  */
1406 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1407 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1411 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1416 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1417 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1418 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1422 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1423 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1424 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1430 /* Function vect_finish_stmt_generation.
1432 Insert a new stmt. */
/* Inserts VEC_STMT before GSI, creates and attaches its stmt_vec_info
   (inheriting STMT's loop/bb vinfo), and copies STMT's source location
   onto it.  STMT is the scalar stmt being vectorized.  */
1435 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1436 gimple_stmt_iterator *gsi)
1438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1439 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1440 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Labels must stay first in their block; inserting before one would
   break that invariant.  */
1442 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1444 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1446 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1449 if (vect_print_dump_info (REPORT_DETAILS))
1451 fprintf (vect_dump, "add new stmt: ");
1452 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1455 gimple_set_location (vec_stmt, gimple_location (stmt));
1458 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1459 a function declaration if the target has a vectorized version
1460 of the function, or NULL_TREE if the function cannot be vectorized. */
1463 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1465 tree fndecl = gimple_call_fndecl (call);
1467 /* We only handle functions that do not read or clobber memory -- i.e.
1468 const or novops ones. */
1469 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* Only direct calls to built-in function decls can be mapped to a
   target vector builtin.  */
1473 || TREE_CODE (fndecl) != FUNCTION_DECL
1474 || !DECL_BUILT_IN (fndecl))
/* Ask the target hook for a vectorized counterpart of FNDECL.  */
1477 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1481 /* Function vectorizable_call.
1483 Check if STMT performs a function call that can be vectorized.
1484 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1485 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1486 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): elided excerpt -- 'return false' statements, braces and the
   switch-on-modifier structure between visible lines are not shown.  */
1489 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1494 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1496 tree vectype_out, vectype_in;
1499 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1500 tree fndecl, new_temp, def, rhs_type;
1502 enum vect_def_type dt[3]
1503 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1504 gimple new_stmt = NULL;
1506 VEC(tree, heap) *vargs = NULL;
1507 enum { NARROW, NONE, WIDEN } modifier;
1511 /* FORNOW: unsupported in basic block SLP. */
1512 gcc_assert (loop_vinfo);
/* --- Applicability checks: relevant internal-def gimple call with an
   SSA lhs that cannot throw internally. --- */
1514 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1517 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1520 /* FORNOW: SLP not supported. */
1521 if (STMT_SLP_TYPE (stmt_info))
1524 /* Is STMT a vectorizable call? */
1525 if (!is_gimple_call (stmt))
1528 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1531 if (stmt_can_throw_internal (stmt))
1534 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1536 /* Process function arguments. */
1537 rhs_type = NULL_TREE;
1538 vectype_in = NULL_TREE;
1539 nargs = gimple_call_num_args (stmt);
1541 /* Bail out if the function has more than three arguments, we do not have
1542 interesting builtin functions to vectorize with more than two arguments
1543 except for fma. No arguments is also not good. */
1544 if (nargs == 0 || nargs > 3)
/* --- Per-argument validation: all args must share one scalar type and
   one input vector type, and be "simple uses". --- */
1547 for (i = 0; i < nargs; i++)
1551 op = gimple_call_arg (stmt, i);
1553 /* We can only handle calls with arguments of the same type. */
1555 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1557 if (vect_print_dump_info (REPORT_DETAILS))
1558 fprintf (vect_dump, "argument types differ.");
1562 rhs_type = TREE_TYPE (op);
1564 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1565 &def_stmt, &def, &dt[i], &opvectype))
1567 if (vect_print_dump_info (REPORT_DETAILS))
1568 fprintf (vect_dump, "use not simple.");
1573 vectype_in = opvectype;
1575 && opvectype != vectype_in)
1577 if (vect_print_dump_info (REPORT_DETAILS))
1578 fprintf (vect_dump, "argument vector types differ.");
1582 /* If all arguments are external or constant defs use a vector type with
1583 the same size as the output vector type. */
1585 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1587 gcc_assert (vectype_in);
1590 if (vect_print_dump_info (REPORT_DETAILS))
1592 fprintf (vect_dump, "no vectype for scalar type ");
1593 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* --- Determine modifier from the in/out subpart ratio:
   WIDEN when out has half the elements of in is handled via nunits
   comparisons below (exact mapping is on elided lines). --- */
1600 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1602 if (nunits_in == nunits_out / 2)
1604 else if (nunits_out == nunits_in)
1606 else if (nunits_out == nunits_in / 2)
1611 /* For now, we only vectorize functions if a target specific builtin
1612 is available. TODO -- in some cases, it might be profitable to
1613 insert the calls for pieces of the vector, in order to be able
1614 to vectorize other operations in the loop. */
1615 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1616 if (fndecl == NULL_TREE)
1618 if (vect_print_dump_info (REPORT_DETAILS))
1619 fprintf (vect_dump, "function is not vectorizable.");
/* Only ECF_CONST/ECF_NOVOPS calls get here (see vectorizable_function),
   so the call must have no virtual uses.  */
1624 gcc_assert (!gimple_vuse (stmt));
1626 if (modifier == NARROW)
1627 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1629 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1631 /* Sanity check: make sure that at least one copy of the vectorized stmt
1632 needs to be generated. */
1633 gcc_assert (ncopies >= 1);
1635 if (!vec_stmt) /* transformation not required. */
1637 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1638 if (vect_print_dump_info (REPORT_DETAILS))
1639 fprintf (vect_dump, "=== vectorizable_call ===");
1640 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/** Transformation phase starts here. **/
1646 if (vect_print_dump_info (REPORT_DETAILS))
1647 fprintf (vect_dump, "transform call.");
1650 scalar_dest = gimple_call_lhs (stmt);
1651 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1653 prev_stmt_info = NULL;
/* --- Modifier NONE: one vector arg per scalar arg, ncopies calls,
   copies chained via STMT_VINFO_RELATED_STMT. --- */
1657 for (j = 0; j < ncopies; ++j)
1659 /* Build argument list for the vectorized call. */
1661 vargs = VEC_alloc (tree, heap, nargs);
1663 VEC_truncate (tree, vargs, 0);
1665 for (i = 0; i < nargs; i++)
1667 op = gimple_call_arg (stmt, i);
/* j == 0: initial def; j > 0: next copy's def, derived from the
   previous copy's argument.  */
1670 = vect_get_vec_def_for_operand (op, stmt, NULL);
1673 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1675 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1678 VEC_quick_push (tree, vargs, vec_oprnd0);
1681 new_stmt = gimple_build_call_vec (fndecl, vargs);
1682 new_temp = make_ssa_name (vec_dest, new_stmt);
1683 gimple_call_set_lhs (new_stmt, new_temp);
1685 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1686 mark_symbols_for_renaming (new_stmt);
1689 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1691 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1693 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* --- Modifier NARROW: two vector defs per scalar arg feed each call
   (output has half the elements of the input). --- */
1699 for (j = 0; j < ncopies; ++j)
1701 /* Build argument list for the vectorized call. */
1703 vargs = VEC_alloc (tree, heap, nargs * 2);
1705 VEC_truncate (tree, vargs, 0);
1707 for (i = 0; i < nargs; i++)
1709 op = gimple_call_arg (stmt, i);
1713 = vect_get_vec_def_for_operand (op, stmt, NULL);
1715 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1719 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1721 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1723 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1726 VEC_quick_push (tree, vargs, vec_oprnd0);
1727 VEC_quick_push (tree, vargs, vec_oprnd1);
1730 new_stmt = gimple_build_call_vec (fndecl, vargs);
1731 new_temp = make_ssa_name (vec_dest, new_stmt);
1732 gimple_call_set_lhs (new_stmt, new_temp);
1734 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1735 mark_symbols_for_renaming (new_stmt);
1738 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1742 prev_stmt_info = vinfo_for_stmt (new_stmt);
1745 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1750 /* No current target implements this case. */
1754 VEC_free (tree, heap, vargs);
1756 /* Update the exception handling table with the vector stmt if necessary. */
1757 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1758 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1760 /* The call in STMT might prevent it from being removed in dce.
1761 We however cannot remove it here, due to the way the ssa name
1762 it defines is mapped to the new definition. So just replace
1763 rhs of the statement with something harmless. */
1765 type = TREE_TYPE (scalar_dest);
1766 if (is_pattern_stmt_p (stmt_info))
1767 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1769 lhs = gimple_call_lhs (stmt);
/* Replace the scalar call with 'lhs = 0' and transfer the stmt_vec_info
   to the replacement so downstream bookkeeping stays valid.  */
1770 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1771 set_vinfo_for_stmt (new_stmt, stmt_info);
1772 set_vinfo_for_stmt (stmt, NULL);
1773 STMT_VINFO_STMT (stmt_info) = new_stmt;
1774 gsi_replace (gsi, new_stmt, false);
1775 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1781 /* Function vect_gen_widened_results_half
1783 Create a vector stmt whose code, type, number of arguments, and result
1784 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1785 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1786 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1787 needs to be created (DECL is a function-decl of a target-builtin).
1788 STMT is the original scalar stmt that we are vectorizing. */
1791 vect_gen_widened_results_half (enum tree_code code,
1793 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1794 tree vec_dest, gimple_stmt_iterator *gsi,
1800 /* Generate half of the widened result: */
1801 if (code == CALL_EXPR)
1803 /* Target specific support */
/* Target builtin path: 2-arg call for binary ops, 1-arg otherwise.  */
1804 if (op_type == binary_op)
1805 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1807 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1808 new_temp = make_ssa_name (vec_dest, new_stmt);
1809 gimple_call_set_lhs (new_stmt, new_temp);
1813 /* Generic support */
/* Generic tree-code path: CODE's arity must match OP_TYPE.  */
1814 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1815 if (op_type != binary_op)
1817 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1819 new_temp = make_ssa_name (vec_dest, new_stmt);
1820 gimple_assign_set_lhs (new_stmt, new_temp);
1822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1827 /* Check if STMT performs a conversion operation, that can be vectorized.
1828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): elided excerpt -- returns, braces, and the modifier switch
   between visible lines are not shown; comments describe visible code only.
   Only FIX_TRUNC_EXPR and FLOAT_EXPR (int<->float) conversions are handled
   here.  */
1833 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1834 gimple *vec_stmt, slp_tree slp_node)
1839 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1842 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1843 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1847 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1848 gimple new_stmt = NULL;
1849 stmt_vec_info prev_stmt_info;
1852 tree vectype_out, vectype_in;
1855 enum { NARROW, NONE, WIDEN } modifier;
1857 VEC(tree,heap) *vec_oprnds0 = NULL;
1859 VEC(tree,heap) *dummy = NULL;
1862 /* Is STMT a vectorizable conversion? */
1864 /* FORNOW: unsupported in basic block SLP. */
1865 gcc_assert (loop_vinfo);
1867 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1870 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1873 if (!is_gimple_assign (stmt))
1876 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1879 code = gimple_assign_rhs_code (stmt);
1880 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1883 /* Check types of lhs and rhs. */
1884 scalar_dest = gimple_assign_lhs (stmt);
1885 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1887 op0 = gimple_assign_rhs1 (stmt);
1888 rhs_type = TREE_TYPE (op0);
1889 /* Check the operands of the operation. */
1890 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1891 &def_stmt, &def, &dt[0], &vectype_in))
1893 if (vect_print_dump_info (REPORT_DETAILS))
1894 fprintf (vect_dump, "use not simple.");
1897 /* If op0 is an external or constant defs use a vector type of
1898 the same size as the output vector type. */
1900 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1902 gcc_assert (vectype_in);
1905 if (vect_print_dump_info (REPORT_DETAILS))
1907 fprintf (vect_dump, "no vectype for scalar type ");
1908 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* Determine WIDEN/NONE/NARROW from the in/out subpart counts (the
   assignments to 'modifier' are on elided lines).  */
1915 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1916 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1917 if (nunits_in == nunits_out / 2)
1919 else if (nunits_out == nunits_in)
1921 else if (nunits_out == nunits_in / 2)
1926 if (modifier == NARROW)
1927 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1929 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1931 /* Multiple types in SLP are handled by creating the appropriate number of
1932 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1934 if (slp_node || PURE_SLP_STMT (stmt_info))
1937 /* Sanity check: make sure that at least one copy of the vectorized stmt
1938 needs to be generated. */
1939 gcc_assert (ncopies >= 1);
1941 /* Supportable by target? */
1942 if ((modifier == NONE
1943 && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
1944 || (modifier == WIDEN
1945 && !supportable_widening_operation (code, stmt,
1946 vectype_out, vectype_in,
1949 &dummy_int, &dummy))
1950 || (modifier == NARROW
1951 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1952 &code1, &dummy_int, &dummy)))
1954 if (vect_print_dump_info (REPORT_DETAILS))
1955 fprintf (vect_dump, "conversion not supported by target.");
1959 if (modifier != NONE)
1961 /* FORNOW: SLP not supported. */
1962 if (STMT_SLP_TYPE (stmt_info))
1966 if (!vec_stmt) /* transformation not required. */
1968 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
/** Transformation phase starts here. **/
1973 if (vect_print_dump_info (REPORT_DETAILS))
1974 fprintf (vect_dump, "transform conversion.");
1977 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1979 if (modifier == NONE && !slp_node)
1980 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1982 prev_stmt_info = NULL;
/* --- Modifier NONE: one converted vector per input vector. --- */
1986 for (j = 0; j < ncopies; j++)
1989 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1991 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1993 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1995 /* Arguments are ready, create the new vector stmt. */
/* Either a target builtin call (code1 == CALL_EXPR) or a plain
   assign with the conversion tree code.  */
1996 if (code1 == CALL_EXPR)
1998 new_stmt = gimple_build_call (decl1, 1, vop0);
1999 new_temp = make_ssa_name (vec_dest, new_stmt);
2000 gimple_call_set_lhs (new_stmt, new_temp);
2004 gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
2005 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
2007 new_temp = make_ssa_name (vec_dest, new_stmt);
2008 gimple_assign_set_lhs (new_stmt, new_temp);
2011 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2013 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2017 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2019 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2020 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* --- Modifier WIDEN: each input vector yields two halves via
   vect_gen_widened_results_half (code1/decl1 and code2/decl2). --- */
2025 /* In case the vectorization factor (VF) is bigger than the number
2026 of elements that we can fit in a vectype (nunits), we have to
2027 generate more than one vector stmt - i.e - we need to "unroll"
2028 the vector stmt by a factor VF/nunits. */
2029 for (j = 0; j < ncopies; j++)
2032 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2034 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2036 /* Generate first half of the widened result: */
2038 = vect_gen_widened_results_half (code1, decl1,
2039 vec_oprnd0, vec_oprnd1,
2040 unary_op, vec_dest, gsi, stmt);
2042 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2044 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2045 prev_stmt_info = vinfo_for_stmt (new_stmt);
2047 /* Generate second half of the widened result: */
2049 = vect_gen_widened_results_half (code2, decl2,
2050 vec_oprnd0, vec_oprnd1,
2051 unary_op, vec_dest, gsi, stmt);
2052 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2053 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* --- Modifier NARROW: two input vectors are combined into each output
   vector; code1 is a two-operand narrowing code. --- */
2058 /* In case the vectorization factor (VF) is bigger than the number
2059 of elements that we can fit in a vectype (nunits), we have to
2060 generate more than one vector stmt - i.e - we need to "unroll"
2061 the vector stmt by a factor VF/nunits. */
2062 for (j = 0; j < ncopies; j++)
2067 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2068 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2072 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2073 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2076 /* Arguments are ready. Create the new vector stmt. */
2077 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2079 new_temp = make_ssa_name (vec_dest, new_stmt);
2080 gimple_assign_set_lhs (new_stmt, new_temp);
2081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2084 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2088 prev_stmt_info = vinfo_for_stmt (new_stmt);
2091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2095 VEC_free (tree, heap, vec_oprnds0);
2101 /* Function vectorizable_assignment.
2103 Check if STMT performs an assignment (copy) that can be vectorized.
2104 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2105 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2106 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): elided excerpt -- 'return false' statements and braces
   between visible lines are not shown.  Handles plain copies, PAREN_EXPR,
   and same-size NOP/VIEW_CONVERT conversions (emitted as VIEW_CONVERT).  */
2109 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2110 gimple *vec_stmt, slp_tree slp_node)
2115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2116 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2122 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2125 VEC(tree,heap) *vec_oprnds = NULL;
2127 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2128 gimple new_stmt = NULL;
2129 stmt_vec_info prev_stmt_info = NULL;
2130 enum tree_code code;
2133 /* Multiple types in SLP are handled by creating the appropriate number of
2134 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2136 if (slp_node || PURE_SLP_STMT (stmt_info))
2139 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2141 gcc_assert (ncopies >= 1);
/* --- Applicability checks. --- */
2143 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2146 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2149 /* Is vectorizable assignment? */
2150 if (!is_gimple_assign (stmt))
2153 scalar_dest = gimple_assign_lhs (stmt)
2154 if (TREE_CODE (scalar_dest) != SSA_NAME)
2157 code = gimple_assign_rhs_code (stmt);
2158 if (gimple_assign_single_p (stmt)
2159 || code == PAREN_EXPR
2160 || CONVERT_EXPR_CODE_P (code))
2161 op = gimple_assign_rhs1 (stmt);
/* VIEW_CONVERT_EXPR wraps its source; strip to the underlying operand.  */
2165 if (code == VIEW_CONVERT_EXPR)
2166 op = TREE_OPERAND (op, 0);
2168 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2169 &def_stmt, &def, &dt[0], &vectype_in))
2171 if (vect_print_dump_info (REPORT_DETAILS))
2172 fprintf (vect_dump, "use not simple.");
2176 /* We can handle NOP_EXPR conversions that do not change the number
2177 of elements or the vector size. */
2178 if ((CONVERT_EXPR_CODE_P (code)
2179 || code == VIEW_CONVERT_EXPR)
2181 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2182 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2183 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2186 /* We do not handle bit-precision changes. */
2187 if ((CONVERT_EXPR_CODE_P (code)
2188 || code == VIEW_CONVERT_EXPR)
2189 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2190 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2191 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2192 || ((TYPE_PRECISION (TREE_TYPE (op))
2193 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2194 /* But a conversion that does not change the bit-pattern is ok. */
2195 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2196 > TYPE_PRECISION (TREE_TYPE (op)))
2197 && TYPE_UNSIGNED (TREE_TYPE (op))))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "type conversion to/from bit-precision "
2205 if (!vec_stmt) /* transformation not required. */
2207 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "=== vectorizable_assignment ===");
2210 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/** Transformation phase starts here. **/
2215 if (vect_print_dump_info (REPORT_DETAILS))
2216 fprintf (vect_dump, "transform assignment.");
2219 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2222 for (j = 0; j < ncopies; j++)
2226 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2228 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2230 /* Arguments are ready. create the new vector stmt. */
2231 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
/* Conversions become VIEW_CONVERT on the vector (same size was
   checked above, so the bits can be reinterpreted).  */
2233 if (CONVERT_EXPR_CODE_P (code)
2234 || code == VIEW_CONVERT_EXPR)
2235 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2236 new_stmt = gimple_build_assign (vec_dest, vop);
2237 new_temp = make_ssa_name (vec_dest, new_stmt);
2238 gimple_assign_set_lhs (new_stmt, new_temp);
2239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2241 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2248 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2250 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2252 prev_stmt_info = vinfo_for_stmt (new_stmt);
2255 VEC_free (tree, heap, vec_oprnds);
2260 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2261 either as shift by a scalar or by a vector. */
/* Probes the scalar-shift optab first, then falls back to the
   vector-shift optab, and finally checks the chosen optab has an insn
   for the vector mode.  (Early 'return false' paths are on elided
   lines.)  */
2264 vect_supportable_shift (enum tree_code code, tree scalar_type)
2267 enum machine_mode vec_mode;
2272 vectype = get_vectype_for_scalar_type (scalar_type);
2276 optab = optab_for_tree_code (code, vectype, optab_scalar);
2278 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2280 optab = optab_for_tree_code (code, vectype, optab_vector);
2282 || (optab_handler (optab, TYPE_MODE (vectype))
2283 == CODE_FOR_nothing))
2287 vec_mode = TYPE_MODE (vectype);
2288 icode = (int) optab_handler (optab, vec_mode);
2289 if (icode == CODE_FOR_nothing)
2296 /* Function vectorizable_shift.
2298 Check if STMT performs a shift operation that can be vectorized.
2299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2304 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2305 gimple *vec_stmt, slp_tree slp_node)
2309 tree op0, op1 = NULL;
2310 tree vec_oprnd1 = NULL_TREE;
2311 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2314 enum tree_code code;
2315 enum machine_mode vec_mode;
2319 enum machine_mode optab_op2_mode;
2322 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2323 gimple new_stmt = NULL;
2324 stmt_vec_info prev_stmt_info;
2331 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2334 bool scalar_shift_arg = true;
2335 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2341 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2344 /* Is STMT a vectorizable binary/unary operation? */
2345 if (!is_gimple_assign (stmt))
2348 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2351 code = gimple_assign_rhs_code (stmt);
2353 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2354 || code == RROTATE_EXPR))
2357 scalar_dest = gimple_assign_lhs (stmt);
2358 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2359 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2360 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2362 if (vect_print_dump_info (REPORT_DETAILS))
2363 fprintf (vect_dump, "bit-precision shifts not supported.");
2367 op0 = gimple_assign_rhs1 (stmt);
2368 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2369 &def_stmt, &def, &dt[0], &vectype))
2371 if (vect_print_dump_info (REPORT_DETAILS))
2372 fprintf (vect_dump, "use not simple.");
2375 /* If op0 is an external or constant def use a vector type with
2376 the same size as the output vector type. */
2378 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2380 gcc_assert (vectype);
2383 if (vect_print_dump_info (REPORT_DETAILS))
2385 fprintf (vect_dump, "no vectype for scalar type ");
2386 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2392 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2393 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2394 if (nunits_out != nunits_in)
2397 op1 = gimple_assign_rhs2 (stmt);
2398 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2399 &dt[1], &op1_vectype))
2401 if (vect_print_dump_info (REPORT_DETAILS))
2402 fprintf (vect_dump, "use not simple.");
2407 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2411 /* Multiple types in SLP are handled by creating the appropriate number of
2412 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2414 if (slp_node || PURE_SLP_STMT (stmt_info))
2417 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2419 gcc_assert (ncopies >= 1);
2421 /* Determine whether the shift amount is a vector, or scalar. If the
2422 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2424 if (dt[1] == vect_internal_def && !slp_node)
2425 scalar_shift_arg = false;
2426 else if (dt[1] == vect_constant_def
2427 || dt[1] == vect_external_def
2428 || dt[1] == vect_internal_def)
2430 /* In SLP, need to check whether the shift count is the same,
2431 in loops if it is a constant or invariant, it is always
2435 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2438 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2439 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2440 scalar_shift_arg = false;
2445 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "operand mode requires invariant argument.");
2450 /* Vector shifted by vector. */
2451 if (!scalar_shift_arg)
2453 optab = optab_for_tree_code (code, vectype, optab_vector);
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "vector/vector shift/rotate found.");
2456 if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
2458 if (vect_print_dump_info (REPORT_DETAILS))
2459 fprintf (vect_dump, "unusable type for last operand in"
2460 " vector/vector shift/rotate.");
2464 /* See if the machine has a vector shifted by scalar insn and if not
2465 then see if it has a vector shifted by vector insn. */
2468 optab = optab_for_tree_code (code, vectype, optab_scalar);
2470 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2472 if (vect_print_dump_info (REPORT_DETAILS))
2473 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2477 optab = optab_for_tree_code (code, vectype, optab_vector);
2479 && (optab_handler (optab, TYPE_MODE (vectype))
2480 != CODE_FOR_nothing))
2482 scalar_shift_arg = false;
2484 if (vect_print_dump_info (REPORT_DETAILS))
2485 fprintf (vect_dump, "vector/vector shift/rotate found.");
2487 /* Unlike the other binary operators, shifts/rotates have
2488 the rhs being int, instead of the same type as the lhs,
2489 so make sure the scalar is the right type if we are
2490 dealing with vectors of short/char. */
2491 if (dt[1] == vect_constant_def)
2492 op1 = fold_convert (TREE_TYPE (vectype), op1);
2497 /* Supportable by target? */
2500 if (vect_print_dump_info (REPORT_DETAILS))
2501 fprintf (vect_dump, "no optab.");
2504 vec_mode = TYPE_MODE (vectype);
2505 icode = (int) optab_handler (optab, vec_mode);
2506 if (icode == CODE_FOR_nothing)
2508 if (vect_print_dump_info (REPORT_DETAILS))
2509 fprintf (vect_dump, "op not supported by target.");
2510 /* Check only during analysis. */
2511 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2512 || (vf < vect_min_worthwhile_factor (code)
2515 if (vect_print_dump_info (REPORT_DETAILS))
2516 fprintf (vect_dump, "proceeding using word mode.");
2519 /* Worthwhile without SIMD support? Check only during analysis. */
2520 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2521 && vf < vect_min_worthwhile_factor (code)
2524 if (vect_print_dump_info (REPORT_DETAILS))
2525 fprintf (vect_dump, "not worthwhile without SIMD support.");
2529 if (!vec_stmt) /* transformation not required. */
2531 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2532 if (vect_print_dump_info (REPORT_DETAILS))
2533 fprintf (vect_dump, "=== vectorizable_shift ===");
2534 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2540 if (vect_print_dump_info (REPORT_DETAILS))
2541 fprintf (vect_dump, "transform binary/unary operation.");
2544 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2546 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2547 created in the previous stages of the recursion, so no allocation is
2548 needed, except for the case of shift with scalar shift argument. In that
2549 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2550 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2551 In case of loop-based vectorization we allocate VECs of size 1. We
2552 allocate VEC_OPRNDS1 only in case of binary operation. */
2555 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2556 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2558 else if (scalar_shift_arg)
2559 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2561 prev_stmt_info = NULL;
2562 for (j = 0; j < ncopies; j++)
2567 if (scalar_shift_arg)
2569 /* Vector shl and shr insn patterns can be defined with scalar
2570 operand 2 (shift operand). In this case, use constant or loop
2571 invariant op1 directly, without extending it to vector mode
2573 optab_op2_mode = insn_data[icode].operand[2].mode;
2574 if (!VECTOR_MODE_P (optab_op2_mode))
2576 if (vect_print_dump_info (REPORT_DETAILS))
2577 fprintf (vect_dump, "operand 1 using scalar mode.");
2579 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2582 /* Store vec_oprnd1 for every vector stmt to be created
2583 for SLP_NODE. We check during the analysis that all
2584 the shift arguments are the same.
2585 TODO: Allow different constants for different vector
2586 stmts generated for an SLP instance. */
2587 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2588 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2593 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2594 (a special case for certain kind of vector shifts); otherwise,
2595 operand 1 should be of a vector type (the usual case). */
2597 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2600 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2604 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2606 /* Arguments are ready. Create the new vector stmt. */
2607 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2609 vop1 = VEC_index (tree, vec_oprnds1, i);
2610 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2611 new_temp = make_ssa_name (vec_dest, new_stmt);
2612 gimple_assign_set_lhs (new_stmt, new_temp);
2613 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2615 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2622 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2624 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2625 prev_stmt_info = vinfo_for_stmt (new_stmt);
2628 VEC_free (tree, heap, vec_oprnds0);
2629 VEC_free (tree, heap, vec_oprnds1);
2635 /* Function vectorizable_operation.
2637 Check if STMT performs a binary, unary or ternary operation that can
2639 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2640 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2641 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2644 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2645 gimple *vec_stmt, slp_tree slp_node)
2649 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2652 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2653 enum tree_code code;
2654 enum machine_mode vec_mode;
2661 enum vect_def_type dt[3]
2662 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2663 gimple new_stmt = NULL;
2664 stmt_vec_info prev_stmt_info;
2670 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2671 tree vop0, vop1, vop2;
2672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2675 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2678 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2681 /* Is STMT a vectorizable binary/unary operation? */
2682 if (!is_gimple_assign (stmt))
2685 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2688 code = gimple_assign_rhs_code (stmt);
2690 /* For pointer addition, we should use the normal plus for
2691 the vector addition. */
2692 if (code == POINTER_PLUS_EXPR)
2695 /* Support only unary or binary operations. */
2696 op_type = TREE_CODE_LENGTH (code);
2697 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2699 if (vect_print_dump_info (REPORT_DETAILS))
2700 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2705 scalar_dest = gimple_assign_lhs (stmt);
2706 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2708 /* Most operations cannot handle bit-precision types without extra
2710 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2711 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2712 /* Exception are bitwise binary operations. */
2713 && code != BIT_IOR_EXPR
2714 && code != BIT_XOR_EXPR
2715 && code != BIT_AND_EXPR)
2717 if (vect_print_dump_info (REPORT_DETAILS))
2718 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2722 op0 = gimple_assign_rhs1 (stmt);
2723 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2724 &def_stmt, &def, &dt[0], &vectype))
2726 if (vect_print_dump_info (REPORT_DETAILS))
2727 fprintf (vect_dump, "use not simple.");
2730 /* If op0 is an external or constant def use a vector type with
2731 the same size as the output vector type. */
2733 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2735 gcc_assert (vectype);
2738 if (vect_print_dump_info (REPORT_DETAILS))
2740 fprintf (vect_dump, "no vectype for scalar type ");
2741 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2747 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2748 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2749 if (nunits_out != nunits_in)
2752 if (op_type == binary_op || op_type == ternary_op)
2754 op1 = gimple_assign_rhs2 (stmt);
2755 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2758 if (vect_print_dump_info (REPORT_DETAILS))
2759 fprintf (vect_dump, "use not simple.");
2763 if (op_type == ternary_op)
2765 op2 = gimple_assign_rhs3 (stmt);
2766 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2769 if (vect_print_dump_info (REPORT_DETAILS))
2770 fprintf (vect_dump, "use not simple.");
2776 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2780 /* Multiple types in SLP are handled by creating the appropriate number of
2781 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2783 if (slp_node || PURE_SLP_STMT (stmt_info))
2786 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2788 gcc_assert (ncopies >= 1);
2790 /* Shifts are handled in vectorizable_shift (). */
2791 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2792 || code == RROTATE_EXPR)
2795 optab = optab_for_tree_code (code, vectype, optab_default);
2797 /* Supportable by target? */
2800 if (vect_print_dump_info (REPORT_DETAILS))
2801 fprintf (vect_dump, "no optab.");
2804 vec_mode = TYPE_MODE (vectype);
2805 icode = (int) optab_handler (optab, vec_mode);
2806 if (icode == CODE_FOR_nothing)
2808 if (vect_print_dump_info (REPORT_DETAILS))
2809 fprintf (vect_dump, "op not supported by target.");
2810 /* Check only during analysis. */
2811 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2812 || (vf < vect_min_worthwhile_factor (code)
2815 if (vect_print_dump_info (REPORT_DETAILS))
2816 fprintf (vect_dump, "proceeding using word mode.");
2819 /* Worthwhile without SIMD support? Check only during analysis. */
2820 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2821 && vf < vect_min_worthwhile_factor (code)
2824 if (vect_print_dump_info (REPORT_DETAILS))
2825 fprintf (vect_dump, "not worthwhile without SIMD support.");
2829 if (!vec_stmt) /* transformation not required. */
2831 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2832 if (vect_print_dump_info (REPORT_DETAILS))
2833 fprintf (vect_dump, "=== vectorizable_operation ===");
2834 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2840 if (vect_print_dump_info (REPORT_DETAILS))
2841 fprintf (vect_dump, "transform binary/unary operation.");
2844 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2846 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2847 created in the previous stages of the recursion, so no allocation is
2848 needed, except for the case of shift with scalar shift argument. In that
2849 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2850 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2851 In case of loop-based vectorization we allocate VECs of size 1. We
2852 allocate VEC_OPRNDS1 only in case of binary operation. */
2855 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2856 if (op_type == binary_op || op_type == ternary_op)
2857 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2858 if (op_type == ternary_op)
2859 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2862 /* In case the vectorization factor (VF) is bigger than the number
2863 of elements that we can fit in a vectype (nunits), we have to generate
2864 more than one vector stmt - i.e - we need to "unroll" the
2865 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2866 from one copy of the vector stmt to the next, in the field
2867 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2868 stages to find the correct vector defs to be used when vectorizing
2869 stmts that use the defs of the current stmt. The example below
2870 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2871 we need to create 4 vectorized stmts):
2873 before vectorization:
2874 RELATED_STMT VEC_STMT
2878 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2880 RELATED_STMT VEC_STMT
2881 VS1_0: vx0 = memref0 VS1_1 -
2882 VS1_1: vx1 = memref1 VS1_2 -
2883 VS1_2: vx2 = memref2 VS1_3 -
2884 VS1_3: vx3 = memref3 - -
2885 S1: x = load - VS1_0
2888 step2: vectorize stmt S2 (done here):
2889 To vectorize stmt S2 we first need to find the relevant vector
2890 def for the first operand 'x'. This is, as usual, obtained from
2891 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2892 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2893 relevant vector def 'vx0'. Having found 'vx0' we can generate
2894 the vector stmt VS2_0, and as usual, record it in the
2895 STMT_VINFO_VEC_STMT of stmt S2.
2896 When creating the second copy (VS2_1), we obtain the relevant vector
2897 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2898 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2899 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2900 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2901 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2902 chain of stmts and pointers:
2903 RELATED_STMT VEC_STMT
2904 VS1_0: vx0 = memref0 VS1_1 -
2905 VS1_1: vx1 = memref1 VS1_2 -
2906 VS1_2: vx2 = memref2 VS1_3 -
2907 VS1_3: vx3 = memref3 - -
2908 S1: x = load - VS1_0
2909 VS2_0: vz0 = vx0 + v1 VS2_1 -
2910 VS2_1: vz1 = vx1 + v1 VS2_2 -
2911 VS2_2: vz2 = vx2 + v1 VS2_3 -
2912 VS2_3: vz3 = vx3 + v1 - -
2913 S2: z = x + 1 - VS2_0 */
2915 prev_stmt_info = NULL;
2916 for (j = 0; j < ncopies; j++)
2921 if (op_type == binary_op || op_type == ternary_op)
2922 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2925 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2927 if (op_type == ternary_op)
2929 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2930 VEC_quick_push (tree, vec_oprnds2,
2931 vect_get_vec_def_for_operand (op2, stmt, NULL));
2936 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2937 if (op_type == ternary_op)
2939 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2940 VEC_quick_push (tree, vec_oprnds2,
2941 vect_get_vec_def_for_stmt_copy (dt[2],
2946 /* Arguments are ready. Create the new vector stmt. */
2947 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2949 vop1 = ((op_type == binary_op || op_type == ternary_op)
2950 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2951 vop2 = ((op_type == ternary_op)
2952 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2953 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2955 new_temp = make_ssa_name (vec_dest, new_stmt);
2956 gimple_assign_set_lhs (new_stmt, new_temp);
2957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2959 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2966 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2968 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2969 prev_stmt_info = vinfo_for_stmt (new_stmt);
2972 VEC_free (tree, heap, vec_oprnds0);
2974 VEC_free (tree, heap, vec_oprnds1);
2976 VEC_free (tree, heap, vec_oprnds2);
/* NOTE(review): line numbers below are from the original file; gaps mark
   elided lines.  Code text kept verbatim; only comments changed.  */
2982 /* Get vectorized definitions for loop-based vectorization. For the first
2983 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2984 scalar operand), and for the rest we get a copy with
2985 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2986 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2987 The vectors are collected into VEC_OPRNDS. */
2990 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2991 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2995 /* Get first vector operand. */
2996 /* All the vector operands except the very first one (that is scalar oprnd)
2998 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2999 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3001 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3003 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3005 /* Get second vector operand. */
3006 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3007 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* NOTE(review): the update of *OPRND to the latest vector def (so that
   the recursive call continues the def chain) appears to be among the
   elided lines here — verify against the original file.  */
3011 /* For conversion in multiple steps, continue to get operands
3014 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
/* NOTE(review): line numbers below are from the original file; gaps mark
   elided lines.  Code text kept verbatim; only comments changed.  */
3018 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3019 For multi-step conversions store the resulting vectors and call the function
3023 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3024 int multi_step_cvt, gimple stmt,
3025 VEC (tree, heap) *vec_dsts,
3026 gimple_stmt_iterator *gsi,
3027 slp_tree slp_node, enum tree_code code,
3028 stmt_vec_info *prev_stmt_info)
3031 tree vop0, vop1, new_tmp, vec_dest;
3033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* VEC_DSTS is ordered so that popping yields the destination variable
   for the current narrowing level.  */
3035 vec_dest = VEC_pop (tree, vec_dsts);
/* Narrowing combines two source vectors into one destination vector,
   hence the stride of 2 over the operand vector.  */
3037 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3039 /* Create demotion operation. */
3040 vop0 = VEC_index (tree, *vec_oprnds, i);
3041 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3042 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3043 new_tmp = make_ssa_name (vec_dest, new_stmt);
3044 gimple_assign_set_lhs (new_stmt, new_tmp);
3045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3048 /* Store the resulting vector for next recursive call. */
3049 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3052 /* This is the last step of the conversion sequence. Store the
3053 vectors in SLP_NODE or in vector info of the scalar statement
3054 (or in STMT_VINFO_RELATED_STMT chain). */
3056 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3059 if (!*prev_stmt_info)
3060 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3062 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3064 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3069 /* For multi-step demotion operations we first generate demotion operations
3070 from the source type to the intermediate types, and then combine the
3071 results (stored in VEC_OPRNDS) in demotion operation to the destination
3075 /* At each level of recursion we have half of the operands we had at the
3077 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3078 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3079 stmt, vec_dsts, gsi, slp_node,
3080 code, prev_stmt_info);
/* NOTE(review): line numbers below are from the original file; gaps mark
   elided lines (braces, "return false;" statements, parts of
   conditions).  Code text kept verbatim; only comments changed.  */
3085 /* Function vectorizable_type_demotion
3087 Check if STMT performs a binary or unary operation that involves
3088 type demotion, and if it can be vectorized.
3089 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3090 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3091 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3094 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3095 gimple *vec_stmt, slp_tree slp_node)
3100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3102 enum tree_code code, code1 = ERROR_MARK;
3105 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3106 stmt_vec_info prev_stmt_info;
3113 int multi_step_cvt = 0;
3114 VEC (tree, heap) *vec_oprnds0 = NULL;
3115 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3116 tree last_oprnd, intermediate_type;
3117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3119 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3122 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3125 /* Is STMT a vectorizable type-demotion operation? */
3126 if (!is_gimple_assign (stmt))
3129 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3132 code = gimple_assign_rhs_code (stmt);
3133 if (!CONVERT_EXPR_CODE_P (code))
3136 scalar_dest = gimple_assign_lhs (stmt);
3137 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3139 /* Check the operands of the operation. */
/* Only int->int and float->float conversions are handled here.  */
3140 op0 = gimple_assign_rhs1 (stmt);
3141 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3142 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3143 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3144 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
/* Reject conversions where either side's precision does not fill its
   machine mode (bit-field-like precision).  */
3147 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3148 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3149 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3150 || ((TYPE_PRECISION (TREE_TYPE (op0))
3151 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3153 if (vect_print_dump_info (REPORT_DETAILS))
3154 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3158 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3159 &def_stmt, &def, &dt[0], &vectype_in))
3161 if (vect_print_dump_info (REPORT_DETAILS))
3162 fprintf (vect_dump, "use not simple.");
3165 /* If op0 is an external def use a vector type with the
3166 same size as the output vector type if possible. */
3168 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3170 gcc_assert (vectype_in);
3173 if (vect_print_dump_info (REPORT_DETAILS))
3175 fprintf (vect_dump, "no vectype for scalar type ");
3176 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* Demotion requires more input lanes than output lanes.  */
3182 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3183 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3184 if (nunits_in >= nunits_out)
3187 /* Multiple types in SLP are handled by creating the appropriate number of
3188 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3190 if (slp_node || PURE_SLP_STMT (stmt_info))
3193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3194 gcc_assert (ncopies >= 1);
3196 /* Supportable by target? */
3197 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3198 &code1, &multi_step_cvt, &interm_types))
/* Analysis phase: record the decision and its cost only.  */
3201 if (!vec_stmt) /* transformation not required. */
3203 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "=== vectorizable_demotion ===");
3206 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transformation phase starts here.  */
3211 if (vect_print_dump_info (REPORT_DETAILS))
3212 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3215 /* In case of multi-step demotion, we first generate demotion operations to
3216 the intermediate types, and then from those types to the final one.
3217 We create vector destinations for the intermediate type (TYPES) received
3218 from supportable_narrowing_operation, and store them in the correct order
3219 for future use in vect_create_vectorized_demotion_stmts(). */
3221 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3223 vec_dsts = VEC_alloc (tree, heap, 1);
/* Push the final destination first; intermediate destinations are
   pushed after it so the recursion can pop them innermost-first.  */
3225 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3226 VEC_quick_push (tree, vec_dsts, vec_dest);
3230 for (i = VEC_length (tree, interm_types) - 1;
3231 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3233 vec_dest = vect_create_destination_var (scalar_dest,
3235 VEC_quick_push (tree, vec_dsts, vec_dest);
3239 /* In case the vectorization factor (VF) is bigger than the number
3240 of elements that we can fit in a vectype (nunits), we have to generate
3241 more than one vector stmt - i.e - we need to "unroll" the
3242 vector stmt by a factor VF/nunits. */
3244 prev_stmt_info = NULL;
3245 for (j = 0; j < ncopies; j++)
/* SLP: defs come from the SLP node; loop-based: build 2^multi_step_cvt*2
   defs via vect_get_loop_based_defs.  NOTE(review): the branch structure
   around these two alternatives is among the elided lines.  */
3249 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3252 VEC_free (tree, heap, vec_oprnds0);
3253 vec_oprnds0 = VEC_alloc (tree, heap,
3254 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3255 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3256 vect_pow2 (multi_step_cvt) - 1);
3259 /* Arguments are ready. Create the new vector stmts. */
/* Copy VEC_DSTS because the callee pops it destructively.  */
3260 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3261 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3262 multi_step_cvt, stmt, tmp_vec_dsts,
3263 gsi, slp_node, code1,
3267 VEC_free (tree, heap, vec_oprnds0);
3268 VEC_free (tree, heap, vec_dsts);
3269 VEC_free (tree, heap, tmp_vec_dsts);
3270 VEC_free (tree, heap, interm_types);
3272 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
/* NOTE(review): line numbers below are from the original file; gaps mark
   elided lines.  Code text kept verbatim; only comments changed.  */
3277 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3278 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3279 the resulting vectors and call the function recursively. */
3282 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3283 VEC (tree, heap) **vec_oprnds1,
3284 int multi_step_cvt, gimple stmt,
3285 VEC (tree, heap) *vec_dsts,
3286 gimple_stmt_iterator *gsi,
3287 slp_tree slp_node, enum tree_code code1,
3288 enum tree_code code2, tree decl1,
3289 tree decl2, int op_type,
3290 stmt_vec_info *prev_stmt_info)
3293 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3294 gimple new_stmt1, new_stmt2;
3295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3296 VEC (tree, heap) *vec_tmp;
3298 vec_dest = VEC_pop (tree, vec_dsts);
/* Each input vector widens into two output vectors (low/high halves).  */
3299 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3301 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3303 if (op_type == binary_op)
3304 vop1 = VEC_index (tree, *vec_oprnds1, i);
3308 /* Generate the two halves of promotion operation. */
3309 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3310 op_type, vec_dest, gsi, stmt);
3311 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3312 op_type, vec_dest, gsi, stmt);
/* The halves may be emitted as target-builtin calls or as plain
   assignments; fetch the result from the matching lhs accessor.  */
3313 if (is_gimple_call (new_stmt1))
3315 new_tmp1 = gimple_call_lhs (new_stmt1);
3316 new_tmp2 = gimple_call_lhs (new_stmt2);
3320 new_tmp1 = gimple_assign_lhs (new_stmt1);
3321 new_tmp2 = gimple_assign_lhs (new_stmt2);
3326 /* Store the results for the recursive call. */
3327 VEC_quick_push (tree, vec_tmp, new_tmp1);
3328 VEC_quick_push (tree, vec_tmp, new_tmp2);
3332 /* Last step of the promotion sequence - store the results. */
3335 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3336 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3340 if (!*prev_stmt_info)
3341 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3343 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3345 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3346 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3347 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3354 /* For a multi-step promotion operation we call this function
3355 recursively for every stage. We start from the input type,
3356 create promotion operations to the intermediate types, and then
3357 create promotions to the output type. */
3358 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
/* NOTE(review): DECL2 is passed for BOTH decl arguments below; DECL1
   was presumably intended for the first.  Harmless unless a multi-step
   conversion is implemented via target builtins — verify against the
   caller's guarantees (binary builtin widening never multi-steps).  */
3359 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3360 multi_step_cvt - 1, stmt,
3361 vec_dsts, gsi, slp_node, code1,
3362 code2, decl2, decl2, op_type,
3366 VEC_free (tree, heap, vec_tmp);
3370 /* Function vectorizable_type_promotion
3372 Check if STMT performs a binary or unary operation that involves
3373 type promotion, and if it can be vectorized.
3374 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3375 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3376 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3379 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3380 gimple *vec_stmt, slp_tree slp_node)
3384 tree op0, op1 = NULL;
3385 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3386 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3388 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3389 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3393 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3394 stmt_vec_info prev_stmt_info;
3401 tree intermediate_type = NULL_TREE;
3402 int multi_step_cvt = 0;
3403 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3404 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3405 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3408 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3411 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3414 /* Is STMT a vectorizable type-promotion operation? */
3415 if (!is_gimple_assign (stmt))
3418 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3421 code = gimple_assign_rhs_code (stmt);
3422 if (!CONVERT_EXPR_CODE_P (code)
3423 && code != WIDEN_MULT_EXPR
3424 && code != WIDEN_LSHIFT_EXPR)
3427 scalar_dest = gimple_assign_lhs (stmt);
3428 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3430 /* Check the operands of the operation. */
3431 op0 = gimple_assign_rhs1 (stmt);
3432 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3433 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3434 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3435 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3436 && CONVERT_EXPR_CODE_P (code))))
3439 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3440 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3441 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3442 || ((TYPE_PRECISION (TREE_TYPE (op0))
3443 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "type promotion to/from bit-precision "
3451 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3452 &def_stmt, &def, &dt[0], &vectype_in))
3454 if (vect_print_dump_info (REPORT_DETAILS))
3455 fprintf (vect_dump, "use not simple.");
3459 op_type = TREE_CODE_LENGTH (code);
3460 if (op_type == binary_op)
3464 op1 = gimple_assign_rhs2 (stmt);
3465 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3467 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3469 if (CONSTANT_CLASS_P (op0))
3470 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3471 &def_stmt, &def, &dt[1], &vectype_in);
3473 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3478 if (vect_print_dump_info (REPORT_DETAILS))
3479 fprintf (vect_dump, "use not simple.");
3485 /* If op0 is an external or constant def use a vector type with
3486 the same size as the output vector type. */
3488 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3490 gcc_assert (vectype_in);
3493 if (vect_print_dump_info (REPORT_DETAILS))
3495 fprintf (vect_dump, "no vectype for scalar type ");
3496 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3502 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3503 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3504 if (nunits_in <= nunits_out)
3507 /* Multiple types in SLP are handled by creating the appropriate number of
3508 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3510 if (slp_node || PURE_SLP_STMT (stmt_info))
3513 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3515 gcc_assert (ncopies >= 1);
3517 /* Supportable by target? */
3518 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3519 &decl1, &decl2, &code1, &code2,
3520 &multi_step_cvt, &interm_types))
3523 /* Binary widening operation can only be supported directly by the
3525 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3527 if (!vec_stmt) /* transformation not required. */
3529 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3530 if (vect_print_dump_info (REPORT_DETAILS))
3531 fprintf (vect_dump, "=== vectorizable_promotion ===");
3532 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3538 if (vect_print_dump_info (REPORT_DETAILS))
3539 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3542 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3544 if (CONSTANT_CLASS_P (op0))
3545 op0 = fold_convert (TREE_TYPE (op1), op0);
3546 else if (CONSTANT_CLASS_P (op1))
3547 op1 = fold_convert (TREE_TYPE (op0), op1);
3551 /* In case of multi-step promotion, we first generate promotion operations
3552 to the intermediate types, and then from that types to the final one.
3553 We store vector destination in VEC_DSTS in the correct order for
3554 recursive creation of promotion operations in
3555 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3556 according to TYPES received from supportable_widening_operation(). */
3558 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3560 vec_dsts = VEC_alloc (tree, heap, 1);
3562 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3563 VEC_quick_push (tree, vec_dsts, vec_dest);
3567 for (i = VEC_length (tree, interm_types) - 1;
3568 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3570 vec_dest = vect_create_destination_var (scalar_dest,
3572 VEC_quick_push (tree, vec_dsts, vec_dest);
3578 vec_oprnds0 = VEC_alloc (tree, heap,
3579 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3580 if (op_type == binary_op)
3581 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3583 else if (code == WIDEN_LSHIFT_EXPR)
3584 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3586 /* In case the vectorization factor (VF) is bigger than the number
3587 of elements that we can fit in a vectype (nunits), we have to generate
3588 more than one vector stmt - i.e - we need to "unroll" the
3589 vector stmt by a factor VF/nunits. */
3591 prev_stmt_info = NULL;
3592 for (j = 0; j < ncopies; j++)
3599 if (code == WIDEN_LSHIFT_EXPR)
3602 /* Store vec_oprnd1 for every vector stmt to be created
3603 for SLP_NODE. We check during the analysis that all
3604 the shift arguments are the same. */
3605 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3606 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3608 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
3612 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3617 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3618 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3619 if (op_type == binary_op)
3621 if (code == WIDEN_LSHIFT_EXPR)
3624 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3625 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3631 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3632 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3633 if (op_type == binary_op)
3635 if (code == WIDEN_LSHIFT_EXPR)
3638 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3639 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3643 /* Arguments are ready. Create the new vector stmts. */
3644 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3645 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3646 multi_step_cvt, stmt,
3648 gsi, slp_node, code1, code2,
3649 decl1, decl2, op_type,
3653 VEC_free (tree, heap, vec_dsts);
3654 VEC_free (tree, heap, tmp_vec_dsts);
3655 VEC_free (tree, heap, interm_types);
3656 VEC_free (tree, heap, vec_oprnds0);
3657 VEC_free (tree, heap, vec_oprnds1);
3659 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3664 /* Function vectorizable_store.
3666 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3669 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3670 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3673 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3679 tree vec_oprnd = NULL_TREE;
3680 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3681 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3682 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3684 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3685 struct loop *loop = NULL;
3686 enum machine_mode vec_mode;
3688 enum dr_alignment_support alignment_support_scheme;
3691 enum vect_def_type dt;
3692 stmt_vec_info prev_stmt_info = NULL;
3693 tree dataref_ptr = NULL_TREE;
3694 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3697 gimple next_stmt, first_stmt = NULL;
3698 bool strided_store = false;
3699 bool store_lanes_p = false;
3700 unsigned int group_size, i;
3701 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3703 VEC(tree,heap) *vec_oprnds = NULL;
3704 bool slp = (slp_node != NULL);
3705 unsigned int vec_num;
3706 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* NOTE(review): LOOP_VINFO_LOOP is presumably reached only when loop_vinfo
   is non-NULL (bb SLP has no containing loop) -- confirm against the full
   source; later code tests LOOP both for NULL and via bb_vinfo.  */
3710 loop = LOOP_VINFO_LOOP (loop_vinfo);
3712 /* Multiple types in SLP are handled by creating the appropriate number of
3713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3715 if (slp || PURE_SLP_STMT (stmt_info))
3718 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3720 gcc_assert (ncopies >= 1);
3722 /* FORNOW. This restriction should be relaxed. */
3723 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3725 if (vect_print_dump_info (REPORT_DETAILS))
3726 fprintf (vect_dump, "multiple types in nested loop.");
/* Only statements marked relevant (or any stmt during basic-block
   vectorization) with an internal definition are candidates.  */
3730 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3733 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3736 /* Is vectorizable store? */
3738 if (!is_gimple_assign (stmt))
3741 scalar_dest = gimple_assign_lhs (stmt);
3742 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3743 && is_pattern_stmt_p (stmt_info))
3744 scalar_dest = TREE_OPERAND (scalar_dest, 0);
/* The LHS must be a memory reference for this assignment to be a store.  */
3745 if (TREE_CODE (scalar_dest) != ARRAY_REF
3746 && TREE_CODE (scalar_dest) != INDIRECT_REF
3747 && TREE_CODE (scalar_dest) != COMPONENT_REF
3748 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3749 && TREE_CODE (scalar_dest) != REALPART_EXPR
3750 && TREE_CODE (scalar_dest) != MEM_REF)
3753 gcc_assert (gimple_assign_single_p (stmt));
3754 op = gimple_assign_rhs1 (stmt);
/* The stored value must have a def the vectorizer understands; DT is
   reused later when creating defs for additional copies.  */
3755 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3757 if (vect_print_dump_info (REPORT_DETAILS))
3758 fprintf (vect_dump, "use not simple.");
3762 elem_type = TREE_TYPE (vectype);
3763 vec_mode = TYPE_MODE (vectype);
3765 /* FORNOW. In some cases can vectorize even if data-type not supported
3766 (e.g. - array initialization with 0). */
3767 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3770 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Negative-step stores are rejected here (loads handle that case by
   reversing the vector; see vectorizable_load below).  */
3773 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3775 if (vect_print_dump_info (REPORT_DETAILS))
3776 fprintf (vect_dump, "negative step for store.");
/* Handle interleaved (strided-access) store groups.  */
3780 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3782 strided_store = true;
3783 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3784 if (!slp && !PURE_SLP_STMT (stmt_info))
3786 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
/* Prefer a target store-lanes instruction when available; otherwise fall
   back to permute-based strided stores.  */
3787 if (vect_store_lanes_supported (vectype, group_size))
3788 store_lanes_p = true;
3789 else if (!vect_strided_store_supported (vectype, group_size))
3793 if (first_stmt == stmt)
3795 /* STMT is the leader of the group. Check the operands of all the
3796 stmts of the group. */
3797 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3800 gcc_assert (gimple_assign_single_p (next_stmt));
3801 op = gimple_assign_rhs1 (next_stmt);
3802 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3805 if (vect_print_dump_info (REPORT_DETAILS))
3806 fprintf (vect_dump, "use not simple.");
3809 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* Analysis phase ends here: record the stmt type and the cost model
   contribution, then return without generating code.  */
3814 if (!vec_stmt) /* transformation not required. */
3816 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3817 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
/* Transformation phase: emit the vector store(s).  */
3825 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3826 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3828 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3831 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3833 /* We vectorize all the stmts of the interleaving group when we
3834 reach the last stmt in the group. */
3835 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3836 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3845 strided_store = false;
3846 /* VEC_NUM is the number of vect stmts to be created for this
3848 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3849 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3850 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3853 /* VEC_NUM is the number of vect stmts to be created for this
3855 vec_num = group_size;
3861 group_size = vec_num = 1;
3864 if (vect_print_dump_info (REPORT_DETAILS))
3865 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3867 dr_chain = VEC_alloc (tree, heap, group_size);
3868 oprnds = VEC_alloc (tree, heap, group_size);
3870 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3871 gcc_assert (alignment_support_scheme);
3872 /* Targets with store-lane instructions must not require explicit
3874 gcc_assert (!store_lanes_p
3875 || alignment_support_scheme == dr_aligned
3876 || alignment_support_scheme == dr_unaligned_supported);
/* For store-lanes the data reference is an array of VEC_NUM vectors;
   otherwise it is a single vector.  */
3879 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3881 aggr_type = vectype;
3883 /* In case the vectorization factor (VF) is bigger than the number
3884 of elements that we can fit in a vectype (nunits), we have to generate
3885 more than one vector stmt - i.e - we need to "unroll" the
3886 vector stmt by a factor VF/nunits. For more details see documentation in
3887 vect_get_vec_def_for_copy_stmt. */
3889 /* In case of interleaving (non-unit strided access):
3896 We create vectorized stores starting from base address (the access of the
3897 first stmt in the chain (S2 in the above example), when the last store stmt
3898 of the chain (S4) is reached:
3901 VS2: &base + vec_size*1 = vx0
3902 VS3: &base + vec_size*2 = vx1
3903 VS4: &base + vec_size*3 = vx3
3905 Then permutation statements are generated:
3907 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3908 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3911 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3912 (the order of the data-refs in the output of vect_permute_store_chain
3913 corresponds to the order of scalar stmts in the interleaving chain - see
3914 the documentation of vect_permute_store_chain()).
3916 In case of both multiple types and interleaving, above vector stores and
3917 permutation stmts are created for every copy. The result vector stmts are
3918 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3919 STMT_VINFO_RELATED_STMT for the next copies.
3922 prev_stmt_info = NULL;
3923 for (j = 0; j < ncopies; j++)
3932 /* Get vectorized arguments for SLP_NODE. */
3933 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3936 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3940 /* For interleaved stores we collect vectorized defs for all the
3941 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3942 used as an input to vect_permute_store_chain(), and OPRNDS as
3943 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3945 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3946 OPRNDS are of size 1. */
3947 next_stmt = first_stmt;
3948 for (i = 0; i < group_size; i++)
3950 /* Since gaps are not supported for interleaved stores,
3951 GROUP_SIZE is the exact number of stmts in the chain.
3952 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3953 there is no interleaving, GROUP_SIZE is 1, and only one
3954 iteration of the loop will be executed. */
3955 gcc_assert (next_stmt
3956 && gimple_assign_single_p (next_stmt));
3957 op = gimple_assign_rhs1 (next_stmt);
3959 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3961 VEC_quick_push(tree, dr_chain, vec_oprnd);
3962 VEC_quick_push(tree, oprnds, vec_oprnd);
3963 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3967 /* We should have caught mismatched types earlier. */
3968 gcc_assert (useless_type_conversion_p (vectype,
3969 TREE_TYPE (vec_oprnd)));
3970 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3971 NULL_TREE, &dummy, gsi,
3972 &ptr_incr, false, &inv_p);
3973 gcc_assert (bb_vinfo || !inv_p);
3977 /* For interleaved stores we created vectorized defs for all the
3978 defs stored in OPRNDS in the previous iteration (previous copy).
3979 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3980 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3982 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3983 OPRNDS are of size 1. */
3984 for (i = 0; i < group_size; i++)
3986 op = VEC_index (tree, oprnds, i);
3987 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3989 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3990 VEC_replace(tree, dr_chain, i, vec_oprnd);
3991 VEC_replace(tree, oprnds, i, vec_oprnd);
/* Advance the data-ref pointer to the next copy's chunk.  */
3993 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3994 TYPE_SIZE_UNIT (aggr_type));
4001 /* Combine all the vectors into an array. */
4002 vec_array = create_vector_array (vectype, vec_num);
4003 for (i = 0; i < vec_num; i++)
4005 vec_oprnd = VEC_index (tree, dr_chain, i);
4006 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4010 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4011 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4012 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4013 gimple_call_set_lhs (new_stmt, data_ref);
4014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4015 mark_symbols_for_renaming (new_stmt);
/* Permute-based path: interleave the group's vectors, then emit one
   vector store per permuted result.  */
4022 result_chain = VEC_alloc (tree, heap, group_size);
4024 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4028 next_stmt = first_stmt;
4029 for (i = 0; i < vec_num; i++)
4031 struct ptr_info_def *pi;
4034 /* Bump the vector pointer. */
4035 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4039 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4040 else if (strided_store)
4041 /* For strided stores vectorized defs are interleaved in
4042 vect_permute_store_chain(). */
4043 vec_oprnd = VEC_index (tree, result_chain, i);
4045 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4046 build_int_cst (reference_alias_ptr_type
4047 (DR_REF (first_dr)), 0));
/* Record known (mis)alignment on the pointer / access type so later
   passes and expansion see the correct alignment guarantees.  */
4048 pi = get_ptr_info (dataref_ptr);
4049 pi->align = TYPE_ALIGN_UNIT (vectype);
4050 if (aligned_access_p (first_dr))
4052 else if (DR_MISALIGNMENT (first_dr) == -1)
4054 TREE_TYPE (data_ref)
4055 = build_aligned_type (TREE_TYPE (data_ref),
4056 TYPE_ALIGN (elem_type));
4057 pi->align = TYPE_ALIGN_UNIT (elem_type);
4062 TREE_TYPE (data_ref)
4063 = build_aligned_type (TREE_TYPE (data_ref),
4064 TYPE_ALIGN (elem_type));
4065 pi->misalign = DR_MISALIGNMENT (first_dr);
4068 /* Arguments are ready. Create the new vector stmt. */
4069 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4071 mark_symbols_for_renaming (new_stmt);
4076 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* Chain the copies: first copy goes in VEC_STMT, later copies are
   linked through STMT_VINFO_RELATED_STMT.  */
4084 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4087 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release temporary vectors.  */
4091 VEC_free (tree, heap, dr_chain);
4092 VEC_free (tree, heap, oprnds);
4094 VEC_free (tree, heap, result_chain);
4096 VEC_free (tree, heap, vec_oprnds);
4101 /* Given a vector type VECTYPE returns a builtin DECL to be used
4102 for vector permutation and returns the mask that implements
4103 reversal of the vector elements. If that is impossible to do,
4107 perm_mask_for_reverse (tree vectype)
4109 tree mask_elt_type, mask_type, mask_vec;
4113 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4114 sel = XALLOCAVEC (unsigned char, nunits);
/* Build the element-reversal selector: lane I takes source lane
   NUNITS - 1 - I.  */
4116 for (i = 0; i < nunits; ++i)
4117 sel[i] = nunits - 1 - i;
/* Give up if the target cannot perform this permutation on the
   vector's mode.  */
4119 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
/* The mask vector uses an unsigned integer element type of the same
   width as the data elements.  */
4123 = lang_hooks.types.type_for_size
4124 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4125 mask_type = get_vectype_for_scalar_type (mask_elt_type);
/* tree_cons prepends, so pushing 0..NUNITS-1 yields the reversed
   element order in the resulting VECTOR_CST.  */
4128 for (i = 0; i < nunits; i++)
4129 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
4130 mask_vec = build_vector (mask_type, mask_vec);
4135 /* Given a vector variable X, that was generated for the scalar LHS of
4136 STMT, generate instructions to reverse the vector elements of X,
4137 insert them a *GSI and return the permuted vector variable. */
4140 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4142 tree vectype = TREE_TYPE (x);
4143 tree mask_vec, perm_dest, data_ref;
/* Obtain the element-reversal permutation mask for this vector type
   (see perm_mask_for_reverse above).  */
4146 mask_vec = perm_mask_for_reverse (vectype);
4148 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4150 /* Generate the permute statement. */
4151 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4153 data_ref = make_ssa_name (perm_dest, perm_stmt);
4154 gimple_set_lhs (perm_stmt, data_ref);
/* Insert the permute at *GSI, as part of the vectorization of STMT.  */
4155 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4160 /* vectorizable_load.
4162 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4164 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4165 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4166 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4169 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4170 slp_tree slp_node, slp_instance slp_node_instance)
4173 tree vec_dest = NULL;
4174 tree data_ref = NULL;
4175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4176 stmt_vec_info prev_stmt_info;
4177 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4178 struct loop *loop = NULL;
4179 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4180 bool nested_in_vect_loop = false;
4181 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4182 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4185 enum machine_mode mode;
4186 gimple new_stmt = NULL;
4188 enum dr_alignment_support alignment_support_scheme;
4189 tree dataref_ptr = NULL_TREE;
4191 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4193 int i, j, group_size;
4194 tree msq = NULL_TREE, lsq;
4195 tree offset = NULL_TREE;
4196 tree realignment_token = NULL_TREE;
4198 VEC(tree,heap) *dr_chain = NULL;
4199 bool strided_load = false;
4200 bool load_lanes_p = false;
4204 bool compute_in_loop = false;
4205 struct loop *at_loop;
4207 bool slp = (slp_node != NULL);
4208 bool slp_perm = false;
4209 enum tree_code code;
4210 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4216 loop = LOOP_VINFO_LOOP (loop_vinfo);
4217 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4218 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4223 /* Multiple types in SLP are handled by creating the appropriate number of
4224 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4226 if (slp || PURE_SLP_STMT (stmt_info))
4229 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4231 gcc_assert (ncopies >= 1);
4233 /* FORNOW. This restriction should be relaxed. */
4234 if (nested_in_vect_loop && ncopies > 1)
4236 if (vect_print_dump_info (REPORT_DETAILS))
4237 fprintf (vect_dump, "multiple types in nested loop.");
4241 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4244 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4247 /* Is vectorizable load? */
4248 if (!is_gimple_assign (stmt))
4251 scalar_dest = gimple_assign_lhs (stmt);
4252 if (TREE_CODE (scalar_dest) != SSA_NAME)
4255 code = gimple_assign_rhs_code (stmt);
4256 if (code != ARRAY_REF
4257 && code != INDIRECT_REF
4258 && code != COMPONENT_REF
4259 && code != IMAGPART_EXPR
4260 && code != REALPART_EXPR
4262 && TREE_CODE_CLASS (code) != tcc_declaration)
4265 if (!STMT_VINFO_DATA_REF (stmt_info))
4268 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4269 if (negative && ncopies > 1)
4271 if (vect_print_dump_info (REPORT_DETAILS))
4272 fprintf (vect_dump, "multiple types with negative step.");
4276 elem_type = TREE_TYPE (vectype);
4277 mode = TYPE_MODE (vectype);
4279 /* FORNOW. In some cases can vectorize even if data-type not supported
4280 (e.g. - data copies). */
4281 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4283 if (vect_print_dump_info (REPORT_DETAILS))
4284 fprintf (vect_dump, "Aligned load, but unsupported type.");
4288 /* Check if the load is a part of an interleaving chain. */
4289 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4291 strided_load = true;
4293 gcc_assert (! nested_in_vect_loop);
4295 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4296 if (!slp && !PURE_SLP_STMT (stmt_info))
4298 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4299 if (vect_load_lanes_supported (vectype, group_size))
4300 load_lanes_p = true;
4301 else if (!vect_strided_load_supported (vectype, group_size))
4308 gcc_assert (!strided_load);
4309 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4310 if (alignment_support_scheme != dr_aligned
4311 && alignment_support_scheme != dr_unaligned_supported)
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "negative step but alignment required.");
4317 if (!perm_mask_for_reverse (vectype))
4319 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "negative step and reversing not supported.");
4325 if (!vec_stmt) /* transformation not required. */
4327 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4328 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4332 if (vect_print_dump_info (REPORT_DETAILS))
4333 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4339 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4341 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4342 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4343 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4345 /* Check if the chain of loads is already vectorized. */
4346 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4348 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4351 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4352 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4354 /* VEC_NUM is the number of vect stmts to be created for this group. */
4357 strided_load = false;
4358 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4359 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4363 vec_num = group_size;
4369 group_size = vec_num = 1;
4372 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4373 gcc_assert (alignment_support_scheme);
4374 /* Targets with load-lane instructions must not require explicit
4376 gcc_assert (!load_lanes_p
4377 || alignment_support_scheme == dr_aligned
4378 || alignment_support_scheme == dr_unaligned_supported);
4380 /* In case the vectorization factor (VF) is bigger than the number
4381 of elements that we can fit in a vectype (nunits), we have to generate
4382 more than one vector stmt - i.e - we need to "unroll" the
4383 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4384 from one copy of the vector stmt to the next, in the field
4385 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4386 stages to find the correct vector defs to be used when vectorizing
4387 stmts that use the defs of the current stmt. The example below
4388 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4389 need to create 4 vectorized stmts):
4391 before vectorization:
4392 RELATED_STMT VEC_STMT
4396 step 1: vectorize stmt S1:
4397 We first create the vector stmt VS1_0, and, as usual, record a
4398 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4399 Next, we create the vector stmt VS1_1, and record a pointer to
4400 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4401 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4403 RELATED_STMT VEC_STMT
4404 VS1_0: vx0 = memref0 VS1_1 -
4405 VS1_1: vx1 = memref1 VS1_2 -
4406 VS1_2: vx2 = memref2 VS1_3 -
4407 VS1_3: vx3 = memref3 - -
4408 S1: x = load - VS1_0
4411 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4412 information we recorded in RELATED_STMT field is used to vectorize
4415 /* In case of interleaving (non-unit strided access):
4422 Vectorized loads are created in the order of memory accesses
4423 starting from the access of the first stmt of the chain:
4426 VS2: vx1 = &base + vec_size*1
4427 VS3: vx3 = &base + vec_size*2
4428 VS4: vx4 = &base + vec_size*3
4430 Then permutation statements are generated:
4432 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4433 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4436 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4437 (the order of the data-refs in the output of vect_permute_load_chain
4438 corresponds to the order of scalar stmts in the interleaving chain - see
4439 the documentation of vect_permute_load_chain()).
4440 The generation of permutation stmts and recording them in
4441 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4443 In case of both multiple types and interleaving, the vector loads and
4444 permutation stmts above are created for every copy. The result vector
4445 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4446 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4448 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4449 on a target that supports unaligned accesses (dr_unaligned_supported)
4450 we generate the following code:
4454 p = p + indx * vectype_size;
4459 Otherwise, the data reference is potentially unaligned on a target that
4460 does not support unaligned accesses (dr_explicit_realign_optimized) -
4461 then generate the following code, in which the data in each iteration is
4462 obtained by two vector loads, one from the previous iteration, and one
4463 from the current iteration:
4465 msq_init = *(floor(p1))
4466 p2 = initial_addr + VS - 1;
4467 realignment_token = call target_builtin;
4470 p2 = p2 + indx * vectype_size
4472 vec_dest = realign_load (msq, lsq, realignment_token)
4477 /* If the misalignment remains the same throughout the execution of the
4478 loop, we can create the init_addr and permutation mask at the loop
4479 preheader. Otherwise, it needs to be created inside the loop.
4480 This can only occur when vectorizing memory accesses in the inner-loop
4481 nested within an outer-loop that is being vectorized. */
4483 if (loop && nested_in_vect_loop_p (loop, stmt)
4484 && (TREE_INT_CST_LOW (DR_STEP (dr))
4485 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4487 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4488 compute_in_loop = true;
4491 if ((alignment_support_scheme == dr_explicit_realign_optimized
4492 || alignment_support_scheme == dr_explicit_realign)
4493 && !compute_in_loop)
4495 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4496 alignment_support_scheme, NULL_TREE,
4498 if (alignment_support_scheme == dr_explicit_realign_optimized)
4500 phi = SSA_NAME_DEF_STMT (msq);
4501 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4508 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4511 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4513 aggr_type = vectype;
4515 prev_stmt_info = NULL;
4516 for (j = 0; j < ncopies; j++)
4518 /* 1. Create the vector or array pointer update chain. */
4520 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4521 offset, &dummy, gsi,
4522 &ptr_incr, false, &inv_p);
4524 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4525 TYPE_SIZE_UNIT (aggr_type));
4527 if (strided_load || slp_perm)
4528 dr_chain = VEC_alloc (tree, heap, vec_num);
4534 vec_array = create_vector_array (vectype, vec_num);
4537 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4538 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4539 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4540 gimple_call_set_lhs (new_stmt, vec_array);
4541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4542 mark_symbols_for_renaming (new_stmt);
4544 /* Extract each vector into an SSA_NAME. */
4545 for (i = 0; i < vec_num; i++)
4547 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4549 VEC_quick_push (tree, dr_chain, new_temp);
4552 /* Record the mapping between SSA_NAMEs and statements. */
4553 vect_record_strided_load_vectors (stmt, dr_chain);
4557 for (i = 0; i < vec_num; i++)
4560 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4563 /* 2. Create the vector-load in the loop. */
4564 switch (alignment_support_scheme)
4567 case dr_unaligned_supported:
4569 struct ptr_info_def *pi;
4571 = build2 (MEM_REF, vectype, dataref_ptr,
4572 build_int_cst (reference_alias_ptr_type
4573 (DR_REF (first_dr)), 0));
4574 pi = get_ptr_info (dataref_ptr);
4575 pi->align = TYPE_ALIGN_UNIT (vectype);
4576 if (alignment_support_scheme == dr_aligned)
4578 gcc_assert (aligned_access_p (first_dr));
4581 else if (DR_MISALIGNMENT (first_dr) == -1)
4583 TREE_TYPE (data_ref)
4584 = build_aligned_type (TREE_TYPE (data_ref),
4585 TYPE_ALIGN (elem_type));
4586 pi->align = TYPE_ALIGN_UNIT (elem_type);
4591 TREE_TYPE (data_ref)
4592 = build_aligned_type (TREE_TYPE (data_ref),
4593 TYPE_ALIGN (elem_type));
4594 pi->misalign = DR_MISALIGNMENT (first_dr);
4598 case dr_explicit_realign:
4603 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4605 if (compute_in_loop)
4606 msq = vect_setup_realignment (first_stmt, gsi,
4608 dr_explicit_realign,
4611 new_stmt = gimple_build_assign_with_ops
4612 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4614 (TREE_TYPE (dataref_ptr),
4615 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4616 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4617 gimple_assign_set_lhs (new_stmt, ptr);
4618 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4620 = build2 (MEM_REF, vectype, ptr,
4621 build_int_cst (reference_alias_ptr_type
4622 (DR_REF (first_dr)), 0));
4623 vec_dest = vect_create_destination_var (scalar_dest,
4625 new_stmt = gimple_build_assign (vec_dest, data_ref);
4626 new_temp = make_ssa_name (vec_dest, new_stmt);
4627 gimple_assign_set_lhs (new_stmt, new_temp);
4628 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4629 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4630 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4633 bump = size_binop (MULT_EXPR, vs_minus_1,
4634 TYPE_SIZE_UNIT (elem_type));
4635 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4636 new_stmt = gimple_build_assign_with_ops
4637 (BIT_AND_EXPR, NULL_TREE, ptr,
4640 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4641 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4642 gimple_assign_set_lhs (new_stmt, ptr);
4643 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4645 = build2 (MEM_REF, vectype, ptr,
4646 build_int_cst (reference_alias_ptr_type
4647 (DR_REF (first_dr)), 0));
4650 case dr_explicit_realign_optimized:
4651 new_stmt = gimple_build_assign_with_ops
4652 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4654 (TREE_TYPE (dataref_ptr),
4655 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4656 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4658 gimple_assign_set_lhs (new_stmt, new_temp);
4659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4661 = build2 (MEM_REF, vectype, new_temp,
4662 build_int_cst (reference_alias_ptr_type
4663 (DR_REF (first_dr)), 0));
4668 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4669 new_stmt = gimple_build_assign (vec_dest, data_ref);
4670 new_temp = make_ssa_name (vec_dest, new_stmt);
4671 gimple_assign_set_lhs (new_stmt, new_temp);
4672 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4673 mark_symbols_for_renaming (new_stmt);
4675 /* 3. Handle explicit realignment if necessary/supported.
4677 vec_dest = realign_load (msq, lsq, realignment_token) */
4678 if (alignment_support_scheme == dr_explicit_realign_optimized
4679 || alignment_support_scheme == dr_explicit_realign)
4681 lsq = gimple_assign_lhs (new_stmt);
4682 if (!realignment_token)
4683 realignment_token = dataref_ptr;
4684 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4686 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4689 new_temp = make_ssa_name (vec_dest, new_stmt);
4690 gimple_assign_set_lhs (new_stmt, new_temp);
4691 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4693 if (alignment_support_scheme == dr_explicit_realign_optimized)
4696 if (i == vec_num - 1 && j == ncopies - 1)
4697 add_phi_arg (phi, lsq,
4698 loop_latch_edge (containing_loop),
4704 /* 4. Handle invariant-load. */
4705 if (inv_p && !bb_vinfo)
4708 gimple_stmt_iterator gsi2 = *gsi;
4709 gcc_assert (!strided_load);
4711 vec_inv = build_vector_from_val (vectype, scalar_dest);
4712 new_temp = vect_init_vector (stmt, vec_inv,
4714 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4719 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4720 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4723 /* Collect vector loads and later create their permutation in
4724 vect_transform_strided_load (). */
4725 if (strided_load || slp_perm)
4726 VEC_quick_push (tree, dr_chain, new_temp);
4728 /* Store vector loads in the corresponding SLP_NODE. */
4729 if (slp && !slp_perm)
4730 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4735 if (slp && !slp_perm)
4740 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4741 slp_node_instance, false))
4743 VEC_free (tree, heap, dr_chain);
4752 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4753 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4758 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4760 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4761 prev_stmt_info = vinfo_for_stmt (new_stmt);
4765 VEC_free (tree, heap, dr_chain);
4771 /* Function vect_is_simple_cond.
4774 LOOP - the loop that is being vectorized.
4775 COND - Condition that is checked for simple use.
4778 *COMP_VECTYPE - the vector type for the comparison.
4780 Returns whether a COND can be vectorized. Checks whether
4781 condition operands are supportable using vect_is_simple_use. */
4784 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
/* NOTE(review): this listing is elided -- braces and the early
   "return false" paths after each failed check are not visible here.  */
4788 enum vect_def_type dt;
/* Vector types discovered for the two comparison operands, when they
   are SSA_NAMEs with internal definitions.  */
4789 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
/* COND must be a comparison tree code (LT_EXPR, EQ_EXPR, ...).  */
4791 if (!COMPARISON_CLASS_P (cond))
4794 lhs = TREE_OPERAND (cond, 0);
4795 rhs = TREE_OPERAND (cond, 1);
/* Each operand must be either an SSA_NAME whose definition passes
   vect_is_simple_use_1 ...  */
4797 if (TREE_CODE (lhs) == SSA_NAME)
4799 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4800 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
/* ... or an integer/real/fixed-point constant.  */
4804 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4805 && TREE_CODE (lhs) != FIXED_CST)
4808 if (TREE_CODE (rhs) == SSA_NAME)
4810 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4811 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4815 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4816 && TREE_CODE (rhs) != FIXED_CST)
/* Report whichever operand supplied a vector type; NULL_TREE when both
   operands are constants.  */
4819 *comp_vectype = vectype1 ? vectype1 : vectype2;
4823 /* vectorizable_condition.
4825 Check if STMT is conditional modify expression that can be vectorized.
4826 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4827 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4830 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4831 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4832 else clause if it is 2).
4834 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4837 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4838 gimple *vec_stmt, tree reduc_def, int reduc_index)
/* NOTE(review): elided listing -- braces, several "return false" paths
   and some else-arms are not visible in this excerpt.  */
4840 tree scalar_dest = NULL_TREE;
4841 tree vec_dest = NULL_TREE;
4842 tree cond_expr, then_clause, else_clause;
4843 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4846 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4847 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4848 tree vec_compare, vec_cond_expr;
4850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* dts[] records the def kind of each of the four operands (cond lhs,
   cond rhs, then, else) for use when generating copies below.  */
4852 enum vect_def_type dt, dts[4];
4853 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Number of vector stmts needed to cover the vectorization factor.  */
4854 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4855 enum tree_code code;
4856 stmt_vec_info prev_stmt_info = NULL;
4859 /* FORNOW: unsupported in basic block SLP. */
4860 gcc_assert (loop_vinfo);
4862 /* FORNOW: SLP not supported. */
4863 if (STMT_SLP_TYPE (stmt_info))
4866 gcc_assert (ncopies >= 1);
4867 if (reduc_index && ncopies > 1)
4868 return false; /* FORNOW */
4870 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4873 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4874 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4878 /* FORNOW: not yet supported. */
4879 if (STMT_VINFO_LIVE_P (stmt_info))
4881 if (vect_print_dump_info (REPORT_DETAILS))
4882 fprintf (vect_dump, "value used after loop.");
4886 /* Is vectorizable conditional operation? */
4887 if (!is_gimple_assign (stmt))
4890 code = gimple_assign_rhs_code (stmt);
4892 if (code != COND_EXPR)
/* The GIMPLE COND_EXPR assignment: rhs1 is the condition, rhs2/rhs3
   the then/else values.  */
4895 cond_expr = gimple_assign_rhs1 (stmt);
4896 then_clause = gimple_assign_rhs2 (stmt);
4897 else_clause = gimple_assign_rhs3 (stmt);
4899 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
/* THEN and ELSE must each be a simple use or a constant, mirroring the
   operand checks in vect_is_simple_cond.  */
4903 if (TREE_CODE (then_clause) == SSA_NAME)
4905 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4906 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4907 &then_def_stmt, &def, &dt))
4910 else if (TREE_CODE (then_clause) != INTEGER_CST
4911 && TREE_CODE (then_clause) != REAL_CST
4912 && TREE_CODE (then_clause) != FIXED_CST)
4915 if (TREE_CODE (else_clause) == SSA_NAME)
4917 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4918 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4919 &else_def_stmt, &def, &dt))
4922 else if (TREE_CODE (else_clause) != INTEGER_CST
4923 && TREE_CODE (else_clause) != REAL_CST
4924 && TREE_CODE (else_clause) != FIXED_CST)
/* Analysis phase: record the stmt kind and let the target decide
   whether a VEC_COND_EXPR of these types is expandable.  */
4929 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4930 return expand_vec_cond_expr_p (vectype, comp_vectype);
/* Transformation phase starts here.  */
4936 scalar_dest = gimple_assign_lhs (stmt);
4937 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4939 /* Handle cond expr. */
4940 for (j = 0; j < ncopies; j++)
/* NOTE(review): the ">emp" tokens below look like an HTML-unescaping
   artifact of "&gtemp" (a gimple temporary) -- restore before
   compiling; TODO confirm against the upstream source.  */
4947 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4949 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4950 NULL, >emp, &def, &dts[0]);
4952 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4954 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4955 NULL, >emp, &def, &dts[1]);
/* In a reduction, REDUC_DEF replaces the clause selected by
   REDUC_INDEX (1 = then, 2 = else).  */
4956 if (reduc_index == 1)
4957 vec_then_clause = reduc_def;
4960 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4962 vect_is_simple_use (then_clause, loop_vinfo,
4963 NULL, >emp, &def, &dts[2]);
4965 if (reduc_index == 2)
4966 vec_else_clause = reduc_def;
4969 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4971 vect_is_simple_use (else_clause, loop_vinfo,
4972 NULL, >emp, &def, &dts[3]);
/* j > 0: obtain the defs for this copy from the previous copy's
   defs, using the recorded def kinds.  */
4977 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4978 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4979 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4981 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4985 /* Arguments are ready. Create the new vector stmt. */
4986 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4987 vec_cond_lhs, vec_cond_rhs);
4988 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4989 vec_compare, vec_then_clause, vec_else_clause);
4991 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4992 new_temp = make_ssa_name (vec_dest, new_stmt);
4993 gimple_assign_set_lhs (new_stmt, new_temp);
4994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Chain the copies: first copy goes in *VEC_STMT, later copies hang
   off the previous copy's RELATED_STMT.  */
4996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5000 prev_stmt_info = vinfo_for_stmt (new_stmt);
5007 /* Make sure the statement is vectorizable. */
5010 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
/* NOTE(review): elided listing -- braces, "return false/true" paths and
   some else-arms are not visible in this excerpt.  */
5012 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5013 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5014 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5016 tree scalar_type, vectype;
5017 gimple pattern_stmt, pattern_def_stmt;
5019 if (vect_print_dump_info (REPORT_DETAILS))
5021 fprintf (vect_dump, "==> examining statement: ");
5022 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Stmts with volatile operands are never vectorized.  */
5025 if (gimple_has_volatile_ops (stmt))
5027 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5028 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5033 /* Skip stmts that do not need to be vectorized. In loops this is expected
5035 - the COND_EXPR which is the loop exit condition
5036 - any LABEL_EXPRs in the loop
5037 - computations that are used only for array indexing or loop control.
5038 In basic blocks we only analyze statements that are a part of some SLP
5039 instance, therefore, all the statements are relevant.
5041 Pattern statements need to be analyzed instead of the original statement
5042 if the original statement is not relevant. Otherwise, we analyze both
5045 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5046 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5047 && !STMT_VINFO_LIVE_P (stmt_info))
/* The original stmt is irrelevant, but a recognized pattern stmt that
   replaced it may still need analysis.  */
5049 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5051 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5052 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5054 /* Analyze PATTERN_STMT instead of the original stmt. */
5055 stmt = pattern_stmt;
5056 stmt_info = vinfo_for_stmt (pattern_stmt);
5057 if (vect_print_dump_info (REPORT_DETAILS))
5059 fprintf (vect_dump, "==> examining pattern statement: ");
5060 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5065 if (vect_print_dump_info (REPORT_DETAILS))
5066 fprintf (vect_dump, "irrelevant.");
/* The original stmt is relevant AND it has a relevant pattern stmt:
   recursively analyze the pattern stmt as well.  */
5071 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5073 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5074 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5076 /* Analyze PATTERN_STMT too. */
5077 if (vect_print_dump_info (REPORT_DETAILS))
5079 fprintf (vect_dump, "==> examining pattern statement: ");
5080 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5083 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5087 if (is_pattern_stmt_p (stmt_info)
5088 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5089 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5090 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5092 /* Analyze def stmt of STMT if it's a pattern stmt. */
5093 if (vect_print_dump_info (REPORT_DETAILS))
5095 fprintf (vect_dump, "==> examining pattern def statement: ");
5096 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5099 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
/* Sanity-check the relevance marking against the def kind.  */
5104 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5106 case vect_internal_def:
5109 case vect_reduction_def:
5110 case vect_nested_cycle:
5111 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5112 || relevance == vect_used_in_outer_by_reduction
5113 || relevance == vect_unused_in_scope));
5116 case vect_induction_def:
5117 case vect_constant_def:
5118 case vect_external_def:
5119 case vect_unknown_def_type:
5126 gcc_assert (PURE_SLP_STMT (stmt_info));
/* Basic-block SLP path: compute the vectype from the lhs type.  */
5128 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5129 if (vect_print_dump_info (REPORT_DETAILS))
5131 fprintf (vect_dump, "get vectype for scalar type: ");
5132 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5135 vectype = get_vectype_for_scalar_type (scalar_type);
5138 if (vect_print_dump_info (REPORT_DETAILS))
5140 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5141 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5146 if (vect_print_dump_info (REPORT_DETAILS))
5148 fprintf (vect_dump, "vectype: ");
5149 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5152 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5155 if (STMT_VINFO_RELEVANT_P (stmt_info))
5157 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5158 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5159 *need_to_vectorize = true;
/* Loop analysis: try each vectorizable_* analyzer in turn (NULL for
   vec_stmt/slp args means "analyze only, do not transform").  */
5164 && (STMT_VINFO_RELEVANT_P (stmt_info)
5165 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5166 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5167 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5168 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5169 || vectorizable_shift (stmt, NULL, NULL, NULL)
5170 || vectorizable_operation (stmt, NULL, NULL, NULL)
5171 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5172 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5173 || vectorizable_call (stmt, NULL, NULL)
5174 || vectorizable_store (stmt, NULL, NULL, NULL)
5175 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5176 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
/* SLP analysis: the supported subset, passing the SLP node through.  */
5180 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5181 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5182 || vectorizable_shift (stmt, NULL, NULL, node)
5183 || vectorizable_operation (stmt, NULL, NULL, node)
5184 || vectorizable_assignment (stmt, NULL, NULL, node)
5185 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5186 || vectorizable_store (stmt, NULL, NULL, node));
5191 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5193 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5194 fprintf (vect_dump, "supported: ");
5195 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5204 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5205 need extra handling, except for vectorizable reductions. */
5206 if (STMT_VINFO_LIVE_P (stmt_info)
5207 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5208 ok = vectorizable_live_operation (stmt, NULL, NULL);
5212 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5214 fprintf (vect_dump, "not vectorized: live stmt not ");
5215 fprintf (vect_dump, "supported: ");
5216 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5226 /* Function vect_transform_stmt.
5228 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5231 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5232 bool *strided_store, slp_tree slp_node,
5233 slp_instance slp_node_instance)
/* NOTE(review): elided listing -- braces, the "break"s between switch
   cases and the final return are not visible in this excerpt.  */
5235 bool is_store = false;
5236 gimple vec_stmt = NULL;
5237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the kind recorded during analysis; each handler emits
   the vector stmt(s) into VEC_STMT.  */
5240 switch (STMT_VINFO_TYPE (stmt_info))
5242 case type_demotion_vec_info_type:
5243 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5247 case type_promotion_vec_info_type:
5248 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5252 case type_conversion_vec_info_type:
5253 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5257 case induc_vec_info_type:
5258 gcc_assert (!slp_node);
5259 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5263 case shift_vec_info_type:
5264 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5268 case op_vec_info_type:
5269 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5273 case assignment_vec_info_type:
5274 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5278 case load_vec_info_type:
5279 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5284 case store_vec_info_type:
5285 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5287 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5289 /* In case of interleaving, the whole chain is vectorized when the
5290 last store in the chain is reached. Store stmts before the last
5291 one are skipped, and there vec_stmt_info shouldn't be freed
5293 *strided_store = true;
5294 if (STMT_VINFO_VEC_STMT (stmt_info))
5301 case condition_vec_info_type:
5302 gcc_assert (!slp_node);
5303 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5307 case call_vec_info_type:
5308 gcc_assert (!slp_node);
5309 done = vectorizable_call (stmt, gsi, &vec_stmt);
/* vectorizable_call may replace the stmt at GSI; re-read it.  */
5310 stmt = gsi_stmt (*gsi);
5313 case reduc_vec_info_type:
5314 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5319 if (!STMT_VINFO_LIVE_P (stmt_info))
5321 if (vect_print_dump_info (REPORT_DETAILS))
5322 fprintf (vect_dump, "stmt not supported.");
5327 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5328 is being vectorized, but outside the immediately enclosing loop. */
5330 && STMT_VINFO_LOOP_VINFO (stmt_info)
5331 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5332 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5333 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5334 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5335 || STMT_VINFO_RELEVANT (stmt_info) ==
5336 vect_used_in_outer_by_reduction))
5338 struct loop *innerloop = LOOP_VINFO_LOOP (
5339 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5340 imm_use_iterator imm_iter;
5341 use_operand_p use_p;
5345 if (vect_print_dump_info (REPORT_DETAILS))
5346 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5348 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5349 (to be used when vectorizing outer-loop stmts that use the DEF of
5351 if (gimple_code (stmt) == GIMPLE_PHI)
5352 scalar_dest = PHI_RESULT (stmt);
5354 scalar_dest = gimple_assign_lhs (stmt);
/* An immediate use outside the inner loop is the exit phi we want.  */
5356 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5358 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5360 exit_phi = USE_STMT (use_p);
5361 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5366 /* Handle stmts whose DEF is used outside the loop-nest that is
5367 being vectorized. */
5368 if (STMT_VINFO_LIVE_P (stmt_info)
5369 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5371 done = vectorizable_live_operation (stmt, gsi, &vec_stmt)
5376 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5382 /* Remove a group of stores (for SLP or interleaving), free their
5386 vect_remove_stores (gimple first_stmt)
/* NOTE(review): elided listing -- the loop header and the advance to
   the next chain element are not visible in this excerpt.  */
5388 gimple next = first_stmt;
5390 gimple_stmt_iterator next_si;
5394 /* Free the attached stmt_vec_info and remove the stmt. */
5395 next_si = gsi_for_stmt (next);
5396 gsi_remove (&next_si, true);
/* Fetch the next element of the interleaving chain before freeing
   NEXT's stmt_vec_info, which owns that link.  */
5397 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5398 free_stmt_vec_info (next);
5404 /* Function new_stmt_vec_info.
5406 Create and initialize a new stmt_vec_info struct for STMT. */
5409 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5410 bb_vec_info bb_vinfo)
/* Allocate a zeroed stmt_vec_info and set every field to its explicit
   default, tying it to STMT and the enclosing loop/bb vec-info.  */
5413 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5415 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5416 STMT_VINFO_STMT (res) = stmt;
5417 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5418 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5419 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5420 STMT_VINFO_LIVE_P (res) = false;
5421 STMT_VINFO_VECTYPE (res) = NULL;
5422 STMT_VINFO_VEC_STMT (res) = NULL;
5423 STMT_VINFO_VECTORIZABLE (res) = true;
5424 STMT_VINFO_IN_PATTERN_P (res) = false;
5425 STMT_VINFO_RELATED_STMT (res) = NULL;
5426 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5427 STMT_VINFO_DATA_REF (res) = NULL;
5429 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5430 STMT_VINFO_DR_OFFSET (res) = NULL;
5431 STMT_VINFO_DR_INIT (res) = NULL;
5432 STMT_VINFO_DR_STEP (res) = NULL;
5433 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
/* Loop-header PHIs get their def kind classified later (induction,
   reduction, ...); everything else is an internal def.  */
5435 if (gimple_code (stmt) == GIMPLE_PHI
5436 && is_loop_header_bb_p (gimple_bb (stmt)))
5437 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5439 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5441 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5442 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5443 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5444 STMT_SLP_TYPE (res) = loop_vect;
/* Interleaving-group bookkeeping starts empty.  */
5445 GROUP_FIRST_ELEMENT (res) = NULL;
5446 GROUP_NEXT_ELEMENT (res) = NULL;
5447 GROUP_SIZE (res) = 0;
5448 GROUP_STORE_COUNT (res) = 0;
5449 GROUP_GAP (res) = 0;
5450 GROUP_SAME_DR_STMT (res) = NULL;
5451 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5457 /* Create a hash table for stmt_vec_info. */
5460 init_stmt_vec_info_vec (void)
/* Must not be called twice without an intervening free.  */
5462 gcc_assert (!stmt_vec_info_vec);
/* Initial capacity of 50 entries; grows on demand.  */
5463 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5467 /* Free hash table for stmt_vec_info. */
5470 free_stmt_vec_info_vec (void)
/* Must only be called after init_stmt_vec_info_vec.  */
5472 gcc_assert (stmt_vec_info_vec);
5473 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5477 /* Free stmt vectorization related info. */
5480 free_stmt_vec_info (gimple stmt)
/* NOTE(review): elided listing -- an early-return guard and the final
   free of STMT_INFO itself are not visible in this excerpt.  */
5482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Release the same-alignment refs vector, then detach the info from
   the stmt so later lookups see NULL.  */
5487 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5488 set_vinfo_for_stmt (stmt, NULL);
5493 /* Function get_vectype_for_scalar_type_and_size.
5495 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5499 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
/* NOTE(review): elided listing -- braces and several "return NULL_TREE"
   paths after the failed checks below are not visible here.  */
5501 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5502 enum machine_mode simd_mode;
5503 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5510 /* We can't build a vector type of elements with alignment bigger than
5512 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5515 /* For vector types of elements whose mode precision doesn't
5516 match their types precision we use an element type of mode
5517 precision. The vectorization routines will have to make sure
5518 they support the proper result truncation/extension. */
5519 if (INTEGRAL_TYPE_P (scalar_type)
5520 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5521 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5522 TYPE_UNSIGNED (scalar_type));
/* Only integer and float element modes are supported.  */
5524 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5525 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5528 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5529 When the component mode passes the above test simply use a type
5530 corresponding to that mode. The theory is that any use that
5531 would cause problems with this will disable vectorization anyway. */
5532 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5533 && !INTEGRAL_TYPE_P (scalar_type)
5534 && !POINTER_TYPE_P (scalar_type))
5535 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5537 /* If no size was supplied use the mode the target prefers. Otherwise
5538 lookup a vector mode of the specified size. */
5540 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5542 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5543 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5547 vectype = build_vector_type (scalar_type, nunits);
5548 if (vect_print_dump_info (REPORT_DETAILS))
5550 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5551 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5557 if (vect_print_dump_info (REPORT_DETAILS))
5559 fprintf (vect_dump, "vectype: ");
5560 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* Reject vectypes whose mode the target cannot represent as a vector
   (or at least as an integer) mode.  */
5563 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5564 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5566 if (vect_print_dump_info (REPORT_DETAILS))
5567 fprintf (vect_dump, "mode not supported by target.");
5574 unsigned int current_vector_size;
5576 /* Function get_vectype_for_scalar_type.
5578 Returns the vector type corresponding to SCALAR_TYPE as supported
5582 get_vectype_for_scalar_type (tree scalar_type)
/* Wrapper that fixes the vector size to CURRENT_VECTOR_SIZE, latching
   that size from the first successful lookup when it is still 0.  */
5585 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5586 current_vector_size);
5588 && current_vector_size == 0)
5589 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5593 /* Function get_same_sized_vectype
5595 Returns a vector type corresponding to SCALAR_TYPE of size
5596 VECTOR_TYPE if supported by the target. */
5599 get_same_sized_vectype (tree scalar_type, tree vector_type)
/* Look up a vector of SCALAR_TYPE with the same total byte size as
   VECTOR_TYPE's mode.  */
5601 return get_vectype_for_scalar_type_and_size
5602 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5605 /* Function vect_is_simple_use.
5608 LOOP_VINFO - the vect info of the loop that is being vectorized.
5609 BB_VINFO - the vect info of the basic block that is being vectorized.
5610 OPERAND - operand of a stmt in the loop or bb.
5611 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5613 Returns whether a stmt with OPERAND can be vectorized.
5614 For loops, supportable operands are constants, loop invariants, and operands
5615 that are defined by the current iteration of the loop. Unsupportable
5616 operands are those that are defined by a previous iteration of the loop (as
5617 is the case in reduction/induction computations).
5618 For basic blocks, supportable operands are constants and bb invariants.
5619 For now, operands defined outside the basic block are not supported. */
5622 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5623 bb_vec_info bb_vinfo, gimple *def_stmt,
5624 tree *def, enum vect_def_type *dt)
/* NOTE(review): elided listing -- braces, the "return true/false"
   statements and some else-arms are not visible in this excerpt.  */
5627 stmt_vec_info stmt_vinfo;
5628 struct loop *loop = NULL;
5631 loop = LOOP_VINFO_LOOP (loop_vinfo);
5636 if (vect_print_dump_info (REPORT_DETAILS))
5638 fprintf (vect_dump, "vect_is_simple_use: operand ");
5639 print_generic_expr (vect_dump, operand, TDF_SLIM);
/* Integer/real constants are always usable.  */
5642 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5644 *dt = vect_constant_def;
/* Other invariants are defined outside the vectorized region.  */
5648 if (is_gimple_min_invariant (operand))
5651 *dt = vect_external_def;
/* Look through PAREN_EXPR to the underlying operand.  */
5655 if (TREE_CODE (operand) == PAREN_EXPR)
5657 if (vect_print_dump_info (REPORT_DETAILS))
5658 fprintf (vect_dump, "non-associatable copy.");
5659 operand = TREE_OPERAND (operand, 0);
5662 if (TREE_CODE (operand) != SSA_NAME)
5664 if (vect_print_dump_info (REPORT_DETAILS))
5665 fprintf (vect_dump, "not ssa-name.");
5669 *def_stmt = SSA_NAME_DEF_STMT (operand);
5670 if (*def_stmt == NULL)
5672 if (vect_print_dump_info (REPORT_DETAILS))
5673 fprintf (vect_dump, "no def_stmt.");
5677 if (vect_print_dump_info (REPORT_DETAILS))
5679 fprintf (vect_dump, "def_stmt: ");
5680 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5683 /* Empty stmt is expected only in case of a function argument.
5684 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5685 if (gimple_nop_p (*def_stmt))
5688 *dt = vect_external_def;
/* Defs from outside the loop/bb being vectorized, and bb PHIs, are
   external; otherwise take the def kind recorded during analysis.  */
5692 bb = gimple_bb (*def_stmt);
5694 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5695 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5696 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5697 *dt = vect_external_def;
5700 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5701 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5704 if (*dt == vect_unknown_def_type)
5706 if (vect_print_dump_info (REPORT_DETAILS))
5707 fprintf (vect_dump, "Unsupported pattern.");
5711 if (vect_print_dump_info (REPORT_DETAILS))
5712 fprintf (vect_dump, "type of def: %d.",*dt);
/* Extract *DEF (the defined value) from the defining stmt; only PHIs,
   assignments and calls are supported.  */
5714 switch (gimple_code (*def_stmt))
5717 *def = gimple_phi_result (*def_stmt);
5721 *def = gimple_assign_lhs (*def_stmt);
5725 *def = gimple_call_lhs (*def_stmt);
5730 if (vect_print_dump_info (REPORT_DETAILS))
5731 fprintf (vect_dump, "unsupported defining stmt: ");
5738 /* Function vect_is_simple_use_1.
5740 Same as vect_is_simple_use but also determines the vector operand
5741 type of OPERAND and stores it to *VECTYPE. If the definition of
5742 OPERAND is vect_uninitialized_def, vect_constant_def or
5743 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5744 is responsible to compute the best suited vector type for the
5748 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5749 bb_vec_info bb_vinfo, gimple *def_stmt,
5750 tree *def, enum vect_def_type *dt, tree *vectype)
/* NOTE(review): elided listing -- braces and "return true/false" lines
   are not visible in this excerpt.  */
5752 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5755 /* Now get a vector type if the def is internal, otherwise supply
5756 NULL_TREE and leave it up to the caller to figure out a proper
5757 type for the use stmt. */
5758 if (*dt == vect_internal_def
5759 || *dt == vect_induction_def
5760 || *dt == vect_reduction_def
5761 || *dt == vect_double_reduction_def
5762 || *dt == vect_nested_cycle)
5764 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
/* An irrelevant stmt replaced by a pattern stmt carries its vectype on
   the related (pattern) stmt instead.  */
5766 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5767 && !STMT_VINFO_RELEVANT (stmt_info)
5768 && !STMT_VINFO_LIVE_P (stmt_info))
5769 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5771 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5772 gcc_assert (*vectype != NULL_TREE);
5774 else if (*dt == vect_uninitialized_def
5775 || *dt == vect_constant_def
5776 || *dt == vect_external_def)
5777 *vectype = NULL_TREE;
5785 /* Function supportable_widening_operation
5787 Check whether an operation represented by the code CODE is a
5788 widening operation that is supported by the target platform in
5789 vector form (i.e., when operating on arguments of type VECTYPE_IN
5790 producing a result of type VECTYPE_OUT).
5792 Widening operations we currently support are NOP (CONVERT), FLOAT
5793 and WIDEN_MULT. This function checks if these operations are supported
5794 by the target platform either directly (via vector tree-codes), or via
5798 - CODE1 and CODE2 are codes of vector operations to be used when
5799 vectorizing the operation, if available.
5800 - DECL1 and DECL2 are decls of target builtin functions to be used
5801 when vectorizing the operation, if available. In this case,
5802 CODE1 and CODE2 are CALL_EXPR.
5803 - MULTI_STEP_CVT determines the number of required intermediate steps in
5804 case of multi-step conversion (like char->short->int - in that case
5805 MULTI_STEP_CVT will be 1).
5806 - INTERM_TYPES contains the intermediate type required to perform the
5807 widening operation (short in the above example). */
/* Function supportable_widening_operation.

   Decide whether the widening operation CODE, applied in statement STMT to
   convert VECTYPE_IN into VECTYPE_OUT, is supported by the target in vector
   form.  On success the vector tree codes to use are stored in *CODE1/*CODE2
   (or, for the special even/odd widen-mult case, target builtin decls in
   *DECL1/*DECL2 with *CODE1/*CODE2 set to CALL_EXPR); *MULTI_STEP_CVT is the
   number of intermediate conversion steps and *INTERM_TYPES collects the
   intermediate vector types.

   NOTE(review): this extraction is missing several original lines (the
   function's opening brace, the `if (loop_info)` guard, the `switch (code)`
   header, `else` branches, `return`s and closing braces).  Code tokens below
   are preserved byte-for-byte; only comments were added or corrected.  */
5810 supportable_widening_operation (enum tree_code code, gimple stmt,
5811 tree vectype_out, tree vectype_in,
5812 tree *decl1, tree *decl2,
5813 enum tree_code *code1, enum tree_code *code2,
5814 int *multi_step_cvt,
5815 VEC (tree, heap) **interm_types)
5817 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5818 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5819 struct loop *vect_loop = NULL;
5821 enum machine_mode vec_mode;
5822 enum insn_code icode1, icode2;
5823 optab optab1, optab2;
/* Local aliases: VECTYPE is the narrow input type, WIDE_VECTYPE the wide
   result type.  C1/C2 will receive the hi/lo vector tree codes.  */
5824 tree vectype = vectype_in;
5825 tree wide_vectype = vectype_out;
5826 enum tree_code c1, c2;
/* NOTE(review): presumably guarded by `if (loop_info)` in the original --
   that guard line is missing from this extraction; verify against the
   upstream source before relying on LOOP_INFO being non-NULL here.  */
5829 vect_loop = LOOP_VINFO_LOOP (loop_info);
5831 /* The result of a vectorized widening operation usually requires two vectors
5832 (because the widened results do not fit into one vector). The generated
5833 vector results would normally be expected to be generated in the same
5834 order as in the original scalar computation, i.e. if 8 results are
5835 generated in each vector iteration, they are to be organized as follows:
5836 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5838 However, in the special case that the result of the widening operation is
5839 used in a reduction computation only, the order doesn't matter (because
5840 when vectorizing a reduction we change the order of the computation).
5841 Some targets can take advantage of this and generate more efficient code.
5842 For example, targets like Altivec, that support widen_mult using a sequence
5843 of {mult_even,mult_odd} generate the following vectors:
5844 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5846 When vectorizing outer-loops, we execute the inner-loop sequentially
5847 (each vectorized inner-loop iteration contributes to VF outer-loop
5848 iterations in parallel). We therefore don't allow to change the order
5849 of the computation in the inner-loop during outer-loop vectorization. */
/* Condition fragment: ordering is irrelevant only when the result feeds a
   reduction and we are not inside a nested (outer-loop) vectorization.  */
5852 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5853 && !nested_in_vect_loop_p (vect_loop, stmt))
/* "Unordered" fast path: WIDEN_MULT_EXPR via the target's even/odd
   widening-multiply builtins, when both hooks exist and accept VECTYPE.  */
5859 && code == WIDEN_MULT_EXPR
5860 && targetm.vectorize.builtin_mul_widen_even
5861 && targetm.vectorize.builtin_mul_widen_even (vectype)
5862 && targetm.vectorize.builtin_mul_widen_odd
5863 && targetm.vectorize.builtin_mul_widen_odd (vectype)
5865 if (vect_print_dump_info (REPORT_DETAILS))
5866 fprintf (vect_dump, "Unordered widening operation detected.");
/* Report the builtins via DECL1/DECL2; callers dispatch on CALL_EXPR.  */
5868 *code1 = *code2 = CALL_EXPR;
5869 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5870 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
/* Ordered case: pick the hi/lo vector tree codes for CODE.  On big-endian
   targets the "hi" part holds the first elements, hence the swap of C1/C2
   between the endianness branches.  (The `switch (code)` header and the
   `else`/`break` lines are missing from this extraction.)  */
5876 case WIDEN_MULT_EXPR:
5877 if (BYTES_BIG_ENDIAN)
5879 c1 = VEC_WIDEN_MULT_HI_EXPR;
5880 c2 = VEC_WIDEN_MULT_LO_EXPR;
5884 c2 = VEC_WIDEN_MULT_HI_EXPR;
5885 c1 = VEC_WIDEN_MULT_LO_EXPR;
5889 case WIDEN_LSHIFT_EXPR:
5890 if (BYTES_BIG_ENDIAN)
5892 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5893 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5897 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5898 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
/* NOTE(review): case label missing here -- presumably CASE_CONVERT
   (integer promotion via VEC_UNPACK_{HI,LO}).  */
5903 if (BYTES_BIG_ENDIAN)
5905 c1 = VEC_UNPACK_HI_EXPR;
5906 c2 = VEC_UNPACK_LO_EXPR;
5910 c2 = VEC_UNPACK_HI_EXPR;
5911 c1 = VEC_UNPACK_LO_EXPR;
/* NOTE(review): case label missing here -- presumably FLOAT_EXPR
   (int -> float widening via VEC_UNPACK_FLOAT_{HI,LO}).  */
5916 if (BYTES_BIG_ENDIAN)
5918 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5919 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5923 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5924 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5928 case FIX_TRUNC_EXPR:
5929 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5930 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5931 computing the operation. */
/* Look up the optabs for C1/C2.  */
5938 if (code == FIX_TRUNC_EXPR)
5940 /* The signedness is determined from output operand. */
5941 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5942 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
/* Otherwise the input type determines signedness.  */
5946 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5947 optab2 = optab_for_tree_code (c2, vectype, optab_default);
/* Fail if either optab is missing or unsupported for the input mode.  */
5950 if (!optab1 || !optab2)
5953 vec_mode = TYPE_MODE (vectype);
5954 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5955 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5958 /* Check if it's a multi-step conversion that can be done using intermediate
   types (e.g. char->short->int needs one intermediate step).  */
5960 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5961 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5964 tree prev_type = vectype, intermediate_type;
5965 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5966 optab optab3, optab4;
/* Multi-step sequences are only handled for plain conversions.  */
5968 if (!CONVERT_EXPR_CODE_P (code))
5974 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5975 intermediate steps in promotion sequence. We try
5976 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
   not.  */
5978 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
/* NOTE(review): the literal 3 here presumably equals MAX_INTERM_CVT_STEPS
   (used for the allocation above) -- confirm and prefer the macro.  */
5979 for (i = 0; i < 3; i++)
/* Each step widens PREV_TYPE to the next mode produced by the current
   instruction pair; both the prev->intermediate and intermediate->next
   instructions must exist for the step to be usable.  */
5981 intermediate_mode = insn_data[icode1].operand[0].mode;
5982 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5983 TYPE_UNSIGNED (prev_type));
5984 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5985 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5987 if (!optab3 || !optab4
5988 || ((icode1 = optab_handler (optab1, prev_mode))
5989 == CODE_FOR_nothing)
5990 || insn_data[icode1].operand[0].mode != intermediate_mode
5991 || ((icode2 = optab_handler (optab2, prev_mode))
5992 == CODE_FOR_nothing)
5993 || insn_data[icode2].operand[0].mode != intermediate_mode
5994 || ((icode1 = optab_handler (optab3, intermediate_mode))
5995 == CODE_FOR_nothing)
5996 || ((icode2 = optab_handler (optab4, intermediate_mode))
5997 == CODE_FOR_nothing))
/* Record the successful step and count it.  */
6000 VEC_quick_push (tree, *interm_types, intermediate_type);
6001 (*multi_step_cvt)++;
/* Done once the instruction pair finally produces WIDE_VECTYPE's mode.  */
6003 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6004 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6007 prev_type = intermediate_type;
6008 prev_mode = intermediate_mode;
6020 /* Function supportable_narrowing_operation
6022 Check whether an operation represented by the code CODE is a
6023 narrowing operation that is supported by the target platform in
6024 vector form (i.e., when operating on arguments of type VECTYPE_IN
6025 and producing a result of type VECTYPE_OUT).
6027 Narrowing operations we currently support are NOP (CONVERT) and
6028 FIX_TRUNC. This function checks if these operations are supported by
6029 the target platform directly via vector tree-codes.
6032 - CODE1 is the code of a vector operation to be used when
6033 vectorizing the operation, if available.
6034 - MULTI_STEP_CVT determines the number of required intermediate steps in
6035 case of multi-step conversion (like int->short->char - in that case
6036 MULTI_STEP_CVT will be 1).
6037 - INTERM_TYPES contains the intermediate type required to perform the
6038 narrowing operation (short in the above example). */
6041 supportable_narrowing_operation (enum tree_code code,
6042 tree vectype_out, tree vectype_in,
6043 enum tree_code *code1, int *multi_step_cvt,
6044 VEC (tree, heap) **interm_types)
6046 enum machine_mode vec_mode;
6047 enum insn_code icode1;
6048 optab optab1, interm_optab;
6049 tree vectype = vectype_in;
6050 tree narrow_vectype = vectype_out;
6052 tree intermediate_type, prev_type;
6058 c1 = VEC_PACK_TRUNC_EXPR;
6061 case FIX_TRUNC_EXPR:
6062 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6066 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6067 tree code and optabs used for computing the operation. */
6074 if (code == FIX_TRUNC_EXPR)
6075 /* The signedness is determined from output operand. */
6076 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6078 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6083 vec_mode = TYPE_MODE (vectype);
6084 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6087 /* Check if it's a multi-step conversion that can be done using intermediate
6089 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6091 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6094 prev_type = vectype;
6095 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6096 intermediate steps in promotion sequence. We try
6097 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6099 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6100 for (i = 0; i < 3; i++)
6102 intermediate_mode = insn_data[icode1].operand[0].mode;
6103 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6104 TYPE_UNSIGNED (prev_type));
6105 interm_optab = optab_for_tree_code (c1, intermediate_type,
6108 || ((icode1 = optab_handler (optab1, prev_mode))
6109 == CODE_FOR_nothing)
6110 || insn_data[icode1].operand[0].mode != intermediate_mode
6111 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6112 == CODE_FOR_nothing))
6115 VEC_quick_push (tree, *interm_types, intermediate_type);
6116 (*multi_step_cvt)++;
6118 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6121 prev_type = intermediate_type;
6122 prev_mode = intermediate_mode;