/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
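
/* Illustrative sketch (hypothetical names): for N == 2, the sequence
   emitted by read_vector_array above corresponds to the GIMPLE

       vect.5 = array[2];

   where "vect.5" stands for the fresh SSA name that is returned, so
   callers obtain the N'th vector of ARRAY as a register value.  */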

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
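
/* Illustrative sketch: if FIRST_DR describes accesses to "int a[]" and PTR
   has the corresponding vector pointer type, the reference built above
   dumps roughly as

       MEM[(int *)ptr]

   i.e. *PTR carrying the alias information of the scalar references it
   replaces (the exact dump form is version dependent).  */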

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This is an out-of-pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
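
/* For example (an illustrative sketch):

       for (i = 0; i < N; i++)
         {
           a[i] = x;        <-- relevant: has a vdef (alters memory)
           s = s + b[i];    <-- live: s is used after the loop
         }
       ... = s;

   The store is marked vect_used_in_scope, and the def of s is marked
   live because of its use in the loop-closed exit phi.  */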

/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
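
/* For example (illustrative): in "a[i] = x" the operand X is a
   non-indexing use, so the function returns true for it, while a use of
   I appears only in the address computation of the data reference, so
   for I the function returns false.  */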

/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
   indexing), which does not need to be directly vectorized, then the
   liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");

                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}

/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Function vect_cost_strided_group_size.

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost.

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                 vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost.

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                 vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy.

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1, or from SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
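
/* Illustrative use (hypothetical target behavior): for "y = sqrtf (x)"
   with V4SF input and output vector types, a target whose hook provides
   a vector square root would return that builtin's decl here; otherwise
   NULL_TREE is returned and the call is not vectorized.  */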

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  A call with no arguments is also not handled.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
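
  /* For illustration: with a vectorization factor of 8 and V4SF argument
     types (nunits_in == 4, modifier == NONE), ncopies == 2, i.e. two
     vector calls are generated to cover 8 scalar iterations.  */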

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

/* Function vect_gen_widened_results_half.

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  VEC(tree,heap) *dummy = NULL;
  int dummy_int;

  /* Is STMT a vectorizable conversion?  */

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  if ((modifier == NONE
       && !supportable_convert_operation (code, vectype_out, vectype_in,
                                          &decl1, &code1))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2109 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2110 gimple *vec_stmt, slp_tree slp_node)
2115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2116 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2122 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2125 VEC(tree,heap) *vec_oprnds = NULL;
2127 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2128 gimple new_stmt = NULL;
2129 stmt_vec_info prev_stmt_info = NULL;
2130 enum tree_code code;
2133 /* Multiple types in SLP are handled by creating the appropriate number of
2134 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
2136 if (slp_node || PURE_SLP_STMT (stmt_info))
2139 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2141 gcc_assert (ncopies >= 1);
2143 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2146 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2149 /* Is vectorizable assignment? */
2150 if (!is_gimple_assign (stmt))
2153 scalar_dest = gimple_assign_lhs (stmt);
2154 if (TREE_CODE (scalar_dest) != SSA_NAME)
2157 code = gimple_assign_rhs_code (stmt);
2158 if (gimple_assign_single_p (stmt)
2159 || code == PAREN_EXPR
2160 || CONVERT_EXPR_CODE_P (code))
2161 op = gimple_assign_rhs1 (stmt);
2165 if (code == VIEW_CONVERT_EXPR)
2166 op = TREE_OPERAND (op, 0);
2168 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2169 &def_stmt, &def, &dt[0], &vectype_in))
2171 if (vect_print_dump_info (REPORT_DETAILS))
2172 fprintf (vect_dump, "use not simple.");
2176 /* We can handle NOP_EXPR conversions that do not change the number
2177 of elements or the vector size. */
2178 if ((CONVERT_EXPR_CODE_P (code)
2179 || code == VIEW_CONVERT_EXPR)
2181 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2182 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2183 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2186 /* We do not handle bit-precision changes. */
2187 if ((CONVERT_EXPR_CODE_P (code)
2188 || code == VIEW_CONVERT_EXPR)
2189 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2190 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2191 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2192 || ((TYPE_PRECISION (TREE_TYPE (op))
2193 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2194 /* But a conversion that does not change the bit-pattern is ok. */
2195 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2196 > TYPE_PRECISION (TREE_TYPE (op)))
2197 && TYPE_UNSIGNED (TREE_TYPE (op))))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "type conversion to/from bit-precision unsupported.");
2205 if (!vec_stmt) /* transformation not required. */
2207 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "=== vectorizable_assignment ===");
2210 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2215 if (vect_print_dump_info (REPORT_DETAILS))
2216 fprintf (vect_dump, "transform assignment.");
2219 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2222 for (j = 0; j < ncopies; j++)
2226 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2228 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2230 /* Arguments are ready. Create the new vector stmt. */
2231 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2233 if (CONVERT_EXPR_CODE_P (code)
2234 || code == VIEW_CONVERT_EXPR)
2235 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2236 new_stmt = gimple_build_assign (vec_dest, vop);
2237 new_temp = make_ssa_name (vec_dest, new_stmt);
2238 gimple_assign_set_lhs (new_stmt, new_temp);
2239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2241 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2248 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2250 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2252 prev_stmt_info = vinfo_for_stmt (new_stmt);
2255 VEC_free (tree, heap, vec_oprnds);
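/* Illustrative sketch (not part of GCC): the assignments handled above
   are copies and conversions that leave the bit-pattern unchanged, so
   each copy is vectorized as a single VIEW_CONVERT_EXPR. The
   signed/unsigned conversion below changes neither the lane count nor
   the vector size and is therefore accepted by the checks above. */

static void
example_assignment_copy (const unsigned int *src, int *dst, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = (int) src[i];   /* same size, same lanes: a vector copy. */
}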
2260 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2261 either as shift by a scalar or by a vector. */
2264 vect_supportable_shift (enum tree_code code, tree scalar_type)
2267 enum machine_mode vec_mode;
2272 vectype = get_vectype_for_scalar_type (scalar_type);
2276 optab = optab_for_tree_code (code, vectype, optab_scalar);
2278 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2280 optab = optab_for_tree_code (code, vectype, optab_vector);
2282 || (optab_handler (optab, TYPE_MODE (vectype))
2283 == CODE_FOR_nothing))
2287 vec_mode = TYPE_MODE (vectype);
2288 icode = (int) optab_handler (optab, vec_mode);
2289 if (icode == CODE_FOR_nothing)
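/* Illustrative usage sketch (hypothetical caller, not part of this
   file's interface): pattern recognition code can use the predicate
   above to decide whether an operation may be rewritten as a shift,
   e.g. a division by a power of two. Support for either a scalar or a
   vector shift amount is sufficient. */

static bool
example_can_rewrite_as_shift (tree scalar_type)
{
  return vect_supportable_shift (RSHIFT_EXPR, scalar_type);
}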
2296 /* Function vectorizable_shift.
2298 Check if STMT performs a shift operation that can be vectorized.
2299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2304 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2305 gimple *vec_stmt, slp_tree slp_node)
2309 tree op0, op1 = NULL;
2310 tree vec_oprnd1 = NULL_TREE;
2311 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2314 enum tree_code code;
2315 enum machine_mode vec_mode;
2319 enum machine_mode optab_op2_mode;
2322 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2323 gimple new_stmt = NULL;
2324 stmt_vec_info prev_stmt_info;
2331 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2334 bool scalar_shift_arg = true;
2335 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2341 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2344 /* Is STMT a vectorizable shift/rotate operation? */
2345 if (!is_gimple_assign (stmt))
2348 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2351 code = gimple_assign_rhs_code (stmt);
2353 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2354 || code == RROTATE_EXPR))
2357 scalar_dest = gimple_assign_lhs (stmt);
2358 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2359 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2360 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2362 if (vect_print_dump_info (REPORT_DETAILS))
2363 fprintf (vect_dump, "bit-precision shifts not supported.");
2367 op0 = gimple_assign_rhs1 (stmt);
2368 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2369 &def_stmt, &def, &dt[0], &vectype))
2371 if (vect_print_dump_info (REPORT_DETAILS))
2372 fprintf (vect_dump, "use not simple.");
2375 /* If op0 is an external or constant def, use a vector type with
2376 the same size as the output vector type. */
2378 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2380 gcc_assert (vectype);
2383 if (vect_print_dump_info (REPORT_DETAILS))
2385 fprintf (vect_dump, "no vectype for scalar type ");
2386 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2392 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2393 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2394 if (nunits_out != nunits_in)
2397 op1 = gimple_assign_rhs2 (stmt);
2398 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2399 &dt[1], &op1_vectype))
2401 if (vect_print_dump_info (REPORT_DETAILS))
2402 fprintf (vect_dump, "use not simple.");
2407 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2411 /* Multiple types in SLP are handled by creating the appropriate number of
2412 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
2414 if (slp_node || PURE_SLP_STMT (stmt_info))
2417 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2419 gcc_assert (ncopies >= 1);
2421 /* Determine whether the shift amount is a vector or a scalar. If the
2422 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2424 if (dt[1] == vect_internal_def && !slp_node)
2425 scalar_shift_arg = false;
2426 else if (dt[1] == vect_constant_def
2427 || dt[1] == vect_external_def
2428 || dt[1] == vect_internal_def)
2430 /* In SLP, we need to check whether the shift count is the same;
2431 in loops, if it is a constant or invariant, it is always a scalar shift. */
2435 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2438 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2439 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2440 scalar_shift_arg = false;
2445 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "operand mode requires invariant argument.");
2450 /* Vector shifted by vector. */
2451 if (!scalar_shift_arg)
2453 optab = optab_for_tree_code (code, vectype, optab_vector);
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "vector/vector shift/rotate found.");
2457 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
2458 if (op1_vectype == NULL_TREE
2459 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
2461 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "unusable type for last operand in"
2463 " vector/vector shift/rotate.");
2467 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
2468 whether it has a vector-shifted-by-vector insn. */
2471 optab = optab_for_tree_code (code, vectype, optab_scalar);
2473 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2475 if (vect_print_dump_info (REPORT_DETAILS))
2476 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2480 optab = optab_for_tree_code (code, vectype, optab_vector);
2482 && (optab_handler (optab, TYPE_MODE (vectype))
2483 != CODE_FOR_nothing))
2485 scalar_shift_arg = false;
2487 if (vect_print_dump_info (REPORT_DETAILS))
2488 fprintf (vect_dump, "vector/vector shift/rotate found.");
2490 /* Unlike the other binary operators, the rhs of a shift/rotate is
2491 an int rather than the same type as the lhs, so make sure the
2492 scalar is the right type if we are dealing with vectors of
2493 long long/long/short/char. */
2494 if (dt[1] == vect_constant_def)
2495 op1 = fold_convert (TREE_TYPE (vectype), op1);
2496 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
2500 && TYPE_MODE (TREE_TYPE (vectype))
2501 != TYPE_MODE (TREE_TYPE (op1)))
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "unusable type for last operand in"
2505 " vector/vector shift/rotate.");
2508 if (vec_stmt && !slp_node)
2510 op1 = fold_convert (TREE_TYPE (vectype), op1);
2511 op1 = vect_init_vector (stmt, op1,
2512 TREE_TYPE (vectype), NULL);
2519 /* Supportable by target? */
2522 if (vect_print_dump_info (REPORT_DETAILS))
2523 fprintf (vect_dump, "no optab.");
2526 vec_mode = TYPE_MODE (vectype);
2527 icode = (int) optab_handler (optab, vec_mode);
2528 if (icode == CODE_FOR_nothing)
2530 if (vect_print_dump_info (REPORT_DETAILS))
2531 fprintf (vect_dump, "op not supported by target.");
2532 /* Check only during analysis. */
2533 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2534 || (vf < vect_min_worthwhile_factor (code)
2537 if (vect_print_dump_info (REPORT_DETAILS))
2538 fprintf (vect_dump, "proceeding using word mode.");
2541 /* Worthwhile without SIMD support? Check only during analysis. */
2542 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2543 && vf < vect_min_worthwhile_factor (code)
2546 if (vect_print_dump_info (REPORT_DETAILS))
2547 fprintf (vect_dump, "not worthwhile without SIMD support.");
2551 if (!vec_stmt) /* transformation not required. */
2553 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2554 if (vect_print_dump_info (REPORT_DETAILS))
2555 fprintf (vect_dump, "=== vectorizable_shift ===");
2556 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "transform shift/rotate operation.");
2566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2568 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2569 created in the previous stages of the recursion, so no allocation is
2570 needed, except for the case of shift with scalar shift argument. In that
2571 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2572 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2573 In case of loop-based vectorization we allocate VECs of size 1. We
2574 allocate VEC_OPRNDS1 only in case of binary operation. */
2577 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2578 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2580 else if (scalar_shift_arg)
2581 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2583 prev_stmt_info = NULL;
2584 for (j = 0; j < ncopies; j++)
2589 if (scalar_shift_arg)
2591 /* Vector shl and shr insn patterns can be defined with scalar
2592 operand 2 (shift operand). In this case, use constant or loop
2593 invariant op1 directly, without extending it to vector mode first. */
2595 optab_op2_mode = insn_data[icode].operand[2].mode;
2596 if (!VECTOR_MODE_P (optab_op2_mode))
2598 if (vect_print_dump_info (REPORT_DETAILS))
2599 fprintf (vect_dump, "operand 1 using scalar mode.");
2601 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2604 /* Store vec_oprnd1 for every vector stmt to be created
2605 for SLP_NODE. We check during the analysis that all
2606 the shift arguments are the same.
2607 TODO: Allow different constants for different vector
2608 stmts generated for an SLP instance. */
2609 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2610 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2615 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2616 (a special case for certain kind of vector shifts); otherwise,
2617 operand 1 should be of a vector type (the usual case). */
2619 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2622 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2626 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2628 /* Arguments are ready. Create the new vector stmt. */
2629 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2631 vop1 = VEC_index (tree, vec_oprnds1, i);
2632 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2633 new_temp = make_ssa_name (vec_dest, new_stmt);
2634 gimple_assign_set_lhs (new_stmt, new_temp);
2635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2637 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2644 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2646 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2647 prev_stmt_info = vinfo_for_stmt (new_stmt);
2650 VEC_free (tree, heap, vec_oprnds0);
2651 VEC_free (tree, heap, vec_oprnds1);
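/* Illustrative sketch (not part of GCC): the two shift flavours this
   function distinguishes. With a loop-invariant amount the
   vector/scalar optab (optab_scalar) can be used and op1 stays scalar;
   with a per-element amount the vector/vector optab (optab_vector) is
   required and op1 is itself vectorized. */

static void
example_shift_by_scalar (int *a, int n, int amount)
{
  int i;

  for (i = 0; i < n; i++)
    a[i] <<= amount;        /* one invariant shift amount: optab_scalar. */
}

static void
example_shift_by_vector (int *a, const int *amount, int n)
{
  int i;

  for (i = 0; i < n; i++)
    a[i] <<= amount[i];     /* per-element shift amounts: optab_vector. */
}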
2657 /* Function vectorizable_operation.
2659 Check if STMT performs a binary, unary or ternary operation that can be vectorized.
2661 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2662 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2663 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2666 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2667 gimple *vec_stmt, slp_tree slp_node)
2671 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2675 enum tree_code code;
2676 enum machine_mode vec_mode;
2683 enum vect_def_type dt[3]
2684 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2685 gimple new_stmt = NULL;
2686 stmt_vec_info prev_stmt_info;
2692 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2693 tree vop0, vop1, vop2;
2694 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2697 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2700 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2703 /* Is STMT a vectorizable binary/unary operation? */
2704 if (!is_gimple_assign (stmt))
2707 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2710 code = gimple_assign_rhs_code (stmt);
2712 /* For pointer addition, we should use the normal plus for
2713 the vector addition. */
2714 if (code == POINTER_PLUS_EXPR)
2717 /* Support only unary, binary and ternary operations. */
2718 op_type = TREE_CODE_LENGTH (code);
2719 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2727 scalar_dest = gimple_assign_lhs (stmt);
2728 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2730 /* Most operations cannot handle bit-precision types without extra truncations. */
2732 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2733 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2734 /* Exception are bitwise binary operations. */
2735 && code != BIT_IOR_EXPR
2736 && code != BIT_XOR_EXPR
2737 && code != BIT_AND_EXPR)
2739 if (vect_print_dump_info (REPORT_DETAILS))
2740 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2744 op0 = gimple_assign_rhs1 (stmt);
2745 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2746 &def_stmt, &def, &dt[0], &vectype))
2748 if (vect_print_dump_info (REPORT_DETAILS))
2749 fprintf (vect_dump, "use not simple.");
2752 /* If op0 is an external or constant def, use a vector type with
2753 the same size as the output vector type. */
2755 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2757 gcc_assert (vectype);
2760 if (vect_print_dump_info (REPORT_DETAILS))
2762 fprintf (vect_dump, "no vectype for scalar type ");
2763 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2769 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2770 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2771 if (nunits_out != nunits_in)
2774 if (op_type == binary_op || op_type == ternary_op)
2776 op1 = gimple_assign_rhs2 (stmt);
2777 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2780 if (vect_print_dump_info (REPORT_DETAILS))
2781 fprintf (vect_dump, "use not simple.");
2785 if (op_type == ternary_op)
2787 op2 = gimple_assign_rhs3 (stmt);
2788 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "use not simple.");
2798 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2802 /* Multiple types in SLP are handled by creating the appropriate number of
2803 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
2805 if (slp_node || PURE_SLP_STMT (stmt_info))
2808 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2810 gcc_assert (ncopies >= 1);
2812 /* Shifts are handled in vectorizable_shift (). */
2813 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2814 || code == RROTATE_EXPR)
2817 optab = optab_for_tree_code (code, vectype, optab_default);
2819 /* Supportable by target? */
2822 if (vect_print_dump_info (REPORT_DETAILS))
2823 fprintf (vect_dump, "no optab.");
2826 vec_mode = TYPE_MODE (vectype);
2827 icode = (int) optab_handler (optab, vec_mode);
2828 if (icode == CODE_FOR_nothing)
2830 if (vect_print_dump_info (REPORT_DETAILS))
2831 fprintf (vect_dump, "op not supported by target.");
2832 /* Check only during analysis. */
2833 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2834 || (vf < vect_min_worthwhile_factor (code)
2837 if (vect_print_dump_info (REPORT_DETAILS))
2838 fprintf (vect_dump, "proceeding using word mode.");
2841 /* Worthwhile without SIMD support? Check only during analysis. */
2842 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2843 && vf < vect_min_worthwhile_factor (code)
2846 if (vect_print_dump_info (REPORT_DETAILS))
2847 fprintf (vect_dump, "not worthwhile without SIMD support.");
2851 if (!vec_stmt) /* transformation not required. */
2853 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2854 if (vect_print_dump_info (REPORT_DETAILS))
2855 fprintf (vect_dump, "=== vectorizable_operation ===");
2856 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2862 if (vect_print_dump_info (REPORT_DETAILS))
2863 fprintf (vect_dump, "transform binary/unary/ternary operation.");
2866 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2868 /* Allocate VECs for vector operands. In case of SLP, vector operands
2869 are created in the previous stages of the recursion, so no allocation
2870 is needed. In case of loop-based vectorization we allocate VECs of
2871 size 1. VEC_OPRNDS1 is allocated only for a binary or ternary
2872 operation, and VEC_OPRNDS2 only for a ternary operation. */
2877 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2878 if (op_type == binary_op || op_type == ternary_op)
2879 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2880 if (op_type == ternary_op)
2881 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2884 /* In case the vectorization factor (VF) is bigger than the number
2885 of elements that we can fit in a vectype (nunits), we have to generate
2886 more than one vector stmt, i.e., we need to "unroll" the
2887 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2888 from one copy of the vector stmt to the next, in the field
2889 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2890 stages to find the correct vector defs to be used when vectorizing
2891 stmts that use the defs of the current stmt. The example below
2892 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2893 we need to create 4 vectorized stmts):
2895 before vectorization:
2896 RELATED_STMT VEC_STMT
2900 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2902 RELATED_STMT VEC_STMT
2903 VS1_0: vx0 = memref0 VS1_1 -
2904 VS1_1: vx1 = memref1 VS1_2 -
2905 VS1_2: vx2 = memref2 VS1_3 -
2906 VS1_3: vx3 = memref3 - -
2907 S1: x = load - VS1_0
2910 step2: vectorize stmt S2 (done here):
2911 To vectorize stmt S2 we first need to find the relevant vector
2912 def for the first operand 'x'. This is, as usual, obtained from
2913 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2914 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2915 relevant vector def 'vx0'. Having found 'vx0' we can generate
2916 the vector stmt VS2_0, and as usual, record it in the
2917 STMT_VINFO_VEC_STMT of stmt S2.
2918 When creating the second copy (VS2_1), we obtain the relevant vector
2919 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2920 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2921 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2922 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2923 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2924 chain of stmts and pointers:
2925 RELATED_STMT VEC_STMT
2926 VS1_0: vx0 = memref0 VS1_1 -
2927 VS1_1: vx1 = memref1 VS1_2 -
2928 VS1_2: vx2 = memref2 VS1_3 -
2929 VS1_3: vx3 = memref3 - -
2930 S1: x = load - VS1_0
2931 VS2_0: vz0 = vx0 + v1 VS2_1 -
2932 VS2_1: vz1 = vx1 + v1 VS2_2 -
2933 VS2_2: vz2 = vx2 + v1 VS2_3 -
2934 VS2_3: vz3 = vx3 + v1 - -
2935 S2: z = x + 1 - VS2_0 */
2937 prev_stmt_info = NULL;
2938 for (j = 0; j < ncopies; j++)
2943 if (op_type == binary_op || op_type == ternary_op)
2944 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2947 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2949 if (op_type == ternary_op)
2951 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2952 VEC_quick_push (tree, vec_oprnds2,
2953 vect_get_vec_def_for_operand (op2, stmt, NULL));
2958 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2959 if (op_type == ternary_op)
2961 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2962 VEC_quick_push (tree, vec_oprnds2,
2963 vect_get_vec_def_for_stmt_copy (dt[2],
2968 /* Arguments are ready. Create the new vector stmt. */
2969 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2971 vop1 = ((op_type == binary_op || op_type == ternary_op)
2972 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2973 vop2 = ((op_type == ternary_op)
2974 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2975 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2977 new_temp = make_ssa_name (vec_dest, new_stmt);
2978 gimple_assign_set_lhs (new_stmt, new_temp);
2979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2981 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2991 prev_stmt_info = vinfo_for_stmt (new_stmt);
2994 VEC_free (tree, heap, vec_oprnds0);
2996 VEC_free (tree, heap, vec_oprnds1);
2998 VEC_free (tree, heap, vec_oprnds2);
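/* Illustrative sketch (not part of GCC; uses the generic vector
   extension): the unrolling described in the comment above, for VF=16
   and V4SI vectors (nunits=4). The scalar stmt S2: z = x + 1 becomes
   the four vector stmts VS2_0..VS2_3 that are chained through
   STMT_VINFO_RELATED_STMT. */

typedef int example_v4si __attribute__ ((vector_size (16)));

static void
example_unrolled_operation (const example_v4si *vx, example_v4si *vz)
{
  example_v4si v1 = { 1, 1, 1, 1 };

  vz[0] = vx[0] + v1;   /* VS2_0 */
  vz[1] = vx[1] + v1;   /* VS2_1 */
  vz[2] = vx[2] + v1;   /* VS2_2 */
  vz[3] = vx[3] + v1;   /* VS2_3 */
}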
3004 /* Get vectorized definitions for loop-based vectorization. For the first
3005 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3006 scalar operand), and for the rest we get a copy with
3007 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3008 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3009 The vectors are collected into VEC_OPRNDS. */
3012 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3013 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
3017 /* Get first vector operand. */
3018 /* All the vector operands except the very first one (that is scalar oprnd) are stmt copies. */
3020 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3021 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3023 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3025 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3027 /* Get second vector operand. */
3028 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3029 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3033 /* For conversion in multiple steps, continue to get operands recursively. */
3036 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
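/* Illustrative sketch (hypothetical helper, not part of GCC): the
   recursion above pushes two defs per level, so a call with
   MULTI_STEP_CVT == M collects 2 * (M + 1) vector defs in VEC_OPRNDS -
   the first obtained from the scalar operand, each later one a stmt
   copy of its predecessor. */

static unsigned int
example_loop_based_defs_count (unsigned int multi_step_cvt)
{
  return 2 * (multi_step_cvt + 1);
}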
3040 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3041 For multi-step conversions store the resulting vectors and call the function recursively. */
3045 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3046 int multi_step_cvt, gimple stmt,
3047 VEC (tree, heap) *vec_dsts,
3048 gimple_stmt_iterator *gsi,
3049 slp_tree slp_node, enum tree_code code,
3050 stmt_vec_info *prev_stmt_info)
3053 tree vop0, vop1, new_tmp, vec_dest;
3055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3057 vec_dest = VEC_pop (tree, vec_dsts);
3059 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3061 /* Create demotion operation. */
3062 vop0 = VEC_index (tree, *vec_oprnds, i);
3063 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3064 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3065 new_tmp = make_ssa_name (vec_dest, new_stmt);
3066 gimple_assign_set_lhs (new_stmt, new_tmp);
3067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3070 /* Store the resulting vector for next recursive call. */
3071 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3074 /* This is the last step of the conversion sequence. Store the
3075 vectors in SLP_NODE or in vector info of the scalar statement
3076 (or in STMT_VINFO_RELATED_STMT chain). */
3078 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3081 if (!*prev_stmt_info)
3082 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3084 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3086 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3091 /* For multi-step demotion operations we first generate demotion operations
3092 from the source type to the intermediate types, and then combine the
3093 results (stored in VEC_OPRNDS) in a demotion operation to the destination type. */
3097 /* At each level of recursion we have half of the operands we had at the previous level. */
3099 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3100 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3101 stmt, vec_dsts, gsi, slp_node,
3102 code, prev_stmt_info);
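/* Illustrative sketch (not part of GCC): one demotion level as a
   source loop. A single narrowing stmt (e.g. a VEC_PACK_TRUNC_EXPR)
   combines two V4SI inputs into one V8HI result, which is why the loop
   above walks *VEC_OPRNDS in steps of two and stores each result at
   index i/2 for the next recursion level. */

static void
example_one_demotion_level (const int *src, short *dst)
{
  int i;

  for (i = 0; i < 8; i++)
    dst[i] = (short) src[i];   /* two V4SI vectors -> one V8HI vector. */
}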
3107 /* Function vectorizable_type_demotion
3109 Check if STMT performs a binary or unary operation that involves
3110 type demotion, and if it can be vectorized.
3111 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3112 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3113 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3116 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3117 gimple *vec_stmt, slp_tree slp_node)
3122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3124 enum tree_code code, code1 = ERROR_MARK;
3127 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3128 stmt_vec_info prev_stmt_info;
3135 int multi_step_cvt = 0;
3136 VEC (tree, heap) *vec_oprnds0 = NULL;
3137 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3138 tree last_oprnd, intermediate_type;
3139 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3147 /* Is STMT a vectorizable type-demotion operation? */
3148 if (!is_gimple_assign (stmt))
3151 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3154 code = gimple_assign_rhs_code (stmt);
3155 if (!CONVERT_EXPR_CODE_P (code))
3158 scalar_dest = gimple_assign_lhs (stmt);
3159 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3161 /* Check the operands of the operation. */
3162 op0 = gimple_assign_rhs1 (stmt);
3163 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3164 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3165 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3166 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
3169 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3170 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3171 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3172 || ((TYPE_PRECISION (TREE_TYPE (op0))
3173 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3175 if (vect_print_dump_info (REPORT_DETAILS))
3176 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3180 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3181 &def_stmt, &def, &dt[0], &vectype_in))
3183 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (vect_dump, "use not simple.");
3187 /* If op0 is an external def, use a vector type with the
3188 same size as the output vector type if possible. */
3190 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3192 gcc_assert (vectype_in);
3195 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "no vectype for scalar type ");
3198 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3204 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3205 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3206 if (nunits_in >= nunits_out)
3209 /* Multiple types in SLP are handled by creating the appropriate number of
3210 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
3212 if (slp_node || PURE_SLP_STMT (stmt_info))
3215 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3216 gcc_assert (ncopies >= 1);
3218 /* Supportable by target? */
3219 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3220 &code1, &multi_step_cvt, &interm_types))
3223 if (!vec_stmt) /* transformation not required. */
3225 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3226 if (vect_print_dump_info (REPORT_DETAILS))
3227 fprintf (vect_dump, "=== vectorizable_demotion ===");
3228 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3233 if (vect_print_dump_info (REPORT_DETAILS))
3234 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3237 /* In case of multi-step demotion, we first generate demotion operations to
3238 the intermediate types, and then from those types to the final one.
3239 We create vector destinations for the intermediate type (TYPES) received
3240 from supportable_narrowing_operation, and store them in the correct order
3241 for future use in vect_create_vectorized_demotion_stmts(). */
3243 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3245 vec_dsts = VEC_alloc (tree, heap, 1);
3247 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3248 VEC_quick_push (tree, vec_dsts, vec_dest);
3252 for (i = VEC_length (tree, interm_types) - 1;
3253 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3255 vec_dest = vect_create_destination_var (scalar_dest,
3257 VEC_quick_push (tree, vec_dsts, vec_dest);
3261 /* In case the vectorization factor (VF) is bigger than the number
3262 of elements that we can fit in a vectype (nunits), we have to generate
3263 more than one vector stmt, i.e., we need to "unroll" the
3264 vector stmt by a factor VF/nunits. */
3266 prev_stmt_info = NULL;
3267 for (j = 0; j < ncopies; j++)
3271 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3274 VEC_free (tree, heap, vec_oprnds0);
3275 vec_oprnds0 = VEC_alloc (tree, heap,
3276 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3277 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3278 vect_pow2 (multi_step_cvt) - 1);
3281 /* Arguments are ready. Create the new vector stmts. */
3282 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3283 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3284 multi_step_cvt, stmt, tmp_vec_dsts,
3285 gsi, slp_node, code1,
3289 VEC_free (tree, heap, vec_oprnds0);
3290 VEC_free (tree, heap, vec_dsts);
3291 VEC_free (tree, heap, tmp_vec_dsts);
3292 VEC_free (tree, heap, interm_types);
3294 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
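/* Illustrative sketch (not part of GCC): a conversion that typically
   requires multi-step demotion. Narrowing int to signed char usually
   goes through an intermediate short vector type, in which case
   supportable_narrowing_operation returns MULTI_STEP_CVT == 1 with the
   short vectype recorded in INTERM_TYPES. */

static void
example_multi_step_demotion (const int *src, signed char *dst, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = (signed char) src[i];   /* int -> short -> signed char. */
}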
3299 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3300 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3301 the resulting vectors and call the function recursively. */
3304 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3305 VEC (tree, heap) **vec_oprnds1,
3306 int multi_step_cvt, gimple stmt,
3307 VEC (tree, heap) *vec_dsts,
3308 gimple_stmt_iterator *gsi,
3309 slp_tree slp_node, enum tree_code code1,
3310 enum tree_code code2, tree decl1,
3311 tree decl2, int op_type,
3312 stmt_vec_info *prev_stmt_info)
3315 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3316 gimple new_stmt1, new_stmt2;
3317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3318 VEC (tree, heap) *vec_tmp;
3320 vec_dest = VEC_pop (tree, vec_dsts);
3321 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3323 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3325 if (op_type == binary_op)
3326 vop1 = VEC_index (tree, *vec_oprnds1, i);
3330 /* Generate the two halves of promotion operation. */
3331 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3332 op_type, vec_dest, gsi, stmt);
3333 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3334 op_type, vec_dest, gsi, stmt);
3335 if (is_gimple_call (new_stmt1))
3337 new_tmp1 = gimple_call_lhs (new_stmt1);
3338 new_tmp2 = gimple_call_lhs (new_stmt2);
3342 new_tmp1 = gimple_assign_lhs (new_stmt1);
3343 new_tmp2 = gimple_assign_lhs (new_stmt2);
3348 /* Store the results for the recursive call. */
3349 VEC_quick_push (tree, vec_tmp, new_tmp1);
3350 VEC_quick_push (tree, vec_tmp, new_tmp2);
3354 /* Last step of the promotion sequence - store the results. */
3357 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3358 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3362 if (!*prev_stmt_info)
3363 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3365 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3367 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3368 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3369 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3376 /* For a multi-step promotion operation we call the function
3377 recursively for every stage. We start from the input type,
3378 create promotion operations to the intermediate types, and then
3379 create promotions to the output type. */
3380 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3381 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3382 multi_step_cvt - 1, stmt,
3383 vec_dsts, gsi, slp_node, code1,
3384 code2, decl1, decl2, op_type,
3388 VEC_free (tree, heap, vec_tmp);
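/* Illustrative sketch (not part of GCC): the two halves of a widening
   operation as source loops. For a char -> short promotion each V16QI
   input yields a low-half and a high-half result (e.g. via
   VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR), matching NEW_TMP1 and
   NEW_TMP2 above. */

static void
example_promotion_halves (const signed char *src, short *lo, short *hi)
{
  int i;

  for (i = 0; i < 8; i++)
    lo[i] = (short) src[i];       /* low half of the input vector. */

  for (i = 0; i < 8; i++)
    hi[i] = (short) src[i + 8];   /* high half of the input vector. */
}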
3392 /* Function vectorizable_type_promotion
3394 Check if STMT performs a binary or unary operation that involves
3395 type promotion, and if it can be vectorized.
3396 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3397 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3398 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3401 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3402 gimple *vec_stmt, slp_tree slp_node)
3406 tree op0, op1 = NULL;
3407 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
3408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3409 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3410 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3411 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3415 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3416 stmt_vec_info prev_stmt_info;
3423 tree intermediate_type = NULL_TREE;