1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
/* Build the array type ELEM_TYPE[NELEMS] and wrap it in a fresh temporary;
   callers access individual vectors via read/write_vector_array.  */
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
63 tree vect_type, vect, vect_name, array_ref;
/* ARRAY must really be an array; its element type is the vector type
   being loaded.  */
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
/* Emit "vect_name = ARRAY[N]" at GSI and register the new SSA name.
   NOTE(review): new_stmt's declaration and the return of vect_name are
   presumably in lines not shown here — confirm against the full source.  */
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
/* Build the lvalue ARRAY[N]; the element type matches VECT's type.  */
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
/* Emit "ARRAY[N] = VECT" at GSI as part of vectorizing STMT.  */
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
/* Use FIRST_DR's alias pointer type so the new MEM_REF keeps the alias-set
   information of the data references it replaces.  */
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Remember the current marks so we can tell below whether this call
   actually changed anything (if not, STMT was already queued).  */
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern, in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
147 if (!used_in_pattern)
149 imm_use_iterator imm_iter;
/* Locate the LHS whose immediate uses we scan; only assignments and
   calls are considered here.  */
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
157 lhs = gimple_call_lhs (stmt);
159 /* This use is out of pattern use, if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
/* Debug stmts do not count as real uses.  */
165 if (is_gimple_debug (USE_STMT (use_p)))
167 use_stmt = USE_STMT (use_p);
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
/* From here on, all marking is redirected to the pattern stmt.  */
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
/* Merge in the new marks: liveness is OR-ed, relevance only ever
   increases (the enum values are ordered by strength).  */
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* If neither mark changed, do not push STMT onto the worklist again.  */
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
210 VEC_safe_push (gimple, heap, *worklist, stmt);
214 /* Function vect_stmt_relevant_p.
216 Return true if STMT in loop that is represented by LOOP_VINFO is
217 "relevant for vectorization".
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - control stmts in the loop (except for the exit condition).
224 CHECKME: what other side effects would the vectorizer allow? */
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232 imm_use_iterator imm_iter;
/* Start from "unused"; each check below may strengthen the mark.
   NOTE(review): *live_p is presumably initialized to false nearby and set
   when a def escapes the loop — confirm against the full source.  */
236 *relevant = vect_unused_in_scope;
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.")
251 *relevant = vect_used_in_scope;
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
/* Debug uses outside the loop do not make the stmt live.  */
265 if (is_gimple_debug (USE_STMT (use_p)))
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop closed form) */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
278 return (*live_p || *relevant);
282 /* Function exist_non_indexing_operands_for_use_p
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
299 /* STMT has a data_ref. FORNOW this means that its of one of
303 (This should have been verified in analyze_data_refs).
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
/* Only plain copies "lhs = var" can have USE as a non-indexing operand;
   anything else with a data-ref uses its scalar operands for addressing.  */
312 if (!gimple_assign_copy_p (stmt))
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
328 Function process_use.
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
337 Generally, LIVE_P and RELEVANT are used to define the liveness and
338 relevance info of the DEF_STMT of this USE:
339 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
340 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
342 - case 1: If USE is used only for address computations (e.g. array indexing),
343 which does not need to be directly vectorized, then the liveness/relevance
344 of the respective DEF_STMT is left unchanged.
345 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
346 skip DEF_STMT cause it had already been processed.
347 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
348 be modified accordingly.
350 Return true if everything is as expected. Return false otherwise. */
353 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
354 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
356 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
358 stmt_vec_info dstmt_vinfo;
359 basic_block bb, def_bb;
362 enum vect_def_type dt;
364 /* case 1: we are only interested in uses that need to be vectorized. Uses
365 that are used for address computation are not considered relevant. */
366 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* Resolve USE to its defining stmt; an unanalyzable def makes the whole
   loop unvectorizable, hence the failure return path.  */
369 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
372 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* Defs with no real defining stmt (e.g. default defs) need no marking.  */
376 if (!def_stmt || gimple_nop_p (def_stmt))
379 def_bb = gimple_bb (def_stmt);
380 if (!flow_bb_inside_loop_p (loop, def_bb))
382 if (vect_print_dump_info (REPORT_DETAILS))
383 fprintf (vect_dump, "def_stmt is out of loop.");
387 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
388 DEF_STMT must have already been processed, because this should be the
389 only way that STMT, which is a reduction-phi, was put in the worklist,
390 as there should be no other uses for DEF_STMT in the loop. So we just
391 check that everything is as expected, and we are done. */
392 dstmt_vinfo = vinfo_for_stmt (def_stmt);
393 bb = gimple_bb (stmt);
394 if (gimple_code (stmt) == GIMPLE_PHI
395 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
396 && gimple_code (def_stmt) != GIMPLE_PHI
397 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
398 && bb->loop_father == def_bb->loop_father)
400 if (vect_print_dump_info (REPORT_DETAILS))
401 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If DEF_STMT was replaced by a pattern stmt, check the pattern stmt's
   marks instead — that is where vect_mark_relevant recorded them.  */
402 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
403 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
404 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
405 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
406 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410 /* case 3a: outer-loop stmt defining an inner-loop stmt:
411 outer-loop-header-bb:
417 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
419 if (vect_print_dump_info (REPORT_DETAILS))
420 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
/* Translate the inner-loop relevance of STMT into the relevance the
   outer-loop DEF_STMT should get.  */
424 case vect_unused_in_scope:
425 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
426 vect_used_in_scope : vect_unused_in_scope;
429 case vect_used_in_outer_by_reduction:
430 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
431 relevant = vect_used_by_reduction;
434 case vect_used_in_outer:
435 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
436 relevant = vect_used_in_scope;
439 case vect_used_in_scope:
447 /* case 3b: inner-loop stmt defining an outer-loop stmt:
448 outer-loop-header-bb:
452 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
454 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
456 if (vect_print_dump_info (REPORT_DETAILS))
457 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
/* Converse translation: the outer-loop relevance of STMT becomes an
   "used in outer" relevance for the inner-loop DEF_STMT.  */
461 case vect_unused_in_scope:
462 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
463 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
464 vect_used_in_outer_by_reduction : vect_unused_in_scope;
467 case vect_used_by_reduction:
468 relevant = vect_used_in_outer_by_reduction;
471 case vect_used_in_scope:
472 relevant = vect_used_in_outer;
/* Finally record the (possibly adjusted) marks on DEF_STMT and queue it.  */
480 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
481 is_pattern_stmt_p (stmt_vinfo));
486 /* Function vect_mark_stmts_to_be_vectorized.
488 Not all stmts in the loop need to be vectorized. For example:
497 Stmt 1 and 3 do not need to be vectorized, because loop control and
498 addressing of vectorized data-refs are handled differently.
500 This pass detects such stmts. */
503 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
505 VEC(gimple,heap) *worklist;
506 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
507 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
508 unsigned int nbbs = loop->num_nodes;
509 gimple_stmt_iterator si;
512 stmt_vec_info stmt_vinfo;
516 enum vect_relevant relevant, tmp_relevant;
517 enum vect_def_type def_type;
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
522 worklist = VEC_alloc (gimple, heap, 64);
524 /* 1. Init worklist. */
/* Seed the worklist with every phi and stmt in the loop body that is
   relevant by itself (see vect_stmt_relevant_p).  */
525 for (i = 0; i < nbbs; i++)
528 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
531 if (vect_print_dump_info (REPORT_DETAILS))
533 fprintf (vect_dump, "init: phi relevant? ");
534 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
537 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
538 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
540 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
542 stmt = gsi_stmt (si);
543 if (vect_print_dump_info (REPORT_DETAILS))
545 fprintf (vect_dump, "init: stmt relevant? ");
546 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
549 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
550 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
554 /* 2. Process_worklist */
/* Propagate relevance/liveness backwards from each queued stmt to the
   defs of its uses until the worklist drains.  */
555 while (VEC_length (gimple, worklist) > 0)
560 stmt = VEC_pop (gimple, worklist);
561 if (vect_print_dump_info (REPORT_DETAILS))
563 fprintf (vect_dump, "worklist: examine stmt: ");
564 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
567 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
568 (DEF_STMT) as relevant/irrelevant and live/dead according to the
569 liveness and relevance properties of STMT. */
570 stmt_vinfo = vinfo_for_stmt (stmt);
571 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
572 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
574 /* Generally, the liveness and relevance properties of STMT are
575 propagated as is to the DEF_STMTs of its USEs:
576 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
577 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
579 One exception is when STMT has been identified as defining a reduction
580 variable; in this case we set the liveness/relevance as follows:
582 relevant = vect_used_by_reduction
583 This is because we distinguish between two kinds of relevant stmts -
584 those that are used by a reduction computation, and those that are
585 (also) used by a regular computation. This allows us later on to
586 identify stmts that are used solely by a reduction, and therefore the
587 order of the results that they produce does not have to be kept. */
589 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
590 tmp_relevant = relevant;
593 case vect_reduction_def:
594 switch (tmp_relevant)
596 case vect_unused_in_scope:
597 relevant = vect_used_by_reduction;
600 case vect_used_by_reduction:
601 if (gimple_code (stmt) == GIMPLE_PHI)
/* Any other relevance for a reduction def is unsupported; bail out
   and free the worklist on each failure path below.  */
606 if (vect_print_dump_info (REPORT_DETAILS))
607 fprintf (vect_dump, "unsupported use of reduction.");
609 VEC_free (gimple, heap, worklist);
616 case vect_nested_cycle:
617 if (tmp_relevant != vect_unused_in_scope
618 && tmp_relevant != vect_used_in_outer_by_reduction
619 && tmp_relevant != vect_used_in_outer)
621 if (vect_print_dump_info (REPORT_DETAILS))
622 fprintf (vect_dump, "unsupported use of nested cycle.");
624 VEC_free (gimple, heap, worklist);
631 case vect_double_reduction_def:
632 if (tmp_relevant != vect_unused_in_scope
633 && tmp_relevant != vect_used_by_reduction)
635 if (vect_print_dump_info (REPORT_DETAILS))
636 fprintf (vect_dump, "unsupported use of double reduction.");
638 VEC_free (gimple, heap, worklist);
649 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
651 /* Pattern statements are not inserted into the code, so
652 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
653 have to scan the RHS or function arguments instead. */
654 if (is_gimple_assign (stmt))
656 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
657 tree op = gimple_assign_rhs1 (stmt);
/* A COND_EXPR's condition operand is a comparison tree, not an SSA
   name, so process its two operands individually.  */
660 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
662 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
663 live_p, relevant, &worklist)
664 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
665 live_p, relevant, &worklist))
667 VEC_free (gimple, heap, worklist);
672 for (; i < gimple_num_ops (stmt); i++)
674 op = gimple_op (stmt, i);
675 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
678 VEC_free (gimple, heap, worklist);
683 else if (is_gimple_call (stmt))
685 for (i = 0; i < gimple_call_num_args (stmt); i++)
687 tree arg = gimple_call_arg (stmt, i);
688 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
691 VEC_free (gimple, heap, worklist);
/* Non-pattern stmts: iterate over the regular SSA use operands.  */
698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
700 tree op = USE_FROM_PTR (use_p);
701 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
704 VEC_free (gimple, heap, worklist);
708 } /* while worklist */
710 VEC_free (gimple, heap, worklist);
715 /* Get cost by calling cost target builtin. */
718 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
720 tree dummy_type = NULL;
/* Delegate to the target's vectorization cost hook; the vector type and
   misalignment arguments are irrelevant for a generic cost query, hence
   the dummy values.  */
723 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
728 /* Get cost for STMT. */
731 cost_for_stmt (gimple stmt)
733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Map the vectorization type of STMT onto the corresponding scalar cost
   kind understood by the target cost hook.  */
735 switch (STMT_VINFO_TYPE (stmt_info))
737 case load_vec_info_type:
738 return vect_get_stmt_cost (scalar_load);
739 case store_vec_info_type:
740 return vect_get_stmt_cost (scalar_store);
/* All remaining computational statement kinds share the generic
   scalar-statement cost.  */
741 case op_vec_info_type:
742 case condition_vec_info_type:
743 case assignment_vec_info_type:
744 case reduc_vec_info_type:
745 case induc_vec_info_type:
746 case type_promotion_vec_info_type:
747 case type_demotion_vec_info_type:
748 case type_conversion_vec_info_type:
749 case call_vec_info_type:
750 return vect_get_stmt_cost (scalar_stmt);
751 case undef_vec_info_type:
757 /* Function vect_model_simple_cost.
759 Models cost for simple operations, i.e. those that only emit ncopies of a
760 single op. Right now, this does not account for multiple insns that could
761 be generated for the single vector op. We will handle that shortly. */
764 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
765 enum vect_def_type *dt, slp_tree slp_node)
768 int inside_cost = 0, outside_cost = 0;
770 /* The SLP costs were already calculated during SLP tree build. */
771 if (PURE_SLP_STMT (stmt_info))
/* Inside the loop: one vector stmt per copy.  */
774 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
776 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constant/invariant operands need a vector built outside the loop.  */
777 for (i = 0; i < 2; i++)
779 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
780 outside_cost += vect_get_stmt_cost (vector_stmt);
783 if (vect_print_dump_info (REPORT_COST))
784 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
785 "outside_cost = %d .", inside_cost, outside_cost);
787 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
788 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
789 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
793 /* Function vect_cost_strided_group_size
795 For strided load or store, return the group_size only if it is the first
796 load or store of a group, else return 1. This ensures that group size is
797 only returned once per group. */
800 vect_cost_strided_group_size (stmt_vec_info stmt_info)
802 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
/* Only the group leader reports the full group size; other members
   report 1 so the group cost is not double-counted.  */
804 if (first_stmt == STMT_VINFO_STMT (stmt_info))
805 return GROUP_SIZE (stmt_info);
811 /* Function vect_model_store_cost
813 Models cost for stores. In the case of strided accesses, one access
814 has the overhead of the strided access attributed to it. */
817 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
818 bool store_lanes_p, enum vect_def_type dt,
822 unsigned int inside_cost = 0, outside_cost = 0;
823 struct data_reference *first_dr;
826 /* The SLP costs were already calculated during SLP tree build. */
827 if (PURE_SLP_STMT (stmt_info))
/* An invariant/constant stored value must be broadcast into a vector
   once, outside the loop.  */
830 if (dt == vect_constant_def || dt == vect_external_def)
831 outside_cost = vect_get_stmt_cost (scalar_to_vec);
833 /* Strided access? */
834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
/* For SLP the first stmt comes from the SLP node; otherwise from the
   interleaving chain recorded in the stmt info.  */
838 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
843 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
844 group_size = vect_cost_strided_group_size (stmt_info);
847 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
849 /* Not a strided access. */
853 first_dr = STMT_VINFO_DATA_REF (stmt_info);
856 /* We assume that the cost of a single store-lanes instruction is
857 equivalent to the cost of GROUP_SIZE separate stores. If a strided
858 access is instead being provided by a permute-and-store operation,
859 include the cost of the permutes. */
860 if (!store_lanes_p && group_size > 1)
862 /* Uses a high and low interleave operation for each needed permute. */
863 inside_cost = ncopies * exact_log2(group_size) * group_size
864 * vect_get_stmt_cost (vector_stmt);
866 if (vect_print_dump_info (REPORT_COST))
867 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
872 /* Costs of the stores. */
873 vect_get_store_cost (first_dr, ncopies, &inside_cost);
875 if (vect_print_dump_info (REPORT_COST))
876 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
877 "outside_cost = %d .", inside_cost, outside_cost);
879 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
880 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
881 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
885 /* Calculate cost of DR's memory access. */
887 vect_get_store_cost (struct data_reference *dr, int ncopies,
888 unsigned int *inside_cost)
/* Dispatch on how the target supports this reference's alignment.  */
890 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
892 switch (alignment_support_scheme)
/* Aligned store: one plain vector store per copy.  */
896 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
898 if (vect_print_dump_info (REPORT_COST))
899 fprintf (vect_dump, "vect_model_store_cost: aligned.");
904 case dr_unaligned_supported:
906 gimple stmt = DR_STMT (dr);
907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
908 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
910 /* Here, we assign an additional cost for the unaligned store. */
911 *inside_cost += ncopies
912 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
913 vectype, DR_MISALIGNMENT (dr));
915 if (vect_print_dump_info (REPORT_COST))
916 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
928 /* Function vect_model_load_cost
930 Models cost for loads. In the case of strided accesses, the last access
931 has the overhead of the strided access attributed to it. Since unaligned
932 accesses are supported for loads, we also account for the costs of the
933 access scheme chosen. */
936 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
941 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
942 unsigned int inside_cost = 0, outside_cost = 0;
944 /* The SLP costs were already calculated during SLP tree build. */
945 if (PURE_SLP_STMT (stmt_info))
948 /* Strided accesses? */
949 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
950 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
951 group_size = vect_cost_strided_group_size (stmt_info);
993 /* Calculate cost of DR's memory access. */
995 vect_get_load_cost (struct data_reference *dr, int ncopies,
996 bool add_realign_cost, unsigned int *inside_cost,
997 unsigned int *outside_cost)
/* Dispatch on how the target supports this reference's alignment.  */
999 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1001 switch (alignment_support_scheme)
/* Aligned load: one plain vector load per copy.  */
1005 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1007 if (vect_print_dump_info (REPORT_COST))
1008 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1012 case dr_unaligned_supported:
1014 gimple stmt = DR_STMT (dr);
1015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1016 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1018 /* Here, we assign an additional cost for the unaligned load. */
1019 *inside_cost += ncopies
1020 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1021 vectype, DR_MISALIGNMENT (dr));
1022 if (vect_print_dump_info (REPORT_COST))
1023 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1028 case dr_explicit_realign:
/* Explicit realignment: two loads plus a realignment stmt per copy.  */
1030 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1031 + vect_get_stmt_cost (vector_stmt));
1033 /* FIXME: If the misalignment remains fixed across the iterations of
1034 the containing loop, the following cost should be added to the
1036 if (targetm.vectorize.builtin_mask_for_load)
1037 *inside_cost += vect_get_stmt_cost (vector_stmt);
1041 case dr_explicit_realign_optimized:
1043 if (vect_print_dump_info (REPORT_COST))
1044 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1047 /* Unaligned software pipeline has a load of an address, an initial
1048 load, and possibly a mask operation to "prime" the loop. However,
1049 if this is an access in a group of loads, which provide strided
1050 access, then the above cost should only be considered for one
1051 access in the group. Inside the loop, there is a load op
1052 and a realignment op. */
1054 if (add_realign_cost)
1056 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1057 if (targetm.vectorize.builtin_mask_for_load)
1058 *outside_cost += vect_get_stmt_cost (vector_stmt);
1061 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1062 + vect_get_stmt_cost (vector_stmt));
1072 /* Function vect_init_vector.
1074 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1075 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1076 is not NULL. Otherwise, place the initialization at the loop preheader.
1077 Return the DEF of INIT_STMT.
1078 It will be used in the vectorization of STMT. */
1081 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1082 gimple_stmt_iterator *gsi)
1084 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
/* Create the temporary and the initializing assignment
   "new_temp = VECTOR_VAR".  */
1092 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1093 add_referenced_var (new_var);
1094 init_stmt = gimple_build_assign (new_var, vector_var);
1095 new_temp = make_ssa_name (new_var, init_stmt);
1096 gimple_assign_set_lhs (init_stmt, new_temp);
/* If the caller supplied an insertion point, emit there ...  */
1099 vect_finish_stmt_generation (stmt, init_stmt, gsi);
/* ... otherwise place the init on the loop preheader edge (loop
   vectorization) or after the labels of the region's bb (basic-block
   vectorization).  */
1102 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1106 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1108 if (nested_in_vect_loop_p (loop, stmt))
1111 pe = loop_preheader_edge (loop);
1112 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
/* Inserting on the preheader edge must not split it into a new bb.  */
1113 gcc_assert (!new_bb);
1117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1119 gimple_stmt_iterator gsi_bb_start;
1121 gcc_assert (bb_vinfo);
1122 bb = BB_VINFO_BB (bb_vinfo);
1123 gsi_bb_start = gsi_after_labels (bb);
1124 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1128 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "created new init_stmt: ");
1131 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1134 vec_oprnd = gimple_assign_lhs (init_stmt);
1139 /* Function vect_get_vec_def_for_operand.
1141 OP is an operand in STMT. This function returns a (vector) def that will be
1142 used in the vectorized stmt for STMT.
1144 In the case that OP is an SSA_NAME which is defined in the loop, then
1145 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1147 In case OP is an invariant or constant, a new stmt that creates a vector def
1148 needs to be introduced. */
/* NOTE(review): this is a sampled excerpt — gaps in the embedded original
   line numbers (e.g. 1152-1155, 1190-1193) mark elided lines, including the
   function's return type, opening brace, and the switch header that the
   `case vect_*_def:` labels below belong to.  Do not read it as complete.  */
1151 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1156 stmt_vec_info def_stmt_info = NULL;
1157 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1158 unsigned int nunits;
1159 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1165 enum vect_def_type dt;
1169 if (vect_print_dump_info (REPORT_DETAILS))
1171 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1172 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP (constant / external / internal / reduction / induction);
   DT drives the case dispatch below.  The classification must succeed.  */
1175 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1177 gcc_assert (is_simple_use);
1178 if (vect_print_dump_info (REPORT_DETAILS))
1182 fprintf (vect_dump, "def = ");
1183 print_generic_expr (vect_dump, def, TDF_SLIM);
1187 fprintf (vect_dump, " def_stmt = ");
1188 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1194 /* Case 1: operand is a constant. */
1195 case vect_constant_def:
1197 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1198 gcc_assert (vector_type);
1199 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1204 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1205 if (vect_print_dump_info (REPORT_DETAILS))
1206 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Splat the scalar constant into every lane; the value argument
   (original line 1210) is elided from this excerpt.  */
1208 vec_cst = build_vector_from_val (vector_type,
1209 fold_convert (TREE_TYPE (vector_type),
1211 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1214 /* Case 2: operand is defined outside the loop - loop invariant. */
1215 case vect_external_def:
1217 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1218 gcc_assert (vector_type);
1219 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1224 /* Create 'vec_inv = {inv,inv,..,inv}' */
1225 if (vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "Create vector_inv.");
/* Build a CONSTRUCTOR element list with DEF replicated NUNITS times;
   iterating downward keeps the resulting list in lane order.  */
1228 for (i = nunits - 1; i >= 0; --i)
1230 t = tree_cons (NULL_TREE, def, t);
1233 /* FIXME: use build_constructor directly. */
1234 vec_inv = build_constructor_from_list (vector_type, t);
1235 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1238 /* Case 3: operand is defined inside the loop. */
1239 case vect_internal_def:
1242 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1244 /* Get the def from the vectorized stmt. */
1245 def_stmt_info = vinfo_for_stmt (def_stmt);
1247 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1248 /* Get vectorized pattern statement. */
/* If the defining stmt was replaced by a pattern stmt and is itself not
   relevant, take the vector def from the related pattern stmt instead.
   (The `if (!vec_stmt` head at original line 1249 is elided here.)  */
1250 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1251 && !STMT_VINFO_RELEVANT (def_stmt_info))
1252 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1253 STMT_VINFO_RELATED_STMT (def_stmt_info)))
1254 gcc_assert (vec_stmt);
/* The vector def's SSA name lives in a different slot depending on the
   kind of defining stmt: PHI result, call lhs, or assignment lhs.  */
1255 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1256 vec_oprnd = PHI_RESULT (vec_stmt);
1257 else if (is_gimple_call (vec_stmt))
1258 vec_oprnd = gimple_call_lhs (vec_stmt);
1260 vec_oprnd = gimple_assign_lhs (vec_stmt);
1264 /* Case 4: operand is defined by a loop header phi - reduction */
1265 case vect_reduction_def:
1266 case vect_double_reduction_def:
1267 case vect_nested_cycle:
1271 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1272 loop = (gimple_bb (def_stmt))->loop_father;
1274 /* Get the def before the loop */
1275 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1276 return get_initial_def_for_reduction (stmt, op, scalar_def);
1279 /* Case 5: operand is defined by loop-header phi - induction. */
1280 case vect_induction_def:
1282 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1284 /* Get the def from the vectorized stmt. */
1285 def_stmt_info = vinfo_for_stmt (def_stmt);
1286 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1287 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1288 vec_oprnd = PHI_RESULT (vec_stmt);
1290 vec_oprnd = gimple_get_lhs (vec_stmt);
1300 /* Function vect_get_vec_def_for_stmt_copy
1302 Return a vector-def for an operand. This function is used when the
1303 vectorized stmt to be created (by the caller to this function) is a "copy"
1304 created in case the vectorized result cannot fit in one vector, and several
1305 copies of the vector-stmt are required. In this case the vector-def is
1306 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1307 of the stmt that defines VEC_OPRND.
1308 DT is the type of the vector def VEC_OPRND.
1311 In case the vectorization factor (VF) is bigger than the number
1312 of elements that can fit in a vectype (nunits), we have to generate
1313 more than one vector stmt to vectorize the scalar stmt. This situation
1314 arises when there are multiple data-types operated upon in the loop; the
1315 smallest data-type determines the VF, and as a result, when vectorizing
1316 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1317 vector stmt (each computing a vector of 'nunits' results, and together
1318 computing 'VF' results in each iteration). This function is called when
1319 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1320 which VF=16 and nunits=4, so the number of copies required is 4):
1322 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1324 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1325 VS1.1: vx.1 = memref1 VS1.2
1326 VS1.2: vx.2 = memref2 VS1.3
1327 VS1.3: vx.3 = memref3
1329 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1330 VSnew.1: vz1 = vx.1 + ... VSnew.2
1331 VSnew.2: vz2 = vx.2 + ... VSnew.3
1332 VSnew.3: vz3 = vx.3 + ...
1334 The vectorization of S1 is explained in vectorizable_load.
1335 The vectorization of S2:
1336 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1337 the function 'vect_get_vec_def_for_operand' is called to
1338 get the relevant vector-def for each operand of S2. For operand x it
1339 returns the vector-def 'vx.0'.
1341 To create the remaining copies of the vector-stmt (VSnew.j), this
1342 function is called to get the relevant vector-def for each operand. It is
1343 obtained from the respective VS1.j stmt, which is recorded in the
1344 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1346 For example, to obtain the vector-def 'vx.1' in order to create the
1347 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1348 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1349 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1350 and return its def ('vx.1').
1351 Overall, to create the above sequence this function will be called 3 times:
1352 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1353 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1354 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): sampled excerpt — the return type, braces, the `return`
   after the constant/external early-out, and original line 1374 are elided
   (see the jump in the embedded line numbers).  */
1357 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1359 gimple vec_stmt_for_operand;
1360 stmt_vec_info def_stmt_info;
1362 /* Do nothing; can reuse same def. */
/* Constants and loop invariants are identical in every copy, so the
   operand can be returned unchanged (the return itself is elided here).  */
1363 if (dt == vect_external_def || dt == vect_constant_def )
/* Walk from the current vector def to the next copy via
   STMT_VINFO_RELATED_STMT of its defining statement.  */
1366 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1367 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1368 gcc_assert (def_stmt_info);
1369 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1370 gcc_assert (vec_stmt_for_operand);
1371 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
/* NOTE(review): an `else` at the elided original line 1374 presumably
   separates the two assignments below — otherwise line 1375 would
   unconditionally overwrite the PHI_RESULT taken at 1373; verify against
   the full source.  */
1372 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1373 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1375 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1380 /* Get vectorized definitions for the operands to create a copy of an original
1381 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* NOTE(review): sampled excerpt — signature's return type, braces and any
   surrounding control flow are elided (gaps in the embedded numbering).
   Visible behavior: pop the previous copy's def off each operand vector,
   advance it one copy via vect_get_vec_def_for_stmt_copy, and push the
   result back, so each VEC holds the defs for the current copy.  */
1384 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1385 VEC(tree,heap) **vec_oprnds0,
1386 VEC(tree,heap) **vec_oprnds1)
1388 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1390 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1391 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
/* The second operand vector is optional; only advance it when present.  */
1393 if (vec_oprnds1 && *vec_oprnds1)
1395 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1396 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1397 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1402 /* Get vectorized definitions for OP0 and OP1.
1403 REDUC_INDEX is the index of reduction operand in case of reduction,
1404 and -1 otherwise. */
/* NOTE(review): sampled excerpt — the `if (slp_node)` / `else` split and
   several closing braces are elided.  The visible SLP path (1414-1429)
   delegates to vect_get_slp_defs; the visible non-SLP path (1435-1443)
   fetches a single def per operand via vect_get_vec_def_for_operand.  */
1407 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1408 VEC (tree, heap) **vec_oprnds0,
1409 VEC (tree, heap) **vec_oprnds1,
1410 slp_tree slp_node, int reduc_index)
1414 int nops = (op1 == NULL_TREE) ? 1 : 2;
1415 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1416 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1418 VEC_quick_push (tree, ops, op0);
1420 VEC_quick_push (tree, ops, op1);
1422 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
/* vect_get_slp_defs returns per-operand vectors as slp_void_p; cast them
   back to the tree VECs the callers expect.  */
1424 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1426 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1428 VEC_free (tree, heap, ops);
1429 VEC_free (slp_void_p, heap, vec_defs);
1435 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1436 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1437 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1441 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1442 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1443 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1449 /* Function vect_finish_stmt_generation.
1451 Insert a new stmt. */
/* NOTE(review): sampled excerpt (return type, braces, and part of the
   new_stmt_vec_info argument list are elided).  Inserts VEC_STMT before
   the iterator position, attaches fresh stmt_vec_info, and copies the
   scalar stmt's source location.  */
1454 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1455 gimple_stmt_iterator *gsi)
1457 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1458 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1459 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1461 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1463 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
/* Register vector-specific metadata so later vectorizer queries
   (vinfo_for_stmt) work on the new statement too.  */
1465 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1468 if (vect_print_dump_info (REPORT_DETAILS))
1470 fprintf (vect_dump, "add new stmt: ");
1471 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
/* Keep debug info pointing at the original scalar statement.  */
1474 gimple_set_location (vec_stmt, gimple_location (stmt));
1477 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1478 a function declaration if the target has a vectorized version
1479 of the function, or NULL_TREE if the function cannot be vectorized. */
/* NOTE(review): sampled excerpt — return type, braces, the NULL_TREE
   early-returns, and the `if (fndecl == NULL_TREE` head before line 1492
   are elided (gaps in the embedded numbering).  */
1482 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1484 tree fndecl = gimple_call_fndecl (call);
1486 /* We only handle functions that do not read or clobber memory -- i.e.
1487 const or novops ones. */
1488 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* Only direct calls to built-in functions can have a target-provided
   vectorized counterpart.  */
1492 || TREE_CODE (fndecl) != FUNCTION_DECL
1493 || !DECL_BUILT_IN (fndecl))
/* Delegate the final decision to the target hook.  */
1496 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1500 /* Function vectorizable_call.
1502 Check if STMT performs a function call that can be vectorized.
1503 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1504 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1505 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): heavily sampled excerpt — `return false` statements,
   `else` branches, the `switch (modifier)`/`case` scaffolding around the
   NONE/NARROW loops, and many braces are elided (see the jumps in the
   embedded original line numbers).  Structure: analysis phase (argument
   and type checks, modifier classification, target builtin lookup),
   then a transformation phase generating NCOPIES vectorized calls.  */
1508 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1513 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1514 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1515 tree vectype_out, vectype_in;
1518 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1519 tree fndecl, new_temp, def, rhs_type;
1521 enum vect_def_type dt[3]
1522 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1523 gimple new_stmt = NULL;
1525 VEC(tree, heap) *vargs = NULL;
1526 enum { NARROW, NONE, WIDEN } modifier;
1530 /* FORNOW: unsupported in basic block SLP. */
1531 gcc_assert (loop_vinfo);
1533 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1536 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1539 /* FORNOW: SLP not supported. */
1540 if (STMT_SLP_TYPE (stmt_info))
1543 /* Is STMT a vectorizable call? */
1544 if (!is_gimple_call (stmt))
1547 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1550 if (stmt_can_throw_internal (stmt))
1553 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1555 /* Process function arguments. */
1556 rhs_type = NULL_TREE;
1557 vectype_in = NULL_TREE;
1558 nargs = gimple_call_num_args (stmt);
1560 /* Bail out if the function has more than three arguments, we do not have
1561 interesting builtin functions to vectorize with more than two arguments
1562 except for fma. No arguments is also not good. */
1563 if (nargs == 0 || nargs > 3)
1566 for (i = 0; i < nargs; i++)
1570 op = gimple_call_arg (stmt, i);
1572 /* We can only handle calls with arguments of the same type. */
1574 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1576 if (vect_print_dump_info (REPORT_DETAILS))
1577 fprintf (vect_dump, "argument types differ.");
1581 rhs_type = TREE_TYPE (op);
1583 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1584 &def_stmt, &def, &dt[i], &opvectype))
1586 if (vect_print_dump_info (REPORT_DETAILS))
1587 fprintf (vect_dump, "use not simple.");
/* Record the first argument's vector type, then require all further
   arguments to match it (guard head at elided line 1593).  */
1592 vectype_in = opvectype;
1594 && opvectype != vectype_in)
1596 if (vect_print_dump_info (REPORT_DETAILS))
1597 fprintf (vect_dump, "argument vector types differ.");
1601 /* If all arguments are external or constant defs use a vector type with
1602 the same size as the output vector type. */
1604 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1606 gcc_assert (vectype_in);
1609 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "no vectype for scalar type ");
1612 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* Classify the call as widening / none / narrowing by comparing lane
   counts; the assignments to `modifier` (original lines 1622/1624/1626)
   are elided from this excerpt.  */
1619 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1620 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1621 if (nunits_in == nunits_out / 2)
1623 else if (nunits_out == nunits_in)
1625 else if (nunits_out == nunits_in / 2)
1630 /* For now, we only vectorize functions if a target specific builtin
1631 is available. TODO -- in some cases, it might be profitable to
1632 insert the calls for pieces of the vector, in order to be able
1633 to vectorize other operations in the loop. */
1634 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1635 if (fndecl == NULL_TREE)
1637 if (vect_print_dump_info (REPORT_DETAILS))
1638 fprintf (vect_dump, "function is not vectorizable.");
1643 gcc_assert (!gimple_vuse (stmt));
1645 if (modifier == NARROW)
1646 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1648 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1650 /* Sanity check: make sure that at least one copy of the vectorized stmt
1651 needs to be generated. */
1652 gcc_assert (ncopies >= 1);
1654 if (!vec_stmt) /* transformation not required. */
1656 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1657 if (vect_print_dump_info (REPORT_DETAILS))
1658 fprintf (vect_dump, "=== vectorizable_call ===");
1659 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1665 if (vect_print_dump_info (REPORT_DETAILS))
1666 fprintf (vect_dump, "transform call.");
1669 scalar_dest = gimple_call_lhs (stmt);
1670 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1672 prev_stmt_info = NULL;
/* modifier == NONE path (one input vector per argument per copy);
   the enclosing switch/case is elided in this excerpt.  */
1676 for (j = 0; j < ncopies; ++j)
1678 /* Build argument list for the vectorized call. */
1680 vargs = VEC_alloc (tree, heap, nargs);
1682 VEC_truncate (tree, vargs, 0);
1684 for (i = 0; i < nargs; i++)
1686 op = gimple_call_arg (stmt, i);
/* First copy (j == 0): fetch the initial def; later copies chain
   from the previous copy's argument.  */
1689 = vect_get_vec_def_for_operand (op, stmt, NULL);
1692 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1694 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1697 VEC_quick_push (tree, vargs, vec_oprnd0);
1700 new_stmt = gimple_build_call_vec (fndecl, vargs);
1701 new_temp = make_ssa_name (vec_dest, new_stmt);
1702 gimple_call_set_lhs (new_stmt, new_temp);
1704 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1705 mark_symbols_for_renaming (new_stmt);
/* Chain the copies: first copy becomes STMT_VINFO_VEC_STMT, each
   later copy is linked via STMT_VINFO_RELATED_STMT.  */
1708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1712 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* modifier == NARROW path: each vectorized call consumes TWO input
   vectors per argument to produce one narrower output vector.  */
1718 for (j = 0; j < ncopies; ++j)
1720 /* Build argument list for the vectorized call. */
1722 vargs = VEC_alloc (tree, heap, nargs * 2);
1724 VEC_truncate (tree, vargs, 0);
1726 for (i = 0; i < nargs; i++)
1728 op = gimple_call_arg (stmt, i);
1732 = vect_get_vec_def_for_operand (op, stmt, NULL);
1734 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1738 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1740 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1742 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1745 VEC_quick_push (tree, vargs, vec_oprnd0);
1746 VEC_quick_push (tree, vargs, vec_oprnd1);
1749 new_stmt = gimple_build_call_vec (fndecl, vargs);
1750 new_temp = make_ssa_name (vec_dest, new_stmt);
1751 gimple_call_set_lhs (new_stmt, new_temp);
1753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1754 mark_symbols_for_renaming (new_stmt);
1757 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1761 prev_stmt_info = vinfo_for_stmt (new_stmt);
1764 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1769 /* No current target implements this case. */
1773 VEC_free (tree, heap, vargs);
1775 /* Update the exception handling table with the vector stmt if necessary. */
1776 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1777 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1779 /* The call in STMT might prevent it from being removed in dce.
1780 We however cannot remove it here, due to the way the ssa name
1781 it defines is mapped to the new definition. So just replace
1782 rhs of the statement with something harmless. */
1784 type = TREE_TYPE (scalar_dest);
1785 if (is_pattern_stmt_p (stmt_info))
1786 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1788 lhs = gimple_call_lhs (stmt);
/* Replace the original scalar call with `lhs = 0` so DCE can later
   remove it without disturbing the SSA-name mapping.  */
1789 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1790 set_vinfo_for_stmt (new_stmt, stmt_info);
1791 set_vinfo_for_stmt (stmt, NULL);
1792 STMT_VINFO_STMT (stmt_info) = new_stmt;
1793 gsi_replace (gsi, new_stmt, false);
1794 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1800 /* Function vect_gen_widened_results_half
1802 Create a vector stmt whose code, type, number of arguments, and result
1803 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1804 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1805 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1806 needs to be created (DECL is a function-decl of a target-builtin).
1807 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): sampled excerpt — return type, the `decl`/`stmt`
   parameters' declaration lines, braces, the `else` between the call and
   assign branches, and the final `return new_stmt` are elided.  */
1810 vect_gen_widened_results_half (enum tree_code code,
1812 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1813 tree vec_dest, gimple_stmt_iterator *gsi,
1819 /* Generate half of the widened result: */
1820 if (code == CALL_EXPR)
1822 /* Target specific support */
1823 if (op_type == binary_op)
1824 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1826 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1827 new_temp = make_ssa_name (vec_dest, new_stmt);
1828 gimple_call_set_lhs (new_stmt, new_temp);
1832 /* Generic support */
1833 gcc_assert (op_type == TREE_CODE_LENGTH (code));
/* Guard at the elided branch: only unary generic codes reach the
   single-operand builder below — TODO confirm against full source.  */
1834 if (op_type != binary_op)
1836 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1838 new_temp = make_ssa_name (vec_dest, new_stmt);
1839 gimple_assign_set_lhs (new_stmt, new_temp);
1841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1846 /* Check if STMT performs a conversion operation, that can be vectorized.
1847 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1848 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1849 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): heavily sampled excerpt — `return false`s, the
   `switch (modifier)` / `case NONE|WIDEN|NARROW` scaffolding around the
   three code-gen loops, the modifier assignments (original lines
   1937/1939/1941), and many braces are elided (see embedded-number gaps).
   Structure: analysis (FLOAT/FIX_TRUNC check, operand classification,
   target support query), then NONE / WIDEN / NARROW code generation.  */
1852 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1853 gimple *vec_stmt, slp_tree slp_node)
1858 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1860 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1861 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1862 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1866 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1867 gimple new_stmt = NULL;
1868 stmt_vec_info prev_stmt_info;
1871 tree vectype_out, vectype_in;
1874 enum { NARROW, NONE, WIDEN } modifier;
1876 VEC(tree,heap) *vec_oprnds0 = NULL;
1878 VEC(tree,heap) *dummy = NULL;
1881 /* Is STMT a vectorizable conversion? */
1883 /* FORNOW: unsupported in basic block SLP. */
1884 gcc_assert (loop_vinfo);
1886 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1892 if (!is_gimple_assign (stmt))
1895 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Only int<->float conversions are handled here; plain NOP conversions
   go through vectorizable_assignment instead.  */
1898 code = gimple_assign_rhs_code (stmt);
1899 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1902 /* Check types of lhs and rhs. */
1903 scalar_dest = gimple_assign_lhs (stmt);
1904 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1906 op0 = gimple_assign_rhs1 (stmt);
1907 rhs_type = TREE_TYPE (op0);
1908 /* Check the operands of the operation. */
1909 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1910 &def_stmt, &def, &dt[0], &vectype_in))
1912 if (vect_print_dump_info (REPORT_DETAILS))
1913 fprintf (vect_dump, "use not simple.");
1916 /* If op0 is an external or constant defs use a vector type of
1917 the same size as the output vector type. */
1919 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1921 gcc_assert (vectype_in);
1924 if (vect_print_dump_info (REPORT_DETAILS))
1926 fprintf (vect_dump, "no vectype for scalar type ");
1927 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
/* Classify as WIDEN / NONE / NARROW by lane-count ratio; the actual
   assignments to `modifier` are on elided lines.  */
1934 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1935 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1936 if (nunits_in == nunits_out / 2)
1938 else if (nunits_out == nunits_in)
1940 else if (nunits_out == nunits_in / 2)
1945 if (modifier == NARROW)
1946 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1948 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1950 /* Multiple types in SLP are handled by creating the appropriate number of
1951 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1953 if (slp_node || PURE_SLP_STMT (stmt_info))
1956 /* Sanity check: make sure that at least one copy of the vectorized stmt
1957 needs to be generated. */
1958 gcc_assert (ncopies >= 1);
1960 /* Supportable by target? */
1961 if ((modifier == NONE
1962 && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
1963 || (modifier == WIDEN
1964 && !supportable_widening_operation (code, stmt,
1965 vectype_out, vectype_in,
1968 &dummy_int, &dummy))
1969 || (modifier == NARROW
1970 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1971 &code1, &dummy_int, &dummy)))
1973 if (vect_print_dump_info (REPORT_DETAILS))
1974 fprintf (vect_dump, "conversion not supported by target.");
1978 if (modifier != NONE)
1980 /* FORNOW: SLP not supported. */
1981 if (STMT_SLP_TYPE (stmt_info))
1985 if (!vec_stmt) /* transformation not required. */
1987 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1992 if (vect_print_dump_info (REPORT_DETAILS))
1993 fprintf (vect_dump, "transform conversion.");
1996 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1998 if (modifier == NONE && !slp_node)
1999 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2001 prev_stmt_info = NULL;
/* modifier == NONE path: one converted vector stmt per input vector.  */
2005 for (j = 0; j < ncopies; j++)
2008 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2011 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2013 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2015 /* Arguments are ready, create the new vector stmt. */
/* supportable_convert_operation may hand back either a builtin decl
   (CALL_EXPR) or a tree code; build the matching stmt kind.  */
2016 if (code1 == CALL_EXPR)
2018 new_stmt = gimple_build_call (decl1, 1, vop0);
2019 new_temp = make_ssa_name (vec_dest, new_stmt);
2020 gimple_call_set_lhs (new_stmt, new_temp);
2024 gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
2025 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
2027 new_temp = make_ssa_name (vec_dest, new_stmt);
2028 gimple_assign_set_lhs (new_stmt, new_temp);
2031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2033 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2037 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2040 prev_stmt_info = vinfo_for_stmt (new_stmt);
2045 /* In case the vectorization factor (VF) is bigger than the number
2046 of elements that we can fit in a vectype (nunits), we have to
2047 generate more than one vector stmt - i.e - we need to "unroll"
2048 the vector stmt by a factor VF/nunits. */
/* modifier == WIDEN path: each input vector yields two output
   vectors (low half via code1/decl1, high half via code2/decl2).  */
2049 for (j = 0; j < ncopies; j++)
2052 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2054 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2056 /* Generate first half of the widened result: */
2058 = vect_gen_widened_results_half (code1, decl1,
2059 vec_oprnd0, vec_oprnd1,
2060 unary_op, vec_dest, gsi, stmt);
2062 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2064 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2065 prev_stmt_info = vinfo_for_stmt (new_stmt);
2067 /* Generate second half of the widened result: */
2069 = vect_gen_widened_results_half (code2, decl2,
2070 vec_oprnd0, vec_oprnd1,
2071 unary_op, vec_dest, gsi, stmt);
2072 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2073 prev_stmt_info = vinfo_for_stmt (new_stmt);
2078 /* In case the vectorization factor (VF) is bigger than the number
2079 of elements that we can fit in a vectype (nunits), we have to
2080 generate more than one vector stmt - i.e - we need to "unroll"
2081 the vector stmt by a factor VF/nunits. */
/* modifier == NARROW path: each output vector consumes two input
   vectors, chained from copy to copy.  */
2082 for (j = 0; j < ncopies; j++)
2087 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2088 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2092 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2093 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2096 /* Arguments are ready. Create the new vector stmt. */
2097 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2099 new_temp = make_ssa_name (vec_dest, new_stmt);
2100 gimple_assign_set_lhs (new_stmt, new_temp);
2101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2104 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2108 prev_stmt_info = vinfo_for_stmt (new_stmt);
2111 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2115 VEC_free (tree, heap, vec_oprnds0);
2121 /* Function vectorizable_assignment.
2123 Check if STMT performs an assignment (copy) that can be vectorized.
2124 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2125 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2126 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): sampled excerpt — `return false`s, `else` branches
   (e.g. between the rhs-code check at 2178-2181 and the VIEW_CONVERT
   handling at 2185), and several braces/conditions are elided (see the
   gaps in the embedded line numbers).  */
2129 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2130 gimple *vec_stmt, slp_tree slp_node)
2135 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2136 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2137 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2141 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2142 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2145 VEC(tree,heap) *vec_oprnds = NULL;
2147 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2148 gimple new_stmt = NULL;
2149 stmt_vec_info prev_stmt_info = NULL;
2150 enum tree_code code;
2153 /* Multiple types in SLP are handled by creating the appropriate number of
2154 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2156 if (slp_node || PURE_SLP_STMT (stmt_info))
2159 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2161 gcc_assert (ncopies >= 1);
2163 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2166 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2169 /* Is vectorizable assignment? */
2170 if (!is_gimple_assign (stmt))
2173 scalar_dest = gimple_assign_lhs (stmt);
2174 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* Accept plain copies, PAREN_EXPR, and (VIEW_)CONVERT conversions.  */
2177 code = gimple_assign_rhs_code (stmt);
2178 if (gimple_assign_single_p (stmt)
2179 || code == PAREN_EXPR
2180 || CONVERT_EXPR_CODE_P (code))
2181 op = gimple_assign_rhs1 (stmt);
2185 if (code == VIEW_CONVERT_EXPR)
2186 op = TREE_OPERAND (op, 0);
2188 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2189 &def_stmt, &def, &dt[0], &vectype_in))
2191 if (vect_print_dump_info (REPORT_DETAILS))
2192 fprintf (vect_dump, "use not simple.");
2196 /* We can handle NOP_EXPR conversions that do not change the number
2197 of elements or the vector size. */
2198 if ((CONVERT_EXPR_CODE_P (code)
2199 || code == VIEW_CONVERT_EXPR)
2201 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2202 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2203 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2206 /* We do not handle bit-precision changes. */
2207 if ((CONVERT_EXPR_CODE_P (code)
2208 || code == VIEW_CONVERT_EXPR)
2209 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2210 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2211 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2212 || ((TYPE_PRECISION (TREE_TYPE (op))
2213 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2214 /* But a conversion that does not change the bit-pattern is ok. */
2215 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2216 > TYPE_PRECISION (TREE_TYPE (op)))
2217 && TYPE_UNSIGNED (TREE_TYPE (op))))
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "type conversion to/from bit-precision "
2225 if (!vec_stmt) /* transformation not required. */
2227 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2228 if (vect_print_dump_info (REPORT_DETAILS))
2229 fprintf (vect_dump, "=== vectorizable_assignment ===");
2230 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2235 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "transform assignment.");
2239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2242 for (j = 0; j < ncopies; j++)
2246 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2248 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2250 /* Arguments are ready. create the new vector stmt. */
2251 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
/* Conversions become bit-pattern-preserving VIEW_CONVERTs on the
   vector operand; plain copies are emitted as-is.  */
2253 if (CONVERT_EXPR_CODE_P (code)
2254 || code == VIEW_CONVERT_EXPR)
2255 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2256 new_stmt = gimple_build_assign (vec_dest, vop);
2257 new_temp = make_ssa_name (vec_dest, new_stmt);
2258 gimple_assign_set_lhs (new_stmt, new_temp);
2259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2261 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2268 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2272 prev_stmt_info = vinfo_for_stmt (new_stmt);
2275 VEC_free (tree, heap, vec_oprnds);
2280 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2281 either as shift by a scalar or by a vector. */
/* NOTE(review): sampled excerpt — return type, braces, the `if (!vectype)`
   guard before 2296, and the final `return true/false` statements are
   elided.  Visible logic: try the scalar-shift optab first, fall back to
   the vector-shift optab, then confirm an insn pattern exists for the
   vector mode.  */
2284 vect_supportable_shift (enum tree_code code, tree scalar_type)
2287 enum machine_mode vec_mode;
2292 vectype = get_vectype_for_scalar_type (scalar_type);
2296 optab = optab_for_tree_code (code, vectype, optab_scalar);
2298 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2300 optab = optab_for_tree_code (code, vectype, optab_vector);
2302 || (optab_handler (optab, TYPE_MODE (vectype))
2303 == CODE_FOR_nothing))
2307 vec_mode = TYPE_MODE (vectype);
2308 icode = (int) optab_handler (optab, vec_mode);
2309 if (icode == CODE_FOR_nothing)
2316 /* Function vectorizable_shift.
2318 Check if STMT performs a shift operation that can be vectorized.
2319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2320 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (VEC_STMT == NULL) or transform a vectorizable shift/rotate
   statement STMT.  On the transform path the generated vector statement
   is stored in *VEC_STMT and inserted at GSI; SLP_NODE is non-NULL for
   SLP vectorization.
   NOTE(review): the embedded original line numbers are non-contiguous,
   so statements of this function were dropped from this excerpt; the
   comments below describe only the visible code.  */
2324 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2325 gimple *vec_stmt, slp_tree slp_node)
2329 tree op0, op1 = NULL;
2330 tree vec_oprnd1 = NULL_TREE;
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2333 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2334 enum tree_code code;
2335 enum machine_mode vec_mode;
2339 enum machine_mode optab_op2_mode;
2342 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2343 gimple new_stmt = NULL;
2344 stmt_vec_info prev_stmt_info;
2351 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
/* Assume the shift amount is scalar until proven otherwise below.  */
2354 bool scalar_shift_arg = true;
2355 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Only handle statements marked relevant by the vectorizer (any stmt is
   considered during basic-block SLP, i.e. when BB_VINFO is set).  */
2358 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2361 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2364 /* Is STMT a vectorizable binary/unary operation? */
2365 if (!is_gimple_assign (stmt))
2368 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
/* Only the four shift/rotate tree codes are handled here.  */
2371 code = gimple_assign_rhs_code (stmt);
2373 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2374 || code == RROTATE_EXPR))
2377 scalar_dest = gimple_assign_lhs (stmt);
2378 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
/* Reject types whose precision is narrower than their mode's
   (bit-precision shifts would need extra masking).  */
2379 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2380 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "bit-precision shifts not supported.");
2387 op0 = gimple_assign_rhs1 (stmt);
2388 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2389 &def_stmt, &def, &dt[0], &vectype))
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "use not simple.");
2395 /* If op0 is an external or constant def use a vector type with
2396 the same size as the output vector type. */
2398 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2400 gcc_assert (vectype);
2403 if (vect_print_dump_info (REPORT_DETAILS))
2405 fprintf (vect_dump, "no vectype for scalar type ");
2406 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* Shifts must keep the same number of elements in and out.  */
2412 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2413 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2414 if (nunits_out != nunits_in)
2417 op1 = gimple_assign_rhs2 (stmt);
2418 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2419 &dt[1], &op1_vectype))
2421 if (vect_print_dump_info (REPORT_DETAILS))
2422 fprintf (vect_dump, "use not simple.");
2427 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2431 /* Multiple types in SLP are handled by creating the appropriate number of
2432 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2434 if (slp_node || PURE_SLP_STMT (stmt_info))
2437 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2439 gcc_assert (ncopies >= 1);
2441 /* Determine whether the shift amount is a vector, or scalar. If the
2442 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2444 if (dt[1] == vect_internal_def && !slp_node)
2445 scalar_shift_arg = false;
2446 else if (dt[1] == vect_constant_def
2447 || dt[1] == vect_external_def
2448 || dt[1] == vect_internal_def)
2450 /* In SLP, need to check whether the shift count is the same,
2451 in loops if it is a constant or invariant, it is always
2455 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2458 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2459 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2460 scalar_shift_arg = false;
2465 if (vect_print_dump_info (REPORT_DETAILS))
2466 fprintf (vect_dump, "operand mode requires invariant argument.");
2470 /* Vector shifted by vector. */
2471 if (!scalar_shift_arg)
2473 optab = optab_for_tree_code (code, vectype, optab_vector);
2474 if (vect_print_dump_info (REPORT_DETAILS))
2475 fprintf (vect_dump, "vector/vector shift/rotate found.");
/* The shift-amount vector must have the same mode as the shifted one.  */
2477 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
2478 if (op1_vectype == NULL_TREE
2479 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
2481 if (vect_print_dump_info (REPORT_DETAILS))
2482 fprintf (vect_dump, "unusable type for last operand in"
2483 " vector/vector shift/rotate.");
2487 /* See if the machine has a vector shifted by scalar insn and if not
2488 then see if it has a vector shifted by vector insn. */
2491 optab = optab_for_tree_code (code, vectype, optab_scalar);
2493 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2495 if (vect_print_dump_info (REPORT_DETAILS))
2496 fprintf (vect_dump, "vector/scalar shift/rotate found.");
/* Fall back to the vector/vector form when the scalar form is missing.  */
2500 optab = optab_for_tree_code (code, vectype, optab_vector);
2502 && (optab_handler (optab, TYPE_MODE (vectype))
2503 != CODE_FOR_nothing))
2505 scalar_shift_arg = false;
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "vector/vector shift/rotate found.");
2510 /* Unlike the other binary operators, shifts/rotates have
2511 the rhs being int, instead of the same type as the lhs,
2512 so make sure the scalar is the right type if we are
2513 dealing with vectors of long long/long/short/char. */
2514 if (dt[1] == vect_constant_def)
2515 op1 = fold_convert (TREE_TYPE (vectype), op1);
2516 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
2520 && TYPE_MODE (TREE_TYPE (vectype))
2521 != TYPE_MODE (TREE_TYPE (op1)))
2523 if (vect_print_dump_info (REPORT_DETAILS))
2524 fprintf (vect_dump, "unusable type for last operand in"
2525 " vector/vector shift/rotate.");
/* During the transform (non-SLP) the converted scalar shift amount is
   broadcast into a vector here.  */
2528 if (vec_stmt && !slp_node)
2530 op1 = fold_convert (TREE_TYPE (vectype), op1);
2531 op1 = vect_init_vector (stmt, op1,
2532 TREE_TYPE (vectype), NULL);
2539 /* Supportable by target? */
2542 if (vect_print_dump_info (REPORT_DETAILS))
2543 fprintf (vect_dump, "no optab.");
2546 vec_mode = TYPE_MODE (vectype);
2547 icode = (int) optab_handler (optab, vec_mode);
2548 if (icode == CODE_FOR_nothing)
2550 if (vect_print_dump_info (REPORT_DETAILS))
2551 fprintf (vect_dump, "op not supported by target.");
2552 /* Check only during analysis. */
2553 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2554 || (vf < vect_min_worthwhile_factor (code)
2557 if (vect_print_dump_info (REPORT_DETAILS))
2558 fprintf (vect_dump, "proceeding using word mode.");
2561 /* Worthwhile without SIMD support? Check only during analysis. */
2562 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2563 && vf < vect_min_worthwhile_factor (code)
2566 if (vect_print_dump_info (REPORT_DETAILS))
2567 fprintf (vect_dump, "not worthwhile without SIMD support.");
/* Analysis phase ends here: record the stmt kind and model its cost.  */
2571 if (!vec_stmt) /* transformation not required. */
2573 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2574 if (vect_print_dump_info (REPORT_DETAILS))
2575 fprintf (vect_dump, "=== vectorizable_shift ===");
2576 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transform phase: emit NCOPIES vector shift statements.  */
2582 if (vect_print_dump_info (REPORT_DETAILS))
2583 fprintf (vect_dump, "transform binary/unary operation.");
2586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2588 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2589 created in the previous stages of the recursion, so no allocation is
2590 needed, except for the case of shift with scalar shift argument. In that
2591 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2592 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2593 In case of loop-based vectorization we allocate VECs of size 1. We
2594 allocate VEC_OPRNDS1 only in case of binary operation. */
2597 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2598 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2600 else if (scalar_shift_arg)
2601 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2603 prev_stmt_info = NULL;
2604 for (j = 0; j < ncopies; j++)
2609 if (scalar_shift_arg)
2611 /* Vector shl and shr insn patterns can be defined with scalar
2612 operand 2 (shift operand). In this case, use constant or loop
2613 invariant op1 directly, without extending it to vector mode
2615 optab_op2_mode = insn_data[icode].operand[2].mode;
2616 if (!VECTOR_MODE_P (optab_op2_mode))
2618 if (vect_print_dump_info (REPORT_DETAILS))
2619 fprintf (vect_dump, "operand 1 using scalar mode.");
2621 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2624 /* Store vec_oprnd1 for every vector stmt to be created
2625 for SLP_NODE. We check during the analysis that all
2626 the shift arguments are the same.
2627 TODO: Allow different constants for different vector
2628 stmts generated for an SLP instance. */
2629 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2630 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2635 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2636 (a special case for certain kind of vector shifts); otherwise,
2637 operand 1 should be of a vector type (the usual case). */
2639 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2642 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2646 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2648 /* Arguments are ready. Create the new vector stmt. */
2649 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2651 vop1 = VEC_index (tree, vec_oprnds1, i);
2652 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2653 new_temp = make_ssa_name (vec_dest, new_stmt);
2654 gimple_assign_set_lhs (new_stmt, new_temp);
2655 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2657 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Chain the copies via STMT_VINFO_RELATED_STMT; the first copy becomes
   the recorded VEC_STMT.  */
2664 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2666 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2667 prev_stmt_info = vinfo_for_stmt (new_stmt);
2670 VEC_free (tree, heap, vec_oprnds0);
2671 VEC_free (tree, heap, vec_oprnds1);
2677 /* Function vectorizable_operation.
2679 Check if STMT performs a binary, unary or ternary operation that can be vectorized.
2681 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2682 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2683 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (VEC_STMT == NULL) or transform a vectorizable unary/binary/
   ternary operation STMT; shifts are deliberately routed to
   vectorizable_shift instead.
   NOTE(review): the embedded original line numbers are non-contiguous,
   so statements of this function were dropped from this excerpt; the
   comments below describe only the visible code.  */
2686 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2687 gimple *vec_stmt, slp_tree slp_node)
2691 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2692 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2694 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2695 enum tree_code code;
2696 enum machine_mode vec_mode;
2703 enum vect_def_type dt[3]
2704 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2705 gimple new_stmt = NULL;
2706 stmt_vec_info prev_stmt_info;
2712 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2713 tree vop0, vop1, vop2;
2714 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Only handle statements marked relevant (any stmt during basic-block
   SLP, i.e. when BB_VINFO is set).  */
2717 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2720 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2723 /* Is STMT a vectorizable binary/unary operation? */
2724 if (!is_gimple_assign (stmt))
2727 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2730 code = gimple_assign_rhs_code (stmt);
2732 /* For pointer addition, we should use the normal plus for
2733 the vector addition. */
2734 if (code == POINTER_PLUS_EXPR)
2737 /* Support only unary or binary operations. */
2738 op_type = TREE_CODE_LENGTH (code);
2739 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2747 scalar_dest = gimple_assign_lhs (stmt);
2748 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2750 /* Most operations cannot handle bit-precision types without extra
2752 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2753 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2754 /* Exception are bitwise binary operations. */
2755 && code != BIT_IOR_EXPR
2756 && code != BIT_XOR_EXPR
2757 && code != BIT_AND_EXPR)
2759 if (vect_print_dump_info (REPORT_DETAILS))
2760 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2764 op0 = gimple_assign_rhs1 (stmt);
2765 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2766 &def_stmt, &def, &dt[0], &vectype))
2768 if (vect_print_dump_info (REPORT_DETAILS))
2769 fprintf (vect_dump, "use not simple.");
2772 /* If op0 is an external or constant def use a vector type with
2773 the same size as the output vector type. */
2775 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2777 gcc_assert (vectype);
2780 if (vect_print_dump_info (REPORT_DETAILS))
2782 fprintf (vect_dump, "no vectype for scalar type ");
2783 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* Element counts of input and output vector types must agree.  */
2789 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2790 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2791 if (nunits_out != nunits_in)
/* Validate the second (and for ternary codes, third) operand.  */
2794 if (op_type == binary_op || op_type == ternary_op)
2796 op1 = gimple_assign_rhs2 (stmt);
2797 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2800 if (vect_print_dump_info (REPORT_DETAILS))
2801 fprintf (vect_dump, "use not simple.");
2805 if (op_type == ternary_op)
2807 op2 = gimple_assign_rhs3 (stmt);
2808 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2811 if (vect_print_dump_info (REPORT_DETAILS))
2812 fprintf (vect_dump, "use not simple.");
2818 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2822 /* Multiple types in SLP are handled by creating the appropriate number of
2823 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2825 if (slp_node || PURE_SLP_STMT (stmt_info))
2828 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2830 gcc_assert (ncopies >= 1);
2832 /* Shifts are handled in vectorizable_shift (). */
2833 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2834 || code == RROTATE_EXPR)
2837 optab = optab_for_tree_code (code, vectype, optab_default);
2839 /* Supportable by target? */
2842 if (vect_print_dump_info (REPORT_DETAILS))
2843 fprintf (vect_dump, "no optab.");
2846 vec_mode = TYPE_MODE (vectype);
2847 icode = (int) optab_handler (optab, vec_mode);
2848 if (icode == CODE_FOR_nothing)
2850 if (vect_print_dump_info (REPORT_DETAILS))
2851 fprintf (vect_dump, "op not supported by target.");
2852 /* Check only during analysis. */
2853 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2854 || (vf < vect_min_worthwhile_factor (code)
2857 if (vect_print_dump_info (REPORT_DETAILS))
2858 fprintf (vect_dump, "proceeding using word mode.");
2861 /* Worthwhile without SIMD support? Check only during analysis. */
2862 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2863 && vf < vect_min_worthwhile_factor (code)
2866 if (vect_print_dump_info (REPORT_DETAILS))
2867 fprintf (vect_dump, "not worthwhile without SIMD support.");
/* Analysis phase ends here: record the stmt kind and model its cost.  */
2871 if (!vec_stmt) /* transformation not required. */
2873 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2874 if (vect_print_dump_info (REPORT_DETAILS))
2875 fprintf (vect_dump, "=== vectorizable_operation ===");
2876 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transform phase.  */
2882 if (vect_print_dump_info (REPORT_DETAILS))
2883 fprintf (vect_dump, "transform binary/unary operation.");
2886 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2888 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2889 created in the previous stages of the recursion, so no allocation is
2890 needed, except for the case of shift with scalar shift argument. In that
2891 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2892 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2893 In case of loop-based vectorization we allocate VECs of size 1. We
2894 allocate VEC_OPRNDS1 only in case of binary operation. */
2897 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2898 if (op_type == binary_op || op_type == ternary_op)
2899 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2900 if (op_type == ternary_op)
2901 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2904 /* In case the vectorization factor (VF) is bigger than the number
2905 of elements that we can fit in a vectype (nunits), we have to generate
2906 more than one vector stmt - i.e - we need to "unroll" the
2907 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2908 from one copy of the vector stmt to the next, in the field
2909 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2910 stages to find the correct vector defs to be used when vectorizing
2911 stmts that use the defs of the current stmt. The example below
2912 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2913 we need to create 4 vectorized stmts):
2915 before vectorization:
2916 RELATED_STMT VEC_STMT
2920 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2922 RELATED_STMT VEC_STMT
2923 VS1_0: vx0 = memref0 VS1_1 -
2924 VS1_1: vx1 = memref1 VS1_2 -
2925 VS1_2: vx2 = memref2 VS1_3 -
2926 VS1_3: vx3 = memref3 - -
2927 S1: x = load - VS1_0
2930 step2: vectorize stmt S2 (done here):
2931 To vectorize stmt S2 we first need to find the relevant vector
2932 def for the first operand 'x'. This is, as usual, obtained from
2933 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2934 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2935 relevant vector def 'vx0'. Having found 'vx0' we can generate
2936 the vector stmt VS2_0, and as usual, record it in the
2937 STMT_VINFO_VEC_STMT of stmt S2.
2938 When creating the second copy (VS2_1), we obtain the relevant vector
2939 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2940 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2941 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2942 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2943 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2944 chain of stmts and pointers:
2945 RELATED_STMT VEC_STMT
2946 VS1_0: vx0 = memref0 VS1_1 -
2947 VS1_1: vx1 = memref1 VS1_2 -
2948 VS1_2: vx2 = memref2 VS1_3 -
2949 VS1_3: vx3 = memref3 - -
2950 S1: x = load - VS1_0
2951 VS2_0: vz0 = vx0 + v1 VS2_1 -
2952 VS2_1: vz1 = vx1 + v1 VS2_2 -
2953 VS2_2: vz2 = vx2 + v1 VS2_3 -
2954 VS2_3: vz3 = vx3 + v1 - -
2955 S2: z = x + 1 - VS2_0 */
2957 prev_stmt_info = NULL;
2958 for (j = 0; j < ncopies; j++)
/* First iteration: fetch the initial vector defs for all operands.  */
2963 if (op_type == binary_op || op_type == ternary_op)
2964 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2967 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2969 if (op_type == ternary_op)
2971 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2972 VEC_quick_push (tree, vec_oprnds2,
2973 vect_get_vec_def_for_operand (op2, stmt, NULL));
/* Subsequent iterations: advance each operand to the def of the next
   copy.  */
2978 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2979 if (op_type == ternary_op)
2981 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2982 VEC_quick_push (tree, vec_oprnds2,
2983 vect_get_vec_def_for_stmt_copy (dt[2],
2988 /* Arguments are ready. Create the new vector stmt. */
2989 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2991 vop1 = ((op_type == binary_op || op_type == ternary_op)
2992 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2993 vop2 = ((op_type == ternary_op)
2994 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2995 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2997 new_temp = make_ssa_name (vec_dest, new_stmt);
2998 gimple_assign_set_lhs (new_stmt, new_temp);
2999 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3001 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Chain copies via STMT_VINFO_RELATED_STMT as described above.  */
3008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3011 prev_stmt_info = vinfo_for_stmt (new_stmt);
3014 VEC_free (tree, heap, vec_oprnds0);
3016 VEC_free (tree, heap, vec_oprnds1);
3018 VEC_free (tree, heap, vec_oprnds2);
3024 /* Get vectorized definitions for loop-based vectorization. For the first
3025 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3026 scalar operand), and for the rest we get a copy with
3027 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3028 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3029 The vectors are collected into VEC_OPRNDS. */
/* Collect vector defs for loop-based vectorization of a conversion.
   The first def comes from vect_get_vec_def_for_operand (scalar *OPRND);
   each further def is obtained with vect_get_vec_def_for_stmt_copy from
   the previous one.  Results are pushed onto *VEC_OPRNDS; the function
   recurses MULTI_STEP_CVT times, gathering two defs per level.
   NOTE(review): some lines of this function are missing from this
   excerpt (original numbering is non-contiguous).  */
3032 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3033 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
3037 /* Get first vector operand. */
3038 /* All the vector operands except the very first one (that is scalar oprnd)
3040 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3041 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3043 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3045 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3047 /* Get second vector operand. */
3048 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3049 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3053 /* For conversion in multiple steps, continue to get operands
3056 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3060 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3061 For multi-step conversions store the resulting vectors and call the function recursively.
/* Emit demotion (narrowing) statements: each pair of vectors in
   *VEC_OPRNDS is combined by CODE into one narrower vector.  For
   multi-step conversions the halved operand list is processed again
   recursively with the next destination popped from VEC_DSTS.
   NOTE(review): some lines of this function are missing from this
   excerpt (original numbering is non-contiguous).  */
3065 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3066 int multi_step_cvt, gimple stmt,
3067 VEC (tree, heap) *vec_dsts,
3068 gimple_stmt_iterator *gsi,
3069 slp_tree slp_node, enum tree_code code,
3070 stmt_vec_info *prev_stmt_info)
3073 tree vop0, vop1, new_tmp, vec_dest;
3075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Destination for this demotion level.  */
3077 vec_dest = VEC_pop (tree, vec_dsts);
3079 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3081 /* Create demotion operation. */
3082 vop0 = VEC_index (tree, *vec_oprnds, i);
3083 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3084 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3085 new_tmp = make_ssa_name (vec_dest, new_stmt);
3086 gimple_assign_set_lhs (new_stmt, new_tmp);
3087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090 /* Store the resulting vector for next recursive call. */
3091 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3094 /* This is the last step of the conversion sequence. Store the
3095 vectors in SLP_NODE or in vector info of the scalar statement
3096 (or in STMT_VINFO_RELATED_STMT chain). */
3098 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3101 if (!*prev_stmt_info)
3102 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3104 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3106 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3111 /* For multi-step demotion operations we first generate demotion operations
3112 from the source type to the intermediate types, and then combine the
3113 results (stored in VEC_OPRNDS) in demotion operation to the destination
3117 /* At each level of recursion we have half of the operands we had at the
3119 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3120 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3121 stmt, vec_dsts, gsi, slp_node,
3122 code, prev_stmt_info);
3127 /* Function vectorizable_type_demotion
3129 Check if STMT performs a binary or unary operation that involves
3130 type demotion, and if it can be vectorized.
3131 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3132 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3133 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (VEC_STMT == NULL) or transform a vectorizable narrowing
   conversion STMT (NOP/CONVERT where the destination has fewer-bit
   elements, i.e. nunits_in < nunits_out is rejected below).
   NOTE(review): the embedded original line numbers are non-contiguous,
   so statements of this function were dropped from this excerpt; the
   comments below describe only the visible code.  */
3136 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3137 gimple *vec_stmt, slp_tree slp_node)
3142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3143 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3144 enum tree_code code, code1 = ERROR_MARK;
3147 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3148 stmt_vec_info prev_stmt_info;
3155 int multi_step_cvt = 0;
3156 VEC (tree, heap) *vec_oprnds0 = NULL;
3157 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3158 tree last_oprnd, intermediate_type;
3159 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Only handle statements marked relevant (any stmt during basic-block
   SLP, i.e. when BB_VINFO is set).  */
3161 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3164 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3167 /* Is STMT a vectorizable type-demotion operation? */
3168 if (!is_gimple_assign (stmt))
3171 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3174 code = gimple_assign_rhs_code (stmt);
3175 if (!CONVERT_EXPR_CODE_P (code))
3178 scalar_dest = gimple_assign_lhs (stmt);
3179 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3181 /* Check the operands of the operation. */
3182 op0 = gimple_assign_rhs1 (stmt);
/* Only integer->integer or float->float conversions are handled.  */
3183 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3184 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3185 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3186 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
/* Bit-precision (non-mode-precision) integer types are rejected.  */
3189 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3190 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3191 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3192 || ((TYPE_PRECISION (TREE_TYPE (op0))
3193 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3195 if (vect_print_dump_info (REPORT_DETAILS))
3196 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3200 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3201 &def_stmt, &def, &dt[0], &vectype_in))
3203 if (vect_print_dump_info (REPORT_DETAILS))
3204 fprintf (vect_dump, "use not simple.");
3207 /* If op0 is an external def use a vector type with the
3208 same size as the output vector type if possible. */
3210 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3212 gcc_assert (vectype_in);
3215 if (vect_print_dump_info (REPORT_DETAILS))
3217 fprintf (vect_dump, "no vectype for scalar type ");
3218 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
/* A demotion must produce more elements per vector than it consumes.  */
3224 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3225 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3226 if (nunits_in >= nunits_out)
3229 /* Multiple types in SLP are handled by creating the appropriate number of
3230 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3232 if (slp_node || PURE_SLP_STMT (stmt_info))
3235 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3236 gcc_assert (ncopies >= 1);
3238 /* Supportable by target? */
3239 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3240 &code1, &multi_step_cvt, &interm_types))
/* Analysis phase ends here: record the stmt kind and model its cost.  */
3243 if (!vec_stmt) /* transformation not required. */
3245 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3246 if (vect_print_dump_info (REPORT_DETAILS))
3247 fprintf (vect_dump, "=== vectorizable_demotion ===");
3248 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
/* Transform phase.  */
3253 if (vect_print_dump_info (REPORT_DETAILS))
3254 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3257 /* In case of multi-step demotion, we first generate demotion operations to
3258 the intermediate types, and then from that types to the final one.
3259 We create vector destinations for the intermediate type (TYPES) received
3260 from supportable_narrowing_operation, and store them in the correct order
3261 for future use in vect_create_vectorized_demotion_stmts(). */
3263 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3265 vec_dsts = VEC_alloc (tree, heap, 1);
3267 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3268 VEC_quick_push (tree, vec_dsts, vec_dest);
/* Push intermediate-type destinations in reverse so the recursion pops
   them in conversion order.  */
3272 for (i = VEC_length (tree, interm_types) - 1;
3273 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3275 vec_dest = vect_create_destination_var (scalar_dest,
3277 VEC_quick_push (tree, vec_dsts, vec_dest);
3281 /* In case the vectorization factor (VF) is bigger than the number
3282 of elements that we can fit in a vectype (nunits), we have to generate
3283 more than one vector stmt - i.e - we need to "unroll" the
3284 vector stmt by a factor VF/nunits. */
3286 prev_stmt_info = NULL;
3287 for (j = 0; j < ncopies; j++)
3291 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3295 VEC_free (tree, heap, vec_oprnds0);
3296 vec_oprnds0 = VEC_alloc (tree, heap,
3297 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3298 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3299 vect_pow2 (multi_step_cvt) - 1);
3302 /* Arguments are ready. Create the new vector stmts. */
3303 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3304 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3305 multi_step_cvt, stmt, tmp_vec_dsts,
3306 gsi, slp_node, code1,
3310 VEC_free (tree, heap, vec_oprnds0);
3311 VEC_free (tree, heap, vec_dsts);
3312 VEC_free (tree, heap, tmp_vec_dsts);
3313 VEC_free (tree, heap, interm_types);
3315 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3320 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3321 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3322 the resulting vectors and call the function recursively. */
/* Emit promotion (widening) statements: each vector in *VEC_OPRNDS0
   (paired with *VEC_OPRNDS1 for binary ops) produces two wider half
   vectors via CODE1/CODE2 (or builtin DECL1/DECL2 when the halves are
   generated as calls).  For multi-step conversions the doubled result
   list is processed again recursively.
   NOTE(review): some lines of this function are missing from this
   excerpt (original numbering is non-contiguous).  */
3325 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3326 VEC (tree, heap) **vec_oprnds1,
3327 int multi_step_cvt, gimple stmt,
3328 VEC (tree, heap) *vec_dsts,
3329 gimple_stmt_iterator *gsi,
3330 slp_tree slp_node, enum tree_code code1,
3331 enum tree_code code2, tree decl1,
3332 tree decl2, int op_type,
3333 stmt_vec_info *prev_stmt_info)
3336 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3337 gimple new_stmt1, new_stmt2;
3338 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3339 VEC (tree, heap) *vec_tmp;
/* Destination for this promotion level; results collected in VEC_TMP.  */
3341 vec_dest = VEC_pop (tree, vec_dsts);
3342 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3344 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3346 if (op_type == binary_op)
3347 vop1 = VEC_index (tree, *vec_oprnds1, i);
3351 /* Generate the two halves of promotion operation. */
3352 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3353 op_type, vec_dest, gsi, stmt);
3354 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3355 op_type, vec_dest, gsi, stmt);
/* The halves may come back as calls (builtin DECLs) or assigns.  */
3356 if (is_gimple_call (new_stmt1))
3358 new_tmp1 = gimple_call_lhs (new_stmt1);
3359 new_tmp2 = gimple_call_lhs (new_stmt2);
3363 new_tmp1 = gimple_assign_lhs (new_stmt1);
3364 new_tmp2 = gimple_assign_lhs (new_stmt2);
3369 /* Store the results for the recursive call. */
3370 VEC_quick_push (tree, vec_tmp, new_tmp1);
3371 VEC_quick_push (tree, vec_tmp, new_tmp2);
3375 /* Last step of promotion sequence - store the results. */
3378 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3379 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3383 if (!*prev_stmt_info)
3384 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3386 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3388 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3389 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3390 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3397 /* For multi-step promotion operations we call the function recursively
3398 for every stage. We start from the input type,
3399 create promotion operations to the intermediate types, and then
3400 create promotions to the output type. */
3401 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
/* NOTE(review): DECL2 is passed for both the DECL1 and DECL2 arguments
   of the recursive call below.  This looks like a copy-paste slip; it
   is harmless only if multi-step conversions never use builtin decls
   (binary widening with decls cannot multi-step) — confirm upstream.  */
3402 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3403 multi_step_cvt - 1, stmt,
3404 vec_dsts, gsi, slp_node, code1,
3405 code2, decl2, decl2, op_type,
3409 VEC_free (tree, heap, vec_tmp);
3413 /* Function vectorizable_type_promotion
3415 Check if STMT performs a binary or unary operation that involves
3416 type promotion, and if it can be vectorized.
3417 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3418 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3419 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3422 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3423 gimple *vec_stmt, slp_tree slp_node)
3427 tree op0, op1 = NULL;
3428 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3430 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3431 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3432 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3436 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3437 stmt_vec_info prev_stmt_info;
3444 tree intermediate_type = NULL_TREE;
3445 int multi_step_cvt = 0;
3446 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3447 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3448 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3451 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3454 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3457 /* Is STMT a vectorizable type-promotion operation? */
3458 if (!is_gimple_assign (stmt))
3461 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3464 code = gimple_assign_rhs_code (stmt);
3465 if (!CONVERT_EXPR_CODE_P (code)
3466 && code != WIDEN_MULT_EXPR
3467 && code != WIDEN_LSHIFT_EXPR)
3470 scalar_dest = gimple_assign_lhs (stmt);
3471 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3473 /* Check the operands of the operation. */
3474 op0 = gimple_assign_rhs1 (stmt);
3475 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3476 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3477 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3478 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3479 && CONVERT_EXPR_CODE_P (code))))
3482 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3483 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3484 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3485 || ((TYPE_PRECISION (TREE_TYPE (op0))
3486 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3488 if (vect_print_dump_info (REPORT_DETAILS))
3489 fprintf (vect_dump, "type promotion to/from bit-precision "
3494 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3495 &def_stmt, &def, &dt[0], &vectype_in))
3497 if (vect_print_dump_info (REPORT_DETAILS))
3498 fprintf (vect_dump, "use not simple.");
3502 op_type = TREE_CODE_LENGTH (code);
3503 if (op_type == binary_op)
3507 op1 = gimple_assign_rhs2 (stmt);
3508 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3510 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3512 if (CONSTANT_CLASS_P (op0))
3513 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3514 &def_stmt, &def, &dt[1], &vectype_in);
3516 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3521 if (vect_print_dump_info (REPORT_DETAILS))
3522 fprintf (vect_dump, "use not simple.");
3528 /* If op0 is an external or constant def use a vector type with
3529 the same size as the output vector type. */
3531 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3533 gcc_assert (vectype_in);
3536 if (vect_print_dump_info (REPORT_DETAILS))
3538 fprintf (vect_dump, "no vectype for scalar type ");
3539 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3545 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3546 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3547 if (nunits_in <= nunits_out)
3550 /* Multiple types in SLP are handled by creating the appropriate number of
3551 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3553 if (slp_node || PURE_SLP_STMT (stmt_info))
3556 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3558 gcc_assert (ncopies >= 1);
3560 /* Supportable by target? */
3561 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3562 &decl1, &decl2, &code1, &code2,
3563 &multi_step_cvt, &interm_types))
3566 /* Binary widening operation can only be supported directly by the
3568 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3570 if (!vec_stmt) /* transformation not required. */
3572 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3573 if (vect_print_dump_info (REPORT_DETAILS))
3574 fprintf (vect_dump, "=== vectorizable_promotion ===");
3575 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3581 if (vect_print_dump_info (REPORT_DETAILS))
3582 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3585 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3587 if (CONSTANT_CLASS_P (op0))
3588 op0 = fold_convert (TREE_TYPE (op1), op0);
3589 else if (CONSTANT_CLASS_P (op1))
3590 op1 = fold_convert (TREE_TYPE (op0), op1);
3594 /* In case of multi-step promotion, we first generate promotion operations
3595 to the intermediate types, and then from that types to the final one.
3596 We store vector destination in VEC_DSTS in the correct order for
3597 recursive creation of promotion operations in
3598 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3599 according to TYPES received from supportable_widening_operation(). */
3601 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3603 vec_dsts = VEC_alloc (tree, heap, 1);
3605 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3606 VEC_quick_push (tree, vec_dsts, vec_dest);
3610 for (i = VEC_length (tree, interm_types) - 1;
3611 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3613 vec_dest = vect_create_destination_var (scalar_dest,
3615 VEC_quick_push (tree, vec_dsts, vec_dest);
3621 vec_oprnds0 = VEC_alloc (tree, heap,
3622 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3623 if (op_type == binary_op)
3624 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3626 else if (code == WIDEN_LSHIFT_EXPR)
3627 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3629 /* In case the vectorization factor (VF) is bigger than the number
3630 of elements that we can fit in a vectype (nunits), we have to generate
3631 more than one vector stmt - i.e - we need to "unroll" the
3632 vector stmt by a factor VF/nunits. */
3634 prev_stmt_info = NULL;
3635 for (j = 0; j < ncopies; j++)
3642 if (code == WIDEN_LSHIFT_EXPR)
3645 /* Store vec_oprnd1 for every vector stmt to be created
3646 for SLP_NODE. We check during the analysis that all
3647 the shift arguments are the same. */
3648 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3649 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3651 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3655 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3656 &vec_oprnds1, slp_node, -1);
3660 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3661 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3662 if (op_type == binary_op)
3664 if (code == WIDEN_LSHIFT_EXPR)
3667 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3668 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3674 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3675 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3676 if (op_type == binary_op)
3678 if (code == WIDEN_LSHIFT_EXPR)
3681 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3682 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3686 /* Arguments are ready. Create the new vector stmts. */
3687 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3688 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3689 multi_step_cvt, stmt,
3691 gsi, slp_node, code1, code2,
3692 decl1, decl2, op_type,
3696 VEC_free (tree, heap, vec_dsts);
3697 VEC_free (tree, heap, tmp_vec_dsts);
3698 VEC_free (tree, heap, interm_types);
3699 VEC_free (tree, heap, vec_oprnds0);
3700 VEC_free (tree, heap, vec_oprnds1);
3702 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3707 /* Function vectorizable_store.
3709 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3711 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3712 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3713 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3716 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3722 tree vec_oprnd = NULL_TREE;
3723 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3724 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3725 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3728 struct loop *loop = NULL;
3729 enum machine_mode vec_mode;
3731 enum dr_alignment_support alignment_support_scheme;
3734 enum vect_def_type dt;
3735 stmt_vec_info prev_stmt_info = NULL;
3736 tree dataref_ptr = NULL_TREE;
3737 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3740 gimple next_stmt, first_stmt = NULL;
3741 bool strided_store = false;
3742 bool store_lanes_p = false;
3743 unsigned int group_size, i;
3744 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3746 VEC(tree,heap) *vec_oprnds = NULL;
3747 bool slp = (slp_node != NULL);
3748 unsigned int vec_num;
3749 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
/* Pick up the containing loop when vectorizing a loop (LOOP_VINFO set);
   for basic-block SLP, LOOP stays NULL.  */
3753 loop = LOOP_VINFO_LOOP (loop_vinfo);
3755 /* Multiple types in SLP are handled by creating the appropriate number of
3756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3758 if (slp || PURE_SLP_STMT (stmt_info))
3761 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3763 gcc_assert (ncopies >= 1);
3765 /* FORNOW. This restriction should be relaxed. */
3766 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3768 if (vect_print_dump_info (REPORT_DETAILS))
3769 fprintf (vect_dump, "multiple types in nested loop.");
3773 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3776 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3779 /* Is vectorizable store? */
3781 if (!is_gimple_assign (stmt))
3784 scalar_dest = gimple_assign_lhs (stmt);
/* A pattern stmt may wrap the store destination in a VIEW_CONVERT_EXPR;
   look through it before checking the reference kind.  */
3785 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3786 && is_pattern_stmt_p (stmt_info))
3787 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3788 if (TREE_CODE (scalar_dest) != ARRAY_REF
3789 && TREE_CODE (scalar_dest) != INDIRECT_REF
3790 && TREE_CODE (scalar_dest) != COMPONENT_REF
3791 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3792 && TREE_CODE (scalar_dest) != REALPART_EXPR
3793 && TREE_CODE (scalar_dest) != MEM_REF)
3796 gcc_assert (gimple_assign_single_p (stmt));
3797 op = gimple_assign_rhs1 (stmt);
3798 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3800 if (vect_print_dump_info (REPORT_DETAILS))
3801 fprintf (vect_dump, "use not simple.");
3805 elem_type = TREE_TYPE (vectype);
3806 vec_mode = TYPE_MODE (vectype);
3808 /* FORNOW. In some cases can vectorize even if data-type not supported
3809 (e.g. - array initialization with 0). */
3810 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3813 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Stores with a negative step are not handled here.  */
3816 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3818 if (vect_print_dump_info (REPORT_DETAILS))
3819 fprintf (vect_dump, "negative step for store.");
/* Grouped (interleaved) store: prefer store-lanes instructions if the
   target has them, otherwise fall back to permute-based strided stores.  */
3823 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3825 strided_store = true;
3826 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3827 if (!slp && !PURE_SLP_STMT (stmt_info))
3829 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3830 if (vect_store_lanes_supported (vectype, group_size))
3831 store_lanes_p = true;
3832 else if (!vect_strided_store_supported (vectype, group_size))
3836 if (first_stmt == stmt)
3838 /* STMT is the leader of the group. Check the operands of all the
3839 stmts of the group. */
3840 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3843 gcc_assert (gimple_assign_single_p (next_stmt));
3844 op = gimple_assign_rhs1 (next_stmt);
3845 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3848 if (vect_print_dump_info (REPORT_DETAILS))
3849 fprintf (vect_dump, "use not simple.");
3852 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3857 if (!vec_stmt) /* transformation not required. */
3859 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3860 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3868 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3869 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
/* Count the stores of the group seen so far; the whole group is
   transformed only when its last member is reached (checked below).  */
3871 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3874 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3876 /* We vectorize all the stmts of the interleaving group when we
3877 reach the last stmt in the group. */
3878 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3879 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3888 strided_store = false;
3889 /* VEC_NUM is the number of vect stmts to be created for this
3891 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3892 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3893 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3894 op = gimple_assign_rhs1 (first_stmt);
3897 /* VEC_NUM is the number of vect stmts to be created for this
3899 vec_num = group_size;
3905 group_size = vec_num = 1;
3908 if (vect_print_dump_info (REPORT_DETAILS))
3909 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3911 dr_chain = VEC_alloc (tree, heap, group_size);
3912 oprnds = VEC_alloc (tree, heap, group_size);
3914 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3915 gcc_assert (alignment_support_scheme);
3916 /* Targets with store-lane instructions must not require explicit
3918 gcc_assert (!store_lanes_p
3919 || alignment_support_scheme == dr_aligned
3920 || alignment_support_scheme == dr_unaligned_supported);
/* For store-lanes the data ref is an array of VEC_NUM*NUNITS elements;
   otherwise it is a single vector.  */
3923 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3925 aggr_type = vectype;
3927 /* In case the vectorization factor (VF) is bigger than the number
3928 of elements that we can fit in a vectype (nunits), we have to generate
3929 more than one vector stmt - i.e - we need to "unroll" the
3930 vector stmt by a factor VF/nunits. For more details see documentation in
3931 vect_get_vec_def_for_copy_stmt. */
3933 /* In case of interleaving (non-unit strided access):
3940 We create vectorized stores starting from base address (the access of the
3941 first stmt in the chain (S2 in the above example), when the last store stmt
3942 of the chain (S4) is reached:
3945 VS2: &base + vec_size*1 = vx0
3946 VS3: &base + vec_size*2 = vx1
3947 VS4: &base + vec_size*3 = vx3
3949 Then permutation statements are generated:
3951 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3952 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3955 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3956 (the order of the data-refs in the output of vect_permute_store_chain
3957 corresponds to the order of scalar stmts in the interleaving chain - see
3958 the documentation of vect_permute_store_chain()).
3960 In case of both multiple types and interleaving, above vector stores and
3961 permutation stmts are created for every copy. The result vector stmts are
3962 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3963 STMT_VINFO_RELATED_STMT for the next copies.
/* Generate NCOPIES copies of the vector store(s).  */
3966 prev_stmt_info = NULL;
3967 for (j = 0; j < ncopies; j++)
3976 /* Get vectorized arguments for SLP_NODE. */
3977 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3978 NULL, slp_node, -1);
3980 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3984 /* For interleaved stores we collect vectorized defs for all the
3985 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3986 used as an input to vect_permute_store_chain(), and OPRNDS as
3987 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3989 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3990 OPRNDS are of size 1. */
3991 next_stmt = first_stmt;
3992 for (i = 0; i < group_size; i++)
3994 /* Since gaps are not supported for interleaved stores,
3995 GROUP_SIZE is the exact number of stmts in the chain.
3996 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3997 there is no interleaving, GROUP_SIZE is 1, and only one
3998 iteration of the loop will be executed. */
3999 gcc_assert (next_stmt
4000 && gimple_assign_single_p (next_stmt));
4001 op = gimple_assign_rhs1 (next_stmt);
4003 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4005 VEC_quick_push(tree, dr_chain, vec_oprnd);
4006 VEC_quick_push(tree, oprnds, vec_oprnd);
4007 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4011 /* We should have caught mismatched types earlier. */
4012 gcc_assert (useless_type_conversion_p (vectype,
4013 TREE_TYPE (vec_oprnd)));
4014 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4015 NULL_TREE, &dummy, gsi,
4016 &ptr_incr, false, &inv_p);
4017 gcc_assert (bb_vinfo || !inv_p);
4021 /* For interleaved stores we created vectorized defs for all the
4022 defs stored in OPRNDS in the previous iteration (previous copy).
4023 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4024 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4026 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4027 OPRNDS are of size 1. */
4028 for (i = 0; i < group_size; i++)
4030 op = VEC_index (tree, oprnds, i);
4031 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
4033 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4034 VEC_replace(tree, dr_chain, i, vec_oprnd);
4035 VEC_replace(tree, oprnds, i, vec_oprnd);
/* Advance the data-ref pointer to the next aggregate for this copy.  */
4037 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4038 TYPE_SIZE_UNIT (aggr_type));
4045 /* Combine all the vectors into an array. */
4046 vec_array = create_vector_array (vectype, vec_num);
4047 for (i = 0; i < vec_num; i++)
4049 vec_oprnd = VEC_index (tree, dr_chain, i);
4050 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4054 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4055 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4056 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4057 gimple_call_set_lhs (new_stmt, data_ref);
4058 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4059 mark_symbols_for_renaming (new_stmt);
4066 result_chain = VEC_alloc (tree, heap, group_size);
4068 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4072 next_stmt = first_stmt;
4073 for (i = 0; i < vec_num; i++)
4075 struct ptr_info_def *pi;
4078 /* Bump the vector pointer. */
4079 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4083 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4084 else if (strided_store)
4085 /* For strided stores vectorized defs are interleaved in
4086 vect_permute_store_chain(). */
4087 vec_oprnd = VEC_index (tree, result_chain, i);
4089 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4090 build_int_cst (reference_alias_ptr_type
4091 (DR_REF (first_dr)), 0));
/* Record what is known about the pointer's alignment/misalignment on
   the SSA pointer info so later passes can use it.  */
4092 pi = get_ptr_info (dataref_ptr);
4093 pi->align = TYPE_ALIGN_UNIT (vectype);
4094 if (aligned_access_p (first_dr))
4096 else if (DR_MISALIGNMENT (first_dr) == -1)
/* Unknown misalignment: only element alignment can be assumed.  */
4098 TREE_TYPE (data_ref)
4099 = build_aligned_type (TREE_TYPE (data_ref),
4100 TYPE_ALIGN (elem_type));
4101 pi->align = TYPE_ALIGN_UNIT (elem_type);
4106 TREE_TYPE (data_ref)
4107 = build_aligned_type (TREE_TYPE (data_ref),
4108 TYPE_ALIGN (elem_type));
4109 pi->misalign = DR_MISALIGNMENT (first_dr);
4112 /* Arguments are ready. Create the new vector stmt. */
4113 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4115 mark_symbols_for_renaming (new_stmt);
4120 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* Chain the copies: the first goes in STMT_VINFO_VEC_STMT, the rest are
   linked through STMT_VINFO_RELATED_STMT (see the big comment above).  */
4128 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4130 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4131 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release the working vectors.  */
4135 VEC_free (tree, heap, dr_chain);
4136 VEC_free (tree, heap, oprnds);
4138 VEC_free (tree, heap, result_chain);
4140 VEC_free (tree, heap, vec_oprnds);
4145 /* Given a vector type VECTYPE returns a builtin DECL to be used
4146 for vector permutation and returns the mask that implements
4147 reversal of the vector elements. If that is impossible to do,
4151 perm_mask_for_reverse (tree vectype)
4153 tree mask_elt_type, mask_type, mask_vec;
/* Build the selector { nunits-1, ..., 1, 0 } that reverses the element
   order of a vector of type VECTYPE.  */
4157 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4158 sel = XALLOCAVEC (unsigned char, nunits);
4160 for (i = 0; i < nunits; ++i)
4161 sel[i] = nunits - 1 - i;
/* Check whether the target can perform this permutation (FALSE:
   the selector is a compile-time constant).  */
4163 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
/* Mask elements are unsigned integers of the same bit size as the
   vector elements.  */
4167 = lang_hooks.types.type_for_size
4168 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4169 mask_type = get_vectype_for_scalar_type (mask_elt_type);
/* tree_cons prepends, so pushing constants 0 .. nunits-1 leaves the
   list ordered nunits-1 .. 0 -- the element-reversal mask.  */
4172 for (i = 0; i < nunits; i++)
4173 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
4174 mask_vec = build_vector (mask_type, mask_vec);
4179 /* Given a vector variable X, that was generated for the scalar LHS of
4180 STMT, generate instructions to reverse the vector elements of X,
4181 insert them at *GSI and return the permuted vector variable. */
4184 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4186 tree vectype = TREE_TYPE (x);
4187 tree mask_vec, perm_dest, data_ref;
/* Mask reversing the element order.  Assumes the target supports the
   permutation -- callers verify this with perm_mask_for_reverse during
   analysis (see vectorizable_load).  */
4190 mask_vec = perm_mask_for_reverse (vectype);
4192 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4194 /* Generate the permute statement. */
4195 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4197 data_ref = make_ssa_name (perm_dest, perm_stmt);
4198 gimple_set_lhs (perm_stmt, data_ref);
/* Emit the permute as part of the vectorization of STMT.  */
4199 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4204 /* vectorizable_load.
4206 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4208 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4209 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4210 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4213 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4214 slp_tree slp_node, slp_instance slp_node_instance)
4217 tree vec_dest = NULL;
4218 tree data_ref = NULL;
4219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4220 stmt_vec_info prev_stmt_info;
4221 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4222 struct loop *loop = NULL;
4223 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4224 bool nested_in_vect_loop = false;
4225 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4226 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4229 enum machine_mode mode;
4230 gimple new_stmt = NULL;
4232 enum dr_alignment_support alignment_support_scheme;
4233 tree dataref_ptr = NULL_TREE;
4235 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4237 int i, j, group_size;
4238 tree msq = NULL_TREE, lsq;
4239 tree offset = NULL_TREE;
4240 tree realignment_token = NULL_TREE;
4242 VEC(tree,heap) *dr_chain = NULL;
4243 bool strided_load = false;
4244 bool load_lanes_p = false;
4248 bool compute_in_loop = false;
4249 struct loop *at_loop;
4251 bool slp = (slp_node != NULL);
4252 bool slp_perm = false;
4253 enum tree_code code;
4254 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4260 loop = LOOP_VINFO_LOOP (loop_vinfo);
4261 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4262 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4267 /* Multiple types in SLP are handled by creating the appropriate number of
4268 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4270 if (slp || PURE_SLP_STMT (stmt_info))
4273 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4275 gcc_assert (ncopies >= 1);
4277 /* FORNOW. This restriction should be relaxed. */
4278 if (nested_in_vect_loop && ncopies > 1)
4280 if (vect_print_dump_info (REPORT_DETAILS))
4281 fprintf (vect_dump, "multiple types in nested loop.");
4285 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4288 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4291 /* Is vectorizable load? */
4292 if (!is_gimple_assign (stmt))
4295 scalar_dest = gimple_assign_lhs (stmt);
4296 if (TREE_CODE (scalar_dest) != SSA_NAME)
4299 code = gimple_assign_rhs_code (stmt);
4300 if (code != ARRAY_REF
4301 && code != INDIRECT_REF
4302 && code != COMPONENT_REF
4303 && code != IMAGPART_EXPR
4304 && code != REALPART_EXPR
4306 && TREE_CODE_CLASS (code) != tcc_declaration)
4309 if (!STMT_VINFO_DATA_REF (stmt_info))
4312 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4313 if (negative && ncopies > 1)
4315 if (vect_print_dump_info (REPORT_DETAILS))
4316 fprintf (vect_dump, "multiple types with negative step.");
4320 elem_type = TREE_TYPE (vectype);
4321 mode = TYPE_MODE (vectype);
4323 /* FORNOW. In some cases can vectorize even if data-type not supported
4324 (e.g. - data copies). */
4325 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4327 if (vect_print_dump_info (REPORT_DETAILS))
4328 fprintf (vect_dump, "Aligned load, but unsupported type.");
4332 /* Check if the load is a part of an interleaving chain. */
4333 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4335 strided_load = true;
4337 gcc_assert (! nested_in_vect_loop);
4339 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4340 if (!slp && !PURE_SLP_STMT (stmt_info))
4342 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4343 if (vect_load_lanes_supported (vectype, group_size))
4344 load_lanes_p = true;
4345 else if (!vect_strided_load_supported (vectype, group_size))
4352 gcc_assert (!strided_load);
4353 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4354 if (alignment_support_scheme != dr_aligned
4355 && alignment_support_scheme != dr_unaligned_supported)
4357 if (vect_print_dump_info (REPORT_DETAILS))
4358 fprintf (vect_dump, "negative step but alignment required.");
4361 if (!perm_mask_for_reverse (vectype))
4363 if (vect_print_dump_info (REPORT_DETAILS))
4364 fprintf (vect_dump, "negative step and reversing not supported.");
4369 if (!vec_stmt) /* transformation not required. */
4371 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4372 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4376 if (vect_print_dump_info (REPORT_DETAILS))
4377 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4383 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4385 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4386 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4387 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4389 /* Check if the chain of loads is already vectorized. */
4390 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4392 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4395 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4396 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4398 /* VEC_NUM is the number of vect stmts to be created for this group. */
4401 strided_load = false;
4402 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4403 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4407 vec_num = group_size;
4413 group_size = vec_num = 1;
4416 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4417 gcc_assert (alignment_support_scheme);
4418 /* Targets with load-lane instructions must not require explicit
4420 gcc_assert (!load_lanes_p
4421 || alignment_support_scheme == dr_aligned
4422 || alignment_support_scheme == dr_unaligned_supported);
4424 /* In case the vectorization factor (VF) is bigger than the number
4425 of elements that we can fit in a vectype (nunits), we have to generate
4426 more than one vector stmt - i.e - we need to "unroll" the
4427 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4428 from one copy of the vector stmt to the next, in the field
4429 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4430 stages to find the correct vector defs to be used when vectorizing
4431 stmts that use the defs of the current stmt. The example below
4432 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4433 need to create 4 vectorized stmts):
4435 before vectorization:
4436 RELATED_STMT VEC_STMT
4440 step 1: vectorize stmt S1:
4441 We first create the vector stmt VS1_0, and, as usual, record a
4442 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4443 Next, we create the vector stmt VS1_1, and record a pointer to
4444 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4445 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4447 RELATED_STMT VEC_STMT
4448 VS1_0: vx0 = memref0 VS1_1 -
4449 VS1_1: vx1 = memref1 VS1_2 -
4450 VS1_2: vx2 = memref2 VS1_3 -
4451 VS1_3: vx3 = memref3 - -
4452 S1: x = load - VS1_0
4455 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4456 information we recorded in RELATED_STMT field is used to vectorize
4459 /* In case of interleaving (non-unit strided access):
4466 Vectorized loads are created in the order of memory accesses
4467 starting from the access of the first stmt of the chain:
4470 VS2: vx1 = &base + vec_size*1
4471 VS3: vx3 = &base + vec_size*2
4472 VS4: vx4 = &base + vec_size*3
4474 Then permutation statements are generated:
4476 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4477 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4480 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4481 (the order of the data-refs in the output of vect_permute_load_chain
4482 corresponds to the order of scalar stmts in the interleaving chain - see
4483 the documentation of vect_permute_load_chain()).
4484 The generation of permutation stmts and recording them in
4485 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4487 In case of both multiple types and interleaving, the vector loads and
4488 permutation stmts above are created for every copy. The result vector
4489 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4490 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4492 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4493 on a target that supports unaligned accesses (dr_unaligned_supported)
4494 we generate the following code:
4498 p = p + indx * vectype_size;
4503 Otherwise, the data reference is potentially unaligned on a target that
4504 does not support unaligned accesses (dr_explicit_realign_optimized) -
4505 then generate the following code, in which the data in each iteration is
4506 obtained by two vector loads, one from the previous iteration, and one
4507 from the current iteration:
4509 msq_init = *(floor(p1))
4510 p2 = initial_addr + VS - 1;
4511 realignment_token = call target_builtin;
4514 p2 = p2 + indx * vectype_size
4516 vec_dest = realign_load (msq, lsq, realignment_token)
4521 /* If the misalignment remains the same throughout the execution of the
4522 loop, we can create the init_addr and permutation mask at the loop
4523 preheader. Otherwise, it needs to be created inside the loop.
4524 This can only occur when vectorizing memory accesses in the inner-loop
4525 nested within an outer-loop that is being vectorized. */
4527 if (loop && nested_in_vect_loop_p (loop, stmt)
4528 && (TREE_INT_CST_LOW (DR_STEP (dr))
4529 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4531 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4532 compute_in_loop = true;
4535 if ((alignment_support_scheme == dr_explicit_realign_optimized
4536 || alignment_support_scheme == dr_explicit_realign)
4537 && !compute_in_loop)
4539 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4540 alignment_support_scheme, NULL_TREE,
4542 if (alignment_support_scheme == dr_explicit_realign_optimized)
4544 phi = SSA_NAME_DEF_STMT (msq);
4545 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4552 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4555 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4557 aggr_type = vectype;
4559 prev_stmt_info = NULL;
4560 for (j = 0; j < ncopies; j++)
4562 /* 1. Create the vector or array pointer update chain. */
4564 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4565 offset, &dummy, gsi,
4566 &ptr_incr, false, &inv_p);
4568 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4569 TYPE_SIZE_UNIT (aggr_type));
4571 if (strided_load || slp_perm)
4572 dr_chain = VEC_alloc (tree, heap, vec_num);
4578 vec_array = create_vector_array (vectype, vec_num);
4581 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4582 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4583 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4584 gimple_call_set_lhs (new_stmt, vec_array);
4585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4586 mark_symbols_for_renaming (new_stmt);
4588 /* Extract each vector into an SSA_NAME. */
4589 for (i = 0; i < vec_num; i++)
4591 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4593 VEC_quick_push (tree, dr_chain, new_temp);
4596 /* Record the mapping between SSA_NAMEs and statements. */
4597 vect_record_strided_load_vectors (stmt, dr_chain);
4601 for (i = 0; i < vec_num; i++)
4604 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4607 /* 2. Create the vector-load in the loop. */
4608 switch (alignment_support_scheme)
4611 case dr_unaligned_supported:
4613 struct ptr_info_def *pi;
4615 = build2 (MEM_REF, vectype, dataref_ptr,
4616 build_int_cst (reference_alias_ptr_type
4617 (DR_REF (first_dr)), 0));
4618 pi = get_ptr_info (dataref_ptr);
4619 pi->align = TYPE_ALIGN_UNIT (vectype);
4620 if (alignment_support_scheme == dr_aligned)
4622 gcc_assert (aligned_access_p (first_dr));
4625 else if (DR_MISALIGNMENT (first_dr) == -1)
4627 TREE_TYPE (data_ref)
4628 = build_aligned_type (TREE_TYPE (data_ref),
4629 TYPE_ALIGN (elem_type));
4630 pi->align = TYPE_ALIGN_UNIT (elem_type);
4635 TREE_TYPE (data_ref)
4636 = build_aligned_type (TREE_TYPE (data_ref),
4637 TYPE_ALIGN (elem_type));
4638 pi->misalign = DR_MISALIGNMENT (first_dr);
4642 case dr_explicit_realign:
4647 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4649 if (compute_in_loop)
4650 msq = vect_setup_realignment (first_stmt, gsi,
4652 dr_explicit_realign,
4655 new_stmt = gimple_build_assign_with_ops
4656 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4658 (TREE_TYPE (dataref_ptr),
4659 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4660 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4661 gimple_assign_set_lhs (new_stmt, ptr);
4662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4664 = build2 (MEM_REF, vectype, ptr,
4665 build_int_cst (reference_alias_ptr_type
4666 (DR_REF (first_dr)), 0));
4667 vec_dest = vect_create_destination_var (scalar_dest,
4669 new_stmt = gimple_build_assign (vec_dest, data_ref);
4670 new_temp = make_ssa_name (vec_dest, new_stmt);
4671 gimple_assign_set_lhs (new_stmt, new_temp);
4672 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4673 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4674 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4677 bump = size_binop (MULT_EXPR, vs_minus_1,
4678 TYPE_SIZE_UNIT (elem_type));
4679 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4680 new_stmt = gimple_build_assign_with_ops
4681 (BIT_AND_EXPR, NULL_TREE, ptr,
4684 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4685 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4686 gimple_assign_set_lhs (new_stmt, ptr);
4687 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4689 = build2 (MEM_REF, vectype, ptr,
4690 build_int_cst (reference_alias_ptr_type
4691 (DR_REF (first_dr)), 0));
4694 case dr_explicit_realign_optimized:
4695 new_stmt = gimple_build_assign_with_ops
4696 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4698 (TREE_TYPE (dataref_ptr),
4699 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4700 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4702 gimple_assign_set_lhs (new_stmt, new_temp);
4703 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4705 = build2 (MEM_REF, vectype, new_temp,
4706 build_int_cst (reference_alias_ptr_type
4707 (DR_REF (first_dr)), 0));
4712 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4713 new_stmt = gimple_build_assign (vec_dest, data_ref);
4714 new_temp = make_ssa_name (vec_dest, new_stmt);
4715 gimple_assign_set_lhs (new_stmt, new_temp);
4716 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4717 mark_symbols_for_renaming (new_stmt);
4719 /* 3. Handle explicit realignment if necessary/supported.
4721 vec_dest = realign_load (msq, lsq, realignment_token) */
4722 if (alignment_support_scheme == dr_explicit_realign_optimized
4723 || alignment_support_scheme == dr_explicit_realign)
4725 lsq = gimple_assign_lhs (new_stmt);
4726 if (!realignment_token)
4727 realignment_token = dataref_ptr;
4728 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4730 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4733 new_temp = make_ssa_name (vec_dest, new_stmt);
4734 gimple_assign_set_lhs (new_stmt, new_temp);
4735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4737 if (alignment_support_scheme == dr_explicit_realign_optimized)
4740 if (i == vec_num - 1 && j == ncopies - 1)
4741 add_phi_arg (phi, lsq,
4742 loop_latch_edge (containing_loop),
4748 /* 4. Handle invariant-load. */
4749 if (inv_p && !bb_vinfo)
4752 gimple_stmt_iterator gsi2 = *gsi;
4753 gcc_assert (!strided_load);
4756 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4759 tem = fold_convert (TREE_TYPE (vectype), tem);
4760 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4764 vec_inv = build_vector_from_val (vectype, tem);
4765 new_temp = vect_init_vector (stmt, vec_inv,
4767 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4772 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4773 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4776 /* Collect vector loads and later create their permutation in
4777 vect_transform_strided_load (). */
4778 if (strided_load || slp_perm)
4779 VEC_quick_push (tree, dr_chain, new_temp);
4781 /* Store vector loads in the corresponding SLP_NODE. */
4782 if (slp && !slp_perm)
4783 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4788 if (slp && !slp_perm)
4793 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4794 slp_node_instance, false))
4796 VEC_free (tree, heap, dr_chain);
4805 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4806 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4811 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4813 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4814 prev_stmt_info = vinfo_for_stmt (new_stmt);
4818 VEC_free (tree, heap, dr_chain);
4824 /* Function vect_is_simple_cond.
4827 LOOP - the loop that is being vectorized.
4828 COND - Condition that is checked for simple use.
4831 *COMP_VECTYPE - the vector type for the comparison.
4833 Returns whether a COND can be vectorized. Checks whether
4834 condition operands are supportable using vect_is_simple_use. */
/* Check that COND is a simple comparison whose two operands are each
   vectorizable: either SSA_NAMEs with simple defs, or integer/real/fixed
   constants.  On success *COMP_VECTYPE receives the vector type of
   whichever operand supplied one.
   NOTE(review): this excerpt is missing interleaved source lines (the
   embedded line numbers skip) — e.g. the early "return false" bodies are
   not visible.  Code below is kept byte-identical.  */
4837 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4841   enum vect_def_type dt;
4842   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
/* COND must be a comparison tree (e.g. LT_EXPR, EQ_EXPR, ...).  */
4844   if (!COMPARISON_CLASS_P (cond))
4847   lhs = TREE_OPERAND (cond, 0);
4848   rhs = TREE_OPERAND (cond, 1);
/* LHS: an SSA_NAME must have a simple use/def (vectype1 is presumably
   filled by the elided tail of this call — confirm against full source);
   otherwise only literal constants are accepted.  */
4850   if (TREE_CODE (lhs) == SSA_NAME)
4852       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4853       if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4857   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4858            && TREE_CODE (lhs) != FIXED_CST)
/* RHS: same checks, mirrored.  */
4861   if (TREE_CODE (rhs) == SSA_NAME)
4863       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4864       if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4868   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4869            && TREE_CODE (rhs) != FIXED_CST)
/* Prefer the LHS operand's vectype; fall back to the RHS one.  Either
   may be NULL_TREE when both operands are constants.  */
4872   *comp_vectype = vectype1 ? vectype1 : vectype2;
4876 /* vectorizable_condition.
4878 Check if STMT is conditional modify expression that can be vectorized.
4879 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4880 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4883 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4884 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4885 else clause if it is 2).
4887 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (and, when *VEC_STMT is requested, transform) a COND_EXPR
   assignment into a VEC_COND_EXPR.  REDUC_DEF/REDUC_INDEX plug a
   reduction vector into the then- (1) or else- (2) clause.
   NOTE(review): lossy excerpt — many interior lines (returns, braces,
   else-arms) are elided; code kept byte-identical.  */
4890 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4891                         gimple *vec_stmt, tree reduc_def, int reduc_index)
4893   tree scalar_dest = NULL_TREE;
4894   tree vec_dest = NULL_TREE;
4895   tree cond_expr, then_clause, else_clause;
4896   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4897   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4899   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4900   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4901   tree vec_compare, vec_cond_expr;
4903   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4905   enum vect_def_type dt, dts[4];
4906   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4907   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4908   enum tree_code code;
4909   stmt_vec_info prev_stmt_info = NULL;
4912   /* FORNOW: unsupported in basic block SLP. */
4913   gcc_assert (loop_vinfo);
4915   /* FORNOW: SLP not supported. */
4916   if (STMT_SLP_TYPE (stmt_info))
4919   gcc_assert (ncopies >= 1);
4920   if (reduc_index && ncopies > 1)
4921     return false; /* FORNOW */
4923   if (!STMT_VINFO_RELEVANT_P (stmt_info))
4926   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4927       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4931   /* FORNOW: not yet supported. */
4932   if (STMT_VINFO_LIVE_P (stmt_info))
4934       if (vect_print_dump_info (REPORT_DETAILS))
4935         fprintf (vect_dump, "value used after loop.");
4939   /* Is vectorizable conditional operation? */
4940   if (!is_gimple_assign (stmt))
4943   code = gimple_assign_rhs_code (stmt);
4945   if (code != COND_EXPR)
/* STMT is  lhs = cond_expr ? then_clause : else_clause.  */
4948   cond_expr = gimple_assign_rhs1 (stmt);
4949   then_clause = gimple_assign_rhs2 (stmt);
4950   else_clause = gimple_assign_rhs3 (stmt);
4952   if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
/* Both clauses must be SSA_NAMEs with simple defs, or literal
   constants — same discipline as vect_is_simple_cond's operands.  */
4956   if (TREE_CODE (then_clause) == SSA_NAME)
4958       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4959       if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4960                                &then_def_stmt, &def, &dt))
4963   else if (TREE_CODE (then_clause) != INTEGER_CST
4964            && TREE_CODE (then_clause) != REAL_CST
4965            && TREE_CODE (then_clause) != FIXED_CST)
4968   if (TREE_CODE (else_clause) == SSA_NAME)
4970       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4971       if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4972                                &else_def_stmt, &def, &dt))
4975   else if (TREE_CODE (else_clause) != INTEGER_CST
4976            && TREE_CODE (else_clause) != REAL_CST
4977            && TREE_CODE (else_clause) != FIXED_CST)
/* Analysis phase: record the stmt kind and ask the target whether a
   VEC_COND_EXPR on these types is expandable.  */
4982       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4983       return expand_vec_cond_expr_p (vectype, comp_vectype);
/* Transformation phase.  */
4989   scalar_dest = gimple_assign_lhs (stmt);
4990   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4992   /* Handle cond expr. */
4993   for (j = 0; j < ncopies; j++)
/* First copy (j == 0): fetch vector defs for all four operands.
   NOTE(review): the token ">emp" below looks like an HTML-mangled
   "&gtemp" (address of a gimple temp) — verify against pristine
   source before building.  Left byte-identical here.  */
5000             vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5002           vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5003                               NULL, >emp, &def, &dts[0]);
5005             vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5007           vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5008                               NULL, >emp, &def, &dts[1]);
/* REDUC_INDEX selects which clause is replaced by the reduction def.  */
5009           if (reduc_index == 1)
5010             vec_then_clause = reduc_def;
5013               vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5015               vect_is_simple_use (then_clause, loop_vinfo,
5016                                   NULL, >emp, &def, &dts[2]);
5018           if (reduc_index == 2)
5019             vec_else_clause = reduc_def;
5022               vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5024               vect_is_simple_use (else_clause, loop_vinfo,
5025                                   NULL, >emp, &def, &dts[3]);
/* Subsequent copies (j > 0): chain from the previous copy's defs.  */
5030           vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
5031           vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
5032           vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5034           vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5038       /* Arguments are ready.  Create the new vector stmt. */
5039       vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5040                             vec_cond_lhs, vec_cond_rhs);
5041       vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5042                               vec_compare, vec_then_clause, vec_else_clause);
5044       new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5045       new_temp = make_ssa_name (vec_dest, new_stmt);
5046       gimple_assign_set_lhs (new_stmt, new_temp);
5047       vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Link the ncopies vector stmts via STMT_VINFO_RELATED_STMT.  */
5049         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5051         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5053       prev_stmt_info = vinfo_for_stmt (new_stmt);
5060 /* Make sure the statement is vectorizable. */
/* Decide whether STMT is vectorizable: dispatch to the per-kind
   vectorizable_* analyzers (NULL vec_stmt => analysis only) and set
   *NEED_TO_VECTORIZE when a relevant stmt is found.  NODE is the SLP
   node, or NULL for loop-based analysis.
   NOTE(review): lossy excerpt — interior lines (returns, braces) are
   elided; code kept byte-identical.  */
5063 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5065   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5066   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5067   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5069   tree scalar_type, vectype;
5070   gimple pattern_stmt, pattern_def_stmt;
5072   if (vect_print_dump_info (REPORT_DETAILS))
5074       fprintf (vect_dump, "==> examining statement: ");
5075       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Volatile accesses are never vectorized.  */
5078   if (gimple_has_volatile_ops (stmt))
5080       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5081         fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5086   /* Skip stmts that do not need to be vectorized.  In loops this is expected
5088         - the COND_EXPR which is the loop exit condition
5089         - any LABEL_EXPRs in the loop
5090         - computations that are used only for array indexing or loop control.
5091      In basic blocks we only analyze statements that are a part of some SLP
5092      instance, therefore, all the statements are relevant.
5094      Pattern statement needs to be analyzed instead of the original statement
5095      if the original statement is not relevant.  Otherwise, we analyze both
5098   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5099   if (!STMT_VINFO_RELEVANT_P (stmt_info)
5100       && !STMT_VINFO_LIVE_P (stmt_info))
5102       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5104           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5105               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5107           /* Analyze PATTERN_STMT instead of the original stmt. */
5108           stmt = pattern_stmt;
5109           stmt_info = vinfo_for_stmt (pattern_stmt);
5110           if (vect_print_dump_info (REPORT_DETAILS))
5112               fprintf (vect_dump, "==> examining pattern statement: ");
5113               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Neither relevant nor live, and no relevant pattern: nothing to do.  */
5118           if (vect_print_dump_info (REPORT_DETAILS))
5119             fprintf (vect_dump, "irrelevant.");
5124   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5126            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5127                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5129       /* Analyze PATTERN_STMT too. */
5130       if (vect_print_dump_info (REPORT_DETAILS))
5132           fprintf (vect_dump, "==> examining pattern statement: ");
5133           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Recursive analysis of the pattern stmt.  */
5136       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5140   if (is_pattern_stmt_p (stmt_info)
5141       && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5142       && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5143           || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5145       /* Analyze def stmt of STMT if it's a pattern stmt. */
5146       if (vect_print_dump_info (REPORT_DETAILS))
5148           fprintf (vect_dump, "==> examining pattern def statement: ");
5149           print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5152       if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
/* Sanity-check the def type against the recorded relevance.  */
5157   switch (STMT_VINFO_DEF_TYPE (stmt_info))
5159       case vect_internal_def:
5162       case vect_reduction_def:
5163       case vect_nested_cycle:
5164          gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5165                      || relevance == vect_used_in_outer_by_reduction
5166                      || relevance == vect_unused_in_scope));
5169       case vect_induction_def:
5170       case vect_constant_def:
5171       case vect_external_def:
5172       case vect_unknown_def_type:
/* Basic-block SLP path: compute and record the vectype here, since
   bb stmts do not go through the loop-based vectype computation.  */
5179       gcc_assert (PURE_SLP_STMT (stmt_info));
5181       scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5182       if (vect_print_dump_info (REPORT_DETAILS))
5184           fprintf (vect_dump, "get vectype for scalar type:  ");
5185           print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5188       vectype = get_vectype_for_scalar_type (scalar_type);
5191           if (vect_print_dump_info (REPORT_DETAILS))
5193                fprintf (vect_dump, "not SLPed: unsupported data-type ");
5194                print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5199       if (vect_print_dump_info (REPORT_DETAILS))
5201           fprintf (vect_dump, "vectype:  ");
5202           print_generic_expr (vect_dump, vectype, TDF_SLIM);
5205       STMT_VINFO_VECTYPE (stmt_info) = vectype;
5208   if (STMT_VINFO_RELEVANT_P (stmt_info))
5210       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5211       gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5212       *need_to_vectorize = true;
/* Loop path (node == NULL presumably — guard elided in excerpt): try
   each analyzer in turn until one accepts the stmt.  */
5217       && (STMT_VINFO_RELEVANT_P (stmt_info)
5218           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5219     ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5220           || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5221           || vectorizable_conversion (stmt, NULL, NULL, NULL)
5222           || vectorizable_shift (stmt, NULL, NULL, NULL)
5223           || vectorizable_operation (stmt, NULL, NULL, NULL)
5224           || vectorizable_assignment (stmt, NULL, NULL, NULL)
5225           || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5226           || vectorizable_call (stmt, NULL, NULL)
5227           || vectorizable_store (stmt, NULL, NULL, NULL)
5228           || vectorizable_reduction (stmt, NULL, NULL, NULL)
5229           || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
/* SLP path: smaller analyzer set, parameterized by NODE.  */
5233       ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5234             || vectorizable_type_demotion (stmt, NULL, NULL, node)
5235             || vectorizable_shift (stmt, NULL, NULL, node)
5236             || vectorizable_operation (stmt, NULL, NULL, node)
5237             || vectorizable_assignment (stmt, NULL, NULL, node)
5238             || vectorizable_load (stmt, NULL, NULL, node, NULL)
5239             || vectorizable_store (stmt, NULL, NULL, node));
5244       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5246           fprintf (vect_dump, "not vectorized: relevant stmt not ");
5247           fprintf (vect_dump, "supported: ");
5248           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5257   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5258       need extra handling, except for vectorizable reductions.  */
5259   if (STMT_VINFO_LIVE_P (stmt_info)
5260       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5261     ok = vectorizable_live_operation (stmt, NULL, NULL);
5265       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5267           fprintf (vect_dump, "not vectorized: live stmt not ");
5268           fprintf (vect_dump, "supported: ");
5269           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5279 /* Function vect_transform_stmt.
5281 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Transform STMT: dispatch on the stmt kind recorded during analysis
   (STMT_VINFO_TYPE) to the matching vectorizable_* transformer, then
   wire the produced vec_stmt into the stmt_vec_info bookkeeping.
   NOTE(review): lossy excerpt — break statements, braces and some
   guards are elided; code kept byte-identical.  */
5284 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5285                      bool *strided_store, slp_tree slp_node,
5286                      slp_instance slp_node_instance)
5288   bool is_store = false;
5289   gimple vec_stmt = NULL;
5290   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5293   switch (STMT_VINFO_TYPE (stmt_info))
5295     case type_demotion_vec_info_type:
5296       done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5300     case type_promotion_vec_info_type:
5301       done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5305     case type_conversion_vec_info_type:
5306       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5310     case induc_vec_info_type:
5311       gcc_assert (!slp_node);
5312       done = vectorizable_induction (stmt, gsi, &vec_stmt);
5316     case shift_vec_info_type:
5317       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5321     case op_vec_info_type:
5322       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5326     case assignment_vec_info_type:
5327       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5331     case load_vec_info_type:
5332       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5337     case store_vec_info_type:
5338       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5340       if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5342           /* In case of interleaving, the whole chain is vectorized when the
5343              last store in the chain is reached.  Store stmts before the last
5344              one are skipped, and their vec_stmt_info shouldn't be freed
5346           *strided_store = true;
5347           if (STMT_VINFO_VEC_STMT (stmt_info))
5354     case condition_vec_info_type:
5355       gcc_assert (!slp_node);
5356       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5360     case call_vec_info_type:
5361       gcc_assert (!slp_node);
5362       done = vectorizable_call (stmt, gsi, &vec_stmt);
/* The call transformer may have replaced STMT in place; re-read it.  */
5363       stmt = gsi_stmt (*gsi);
5366     case reduc_vec_info_type:
5367       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
/* default: unrecognized stmt kind — only tolerated if not live.  */
5372       if (!STMT_VINFO_LIVE_P (stmt_info))
5374           if (vect_print_dump_info (REPORT_DETAILS))
5375             fprintf (vect_dump, "stmt not supported.");
5380   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5381      is being vectorized, but outside the immediately enclosing loop.  */
5383       && STMT_VINFO_LOOP_VINFO (stmt_info)
5384       && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5385                                 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5386       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5387       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5388           || STMT_VINFO_RELEVANT (stmt_info) ==
5389                                            vect_used_in_outer_by_reduction))
5391       struct loop *innerloop = LOOP_VINFO_LOOP (
5392                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5393       imm_use_iterator imm_iter;
5394       use_operand_p use_p;
5398       if (vect_print_dump_info (REPORT_DETAILS))
5399         fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5401       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5402          (to be used when vectorizing outer-loop stmts that use the DEF of
5404       if (gimple_code (stmt) == GIMPLE_PHI)
5405         scalar_dest = PHI_RESULT (stmt);
5407         scalar_dest = gimple_assign_lhs (stmt);
5409       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
/* Uses outside the inner loop are the exit phis we are looking for.  */
5411           if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5413               exit_phi = USE_STMT (use_p);
5414               STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5419   /* Handle stmts whose DEF is used outside the loop-nest that is
5420      being vectorized.  */
5421   if (STMT_VINFO_LIVE_P (stmt_info)
5422       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5424       done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5429     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5435 /* Remove a group of stores (for SLP or interleaving), free their
/* Remove a chain of grouped stores starting at FIRST_STMT, freeing each
   stmt's stmt_vec_info and deleting the stmt itself.  The enclosing loop
   construct and the advance to TMP are elided in this excerpt.  */
5439 vect_remove_stores (gimple first_stmt)
5441   gimple next = first_stmt;
5443   gimple_stmt_iterator next_si;
5447       /* Free the attached stmt_vec_info and remove the stmt. */
5448       next_si = gsi_for_stmt (next);
5449       gsi_remove (&next_si, true);
/* Fetch the successor in the interleaving chain before freeing NEXT's
   info (GROUP_NEXT_ELEMENT lives inside that info).  */
5450       tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5451       free_stmt_vec_info (next);
5457 /* Function new_stmt_vec_info.
5459 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocate and zero-initialize a fresh stmt_vec_info for STMT, owned by
   LOOP_VINFO or BB_VINFO, with every field set to an explicit default.
   Caller owns the returned struct (freed via free_stmt_vec_info).  */
5462 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5463                    bb_vec_info bb_vinfo)
5466   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5468   STMT_VINFO_TYPE (res) = undef_vec_info_type;
5469   STMT_VINFO_STMT (res) = stmt;
5470   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5471   STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5472   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5473   STMT_VINFO_LIVE_P (res) = false;
5474   STMT_VINFO_VECTYPE (res) = NULL;
5475   STMT_VINFO_VEC_STMT (res) = NULL;
5476   STMT_VINFO_VECTORIZABLE (res) = true;
5477   STMT_VINFO_IN_PATTERN_P (res) = false;
5478   STMT_VINFO_RELATED_STMT (res) = NULL;
5479   STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5480   STMT_VINFO_DATA_REF (res) = NULL;
/* Data-reference description fields start out unset.  */
5482   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5483   STMT_VINFO_DR_OFFSET (res) = NULL;
5484   STMT_VINFO_DR_INIT (res) = NULL;
5485   STMT_VINFO_DR_STEP (res) = NULL;
5486   STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
/* Loop-header phis get an unknown def type until classified.  */
5488   if (gimple_code (stmt) == GIMPLE_PHI
5489       && is_loop_header_bb_p (gimple_bb (stmt)))
5490     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5492     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5494   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5495   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5496   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5497   STMT_SLP_TYPE (res) = loop_vect;
/* Interleaving-group bookkeeping (stores/loads chains).  */
5498   GROUP_FIRST_ELEMENT (res) = NULL;
5499   GROUP_NEXT_ELEMENT (res) = NULL;
5500   GROUP_SIZE (res) = 0;
5501   GROUP_STORE_COUNT (res) = 0;
5502   GROUP_GAP (res) = 0;
5503   GROUP_SAME_DR_STMT (res) = NULL;
5504   GROUP_READ_WRITE_DEPENDENCE (res) = false;
5510 /* Create a hash table for stmt_vec_info. */
/* Allocate the global stmt_vec_info lookup vector; must not already
   exist (asserted).  Paired with free_stmt_vec_info_vec.  */
5513 init_stmt_vec_info_vec (void)
5515   gcc_assert (!stmt_vec_info_vec);
5516   stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5520 /* Free hash table for stmt_vec_info. */
/* Release the global stmt_vec_info lookup vector allocated by
   init_stmt_vec_info_vec; must exist (asserted).  */
5523 free_stmt_vec_info_vec (void)
5525   gcc_assert (stmt_vec_info_vec);
5526   VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5530 /* Free stmt vectorization related info. */
/* Free STMT's attached stmt_vec_info: release its same-align-refs
   vector and clear the stmt -> info mapping.  (An early-return guard
   for a NULL info is presumably elided in this excerpt.)  */
5533 free_stmt_vec_info (gimple stmt)
5535   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5540   VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5541   set_vinfo_for_stmt (stmt, NULL);
5546 /* Function get_vectype_for_scalar_type_and_size.
5548 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* Build the vector type for SCALAR_TYPE with total vector SIZE bytes
   (SIZE == 0 => use the target's preferred SIMD mode).  Returns the
   vectype, with NULL-return paths elided in this excerpt.  */
5552 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5554   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5555   enum machine_mode simd_mode;
5556   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5563   /* We can't build a vector type of elements with alignment bigger than
5565   if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5568   /* For vector types of elements whose mode precision doesn't
5569      match their types precision we use a element type of mode
5570      precision.  The vectorization routines will have to make sure
5571      they support the proper result truncation/extension.  */
5572   if (INTEGRAL_TYPE_P (scalar_type)
5573       && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5574     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5575                                                   TYPE_UNSIGNED (scalar_type));
/* Only integer and float element modes are supported.  */
5577   if (GET_MODE_CLASS (inner_mode) != MODE_INT
5578       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5581   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5582      When the component mode passes the above test simply use a type
5583      corresponding to that mode.  The theory is that any use that
5584      would cause problems with this will disable vectorization anyway.  */
5585   if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5586       && !INTEGRAL_TYPE_P (scalar_type)
5587       && !POINTER_TYPE_P (scalar_type))
5588     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5590   /* If no size was supplied use the mode the target prefers.   Otherwise
5591      lookup a vector mode of the specified size.  */
5593     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5595     simd_mode = mode_for_vector (inner_mode, size / nbytes);
5596   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5600   vectype = build_vector_type (scalar_type, nunits);
5601   if (vect_print_dump_info (REPORT_DETAILS))
5603       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5604       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5610   if (vect_print_dump_info (REPORT_DETAILS))
5612       fprintf (vect_dump, "vectype: ");
5613       print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* Reject types whose mode the target cannot actually handle as a
   vector (or at least an integral mode).  */
5616   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5617       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5619       if (vect_print_dump_info (REPORT_DETAILS))
5620         fprintf (vect_dump, "mode not supported by target.");
/* Vector size (in bytes) currently in use for vectorization; 0 until
   the first successful vectype lookup fixes it (see
   get_vectype_for_scalar_type).  */
5627 unsigned int current_vector_size;
5629 /* Function get_vectype_for_scalar_type.
5631 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Wrapper around get_vectype_for_scalar_type_and_size that uses (and,
   on first success, latches) the global current_vector_size.  */
5635 get_vectype_for_scalar_type (tree scalar_type)
5638   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5639                                                   current_vector_size);
/* First successful lookup with size 0 fixes the vector size for the
   rest of the compilation unit.  (The "vectype != NULL" half of this
   condition is elided in the excerpt.)  */
5641       && current_vector_size == 0)
5642     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5646 /* Function get_same_sized_vectype
5648 Returns a vector type corresponding to SCALAR_TYPE of size
5649 VECTOR_TYPE if supported by the target. */
/* Return a vector type for SCALAR_TYPE whose total size matches
   VECTOR_TYPE's mode size, if the target supports one.  */
5652 get_same_sized_vectype (tree scalar_type, tree vector_type)
5654   return get_vectype_for_scalar_type_and_size
5655            (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5658 /* Function vect_is_simple_use.
5661 LOOP_VINFO - the vect info of the loop that is being vectorized.
5662 BB_VINFO - the vect info of the basic block that is being vectorized.
5663 OPERAND - operand of a stmt in the loop or bb.
5664 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5666 Returns whether a stmt with OPERAND can be vectorized.
5667 For loops, supportable operands are constants, loop invariants, and operands
5668 that are defined by the current iteration of the loop. Unsupportable
5669 operands are those that are defined by a previous iteration of the loop (as
5670 is the case in reduction/induction computations).
5671 For basic blocks, supportable operands are constants and bb invariants.
5672 For now, operands defined outside the basic block are not supported. */
/* Classify OPERAND's definition for the vectorizer: set *DT to the
   vect_def_type, *DEF_STMT to the defining stmt and *DEF to the defined
   value.  Constants and invariants are accepted directly; SSA_NAMEs are
   classified via their def stmt's stmt_vec_info.
   NOTE(review): lossy excerpt — "return true/false" lines and some
   braces are elided; code kept byte-identical.  */
5675 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5676                     bb_vec_info bb_vinfo, gimple *def_stmt,
5677                     tree *def, enum vect_def_type *dt)
5680   stmt_vec_info stmt_vinfo;
5681   struct loop *loop = NULL;
5684     loop = LOOP_VINFO_LOOP (loop_vinfo);
5689   if (vect_print_dump_info (REPORT_DETAILS))
5691       fprintf (vect_dump, "vect_is_simple_use: operand ");
5692       print_generic_expr (vect_dump, operand, TDF_SLIM);
/* Literal constants are vect_constant_def.  */
5695   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5697       *dt = vect_constant_def;
/* Other invariants (e.g. addresses) are external defs.  */
5701   if (is_gimple_min_invariant (operand))
5704       *dt = vect_external_def;
/* Look through PAREN_EXPR wrappers.  */
5708   if (TREE_CODE (operand) == PAREN_EXPR)
5710       if (vect_print_dump_info (REPORT_DETAILS))
5711         fprintf (vect_dump, "non-associatable copy.");
5712       operand = TREE_OPERAND (operand, 0);
5715   if (TREE_CODE (operand) != SSA_NAME)
5717       if (vect_print_dump_info (REPORT_DETAILS))
5718         fprintf (vect_dump, "not ssa-name.");
5722   *def_stmt = SSA_NAME_DEF_STMT (operand);
5723   if (*def_stmt == NULL)
5725       if (vect_print_dump_info (REPORT_DETAILS))
5726         fprintf (vect_dump, "no def_stmt.");
5730   if (vect_print_dump_info (REPORT_DETAILS))
5732       fprintf (vect_dump, "def_stmt: ");
5733       print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5736   /* Empty stmt is expected only in case of a function argument.
5737      (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
5738   if (gimple_nop_p (*def_stmt))
5741       *dt = vect_external_def;
/* Defs from outside the analyzed loop/bb (or bb phis) are external;
   otherwise read the def type recorded in the stmt_vec_info.  */
5745   bb = gimple_bb (*def_stmt);
5747   if ((loop && !flow_bb_inside_loop_p (loop, bb))
5748       || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5749       || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5750     *dt = vect_external_def;
5753       stmt_vinfo = vinfo_for_stmt (*def_stmt);
5754       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5757   if (*dt == vect_unknown_def_type)
5759       if (vect_print_dump_info (REPORT_DETAILS))
5760         fprintf (vect_dump, "Unsupported pattern.");
5764   if (vect_print_dump_info (REPORT_DETAILS))
5765     fprintf (vect_dump, "type of def: %d.",*dt);
/* Extract the defined value from the def stmt.  */
5767   switch (gimple_code (*def_stmt))
5770       *def = gimple_phi_result (*def_stmt);
5774       *def = gimple_assign_lhs (*def_stmt);
5778       *def = gimple_call_lhs (*def_stmt);
/* default: anything else cannot define a vectorizable operand.  */
5783         if (vect_print_dump_info (REPORT_DETAILS))
5784           fprintf (vect_dump, "unsupported defining stmt: ");
5791 /* Function vect_is_simple_use_1.
5793    Same as vect_is_simple_use but also determines the vector operand
5794 type of OPERAND and stores it to *VECTYPE. If the definition of
5795 OPERAND is vect_uninitialized_def, vect_constant_def or
5796 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5797 is responsible to compute the best suited vector type for the
/* Like vect_is_simple_use, but additionally report the operand's vector
   type in *VECTYPE (NULL_TREE for constants/externals/uninitialized,
   where the caller must choose a suitable vectype itself).  */
5801 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5802                       bb_vec_info bb_vinfo, gimple *def_stmt,
5803                       tree *def, enum vect_def_type *dt, tree *vectype)
5805   if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5808   /* Now get a vector type if the def is internal, otherwise supply
5809      NULL_TREE and leave it up to the caller to figure out a proper
5810      type for the use stmt.  */
5811   if (*dt == vect_internal_def
5812       || *dt == vect_induction_def
5813       || *dt == vect_reduction_def
5814       || *dt == vect_double_reduction_def
5815       || *dt == vect_nested_cycle)
5817       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
/* An irrelevant, non-live pattern def delegates its vectype to the
   related (pattern) stmt's info.  */
5819       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5820           && !STMT_VINFO_RELEVANT (stmt_info)
5821           && !STMT_VINFO_LIVE_P (stmt_info))
5822         stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5824       *vectype = STMT_VINFO_VECTYPE (stmt_info);
5825       gcc_assert (*vectype != NULL_TREE);
5827   else if (*dt == vect_uninitialized_def
5828            || *dt == vect_constant_def
5829            || *dt == vect_external_def)
5830     *vectype = NULL_TREE;
5838 /* Function supportable_widening_operation
5840 Check whether an operation represented by the code CODE is a
5841 widening operation that is supported by the target platform in
5842 vector form (i.e., when operating on arguments of type VECTYPE_IN
5843 producing a result of type VECTYPE_OUT).
5845 Widening operations we currently support are NOP (CONVERT), FLOAT
5846 and WIDEN_MULT. This function checks if these operations are supported
5847 by the target platform either directly (via vector tree-codes), or via
5851 - CODE1 and CODE2 are codes of vector operations to be used when
5852 vectorizing the operation, if available.
5853 - DECL1 and DECL2 are decls of target builtin functions to be used
5854 when vectorizing the operation, if available. In this case,
5855 CODE1 and CODE2 are CALL_EXPR.
5856 - MULTI_STEP_CVT determines the number of required intermediate steps in
5857 case of multi-step conversion (like char->short->int - in that case
5858 MULTI_STEP_CVT will be 1).
5859 - INTERM_TYPES contains the intermediate type required to perform the
5860 widening operation (short in the above example). */
/* NOTE(review): this excerpt is a numbered listing with lines elided
   (gaps in the 58xx/60xx numbering).  Braces, `return' statements, the
   `switch (code)' opener and some case labels fall in those gaps, so the
   annotations below describe the control structure only as far as the
   visible fragments allow — confirm against the full file.
   The function's return-type line (presumably `bool') is also elided.  */
5863 supportable_widening_operation (enum tree_code code, gimple stmt,
5864 tree vectype_out, tree vectype_in,
5865 tree *decl1, tree *decl2,
5866 enum tree_code *code1, enum tree_code *code2,
5867 int *multi_step_cvt,
5868 VEC (tree, heap) **interm_types)
5870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5871 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5872 struct loop *vect_loop = NULL;
5874 enum machine_mode vec_mode;
5875 enum insn_code icode1, icode2;
5876 optab optab1, optab2;
5877 tree vectype = vectype_in;
5878 tree wide_vectype = vectype_out;
5879 enum tree_code c1, c2;
/* Presumably guarded by `if (loop_info)' on an elided line — STMT may not
   be inside a loop when doing basic-block SLP vectorization; confirm.  */
5882 vect_loop = LOOP_VINFO_LOOP (loop_info);
5884 /* The result of a vectorized widening operation usually requires two vectors
5885 (because the widened results do not fit in one vector). The generated
5886 vector results would normally be expected to be generated in the same
5887 order as in the original scalar computation, i.e. if 8 results are
5888 generated in each vector iteration, they are to be organized as follows:
5889 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5891 However, in the special case that the result of the widening operation is
5892 used in a reduction computation only, the order doesn't matter (because
5893 when vectorizing a reduction we change the order of the computation).
5894 Some targets can take advantage of this and generate more efficient code.
5895 For example, targets like Altivec, that support widen_mult using a sequence
5896 of {mult_even,mult_odd} generate the following vectors:
5897 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5899 When vectorizing outer-loops, we execute the inner-loop sequentially
5900 (each vectorized inner-loop iteration contributes to VF outer-loop
5901 iterations in parallel). We therefore don't allow to change the order
5902 of the computation in the inner-loop during outer-loop vectorization. */
/* Condition fragment: the unordered (even/odd) variant is acceptable only
   when the result feeds a reduction and we are not inside a nested
   (outer-loop-vectorized) inner loop.  The opening `if (...' line is
   elided; presumably it also tests `vect_loop' for NULL — confirm.  */
5905 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5906 && !nested_in_vect_loop_p (vect_loop, stmt)
/* Second condition fragment (opener elided): when order doesn't matter and
   the target provides even/odd widening-multiply builtins, use those
   instead of the HI/LO tree codes.  */
5912 && code == WIDEN_MULT_EXPR
5913 && targetm.vectorize.builtin_mul_widen_even
5914 && targetm.vectorize.builtin_mul_widen_even (vectype)
5915 && targetm.vectorize.builtin_mul_widen_odd
5916 && targetm.vectorize.builtin_mul_widen_odd (vectype)
5918 if (vect_print_dump_info (REPORT_DETAILS))
5919 fprintf (vect_dump, "Unordered widening operation detected.");
/* CALL_EXPR in *CODE1/*CODE2 signals the caller to emit builtin calls
   (*DECL1/*DECL2) rather than vector tree codes.  */
5921 *code1 = *code2 = CALL_EXPR;
5922 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5923 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
/* The `switch (code)' opener is elided.  Each case selects the HI/LO pair
   of vector tree codes; which half goes in c1 vs. c2 depends on target
   endianness so that results come out in scalar order.  */
5929 case WIDEN_MULT_EXPR:
5930 if (BYTES_BIG_ENDIAN)
5932 c1 = VEC_WIDEN_MULT_HI_EXPR;
5933 c2 = VEC_WIDEN_MULT_LO_EXPR;
5937 c2 = VEC_WIDEN_MULT_HI_EXPR;
5938 c1 = VEC_WIDEN_MULT_LO_EXPR;
5942 case WIDEN_LSHIFT_EXPR:
5943 if (BYTES_BIG_ENDIAN)
5945 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5946 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5950 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5951 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
/* Elided case label here — presumably CASE_CONVERT (integer widening via
   unpack); confirm against the full file.  */
5956 if (BYTES_BIG_ENDIAN)
5958 c1 = VEC_UNPACK_HI_EXPR;
5959 c2 = VEC_UNPACK_LO_EXPR;
5963 c2 = VEC_UNPACK_HI_EXPR;
5964 c1 = VEC_UNPACK_LO_EXPR;
/* Elided case label here — presumably FLOAT_EXPR (int -> float widening
   via unpack-float); confirm against the full file.  */
5969 if (BYTES_BIG_ENDIAN)
5971 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5972 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5976 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5977 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5981 case FIX_TRUNC_EXPR:
5982 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5983 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5984 computing the operation. */
/* End of switch (default / returns elided).  Below: map the chosen tree
   codes to optabs and verify the target implements them.  */
5991 if (code == FIX_TRUNC_EXPR)
5993 /* The signedness is determined from output operand. */
5994 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5995 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
/* For all other codes the signedness comes from the input type.  */
5999 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6000 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6003 if (!optab1 || !optab2)
/* Both HI and LO halves must have an insn for the input vector mode
   (failure `return false;' lines are elided).  */
6006 vec_mode = TYPE_MODE (vectype);
6007 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6008 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6011 /* Check if it's a multi-step conversion that can be done using intermediate
/* If a single step does not produce the wanted wide mode, try a chain of
   intermediate types (e.g. char->short->int).  */
6013 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
6014 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype)
6017 tree prev_type = vectype, intermediate_type;
6018 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6019 optab optab3, optab4;
/* Multi-step promotion is only handled for plain conversions, not for
   WIDEN_MULT/WIDEN_LSHIFT (failure return elided).  */
6021 if (!CONVERT_EXPR_CODE_P (code))
6027 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6028 intermediate steps in promotion sequence. We try
6029 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6031 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
/* NOTE(review): the literal 3 presumably equals MAX_INTERM_CVT_STEPS
   (as allocated above) — using the named constant here would keep the
   bound and the allocation in sync; confirm the macro's value.  */
6032 for (i = 0; i < 3; i++)
6034 intermediate_mode = insn_data[icode1].operand[0].mode;
6035 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6036 TYPE_UNSIGNED (prev_type));
6037 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6038 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
/* Require: (a) the HI/LO step from PREV_MODE lands exactly in
   INTERMEDIATE_MODE, and (b) the intermediate type itself supports a
   further HI/LO step (optab3/optab4) — otherwise the chain is dead
   (failure return elided).  Note icode1/icode2 are re-queried for the
   intermediate step, so on success they describe the newest step.  */
6040 if (!optab3 || !optab4
6041 || ((icode1 = optab_handler (optab1, prev_mode))
6042 == CODE_FOR_nothing)
6043 || insn_data[icode1].operand[0].mode != intermediate_mode
6044 || ((icode2 = optab_handler (optab2, prev_mode))
6045 == CODE_FOR_nothing)
6046 || insn_data[icode2].operand[0].mode != intermediate_mode
6047 || ((icode1 = optab_handler (optab3, intermediate_mode))
6048 == CODE_FOR_nothing)
6049 || ((icode2 = optab_handler (optab4, intermediate_mode))
6050 == CODE_FOR_nothing)
/* Record the accepted intermediate type and count the extra step.  */
6053 VEC_quick_push (tree, *interm_types, intermediate_type);
6054 (*multi_step_cvt)++;
/* Success exit of the loop when the chain reaches the wide result mode
   (the `return true;'-style lines that follow are elided).  */
6056 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6057 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)
6060 prev_type = intermediate_type;
6061 prev_mode = intermediate_mode;
6073 /* Function supportable_narrowing_operation
6075 Check whether an operation represented by the code CODE is a
6076 narrowing operation that is supported by the target platform in
6077 vector form (i.e., when operating on arguments of type VECTYPE_IN
6078 and producing a result of type VECTYPE_OUT).
6080 Narrowing operations we currently support are NOP (CONVERT) and
6081 FIX_TRUNC. This function checks if these operations are supported by
6082 the target platform directly via vector tree-codes.
6085 - CODE1 is the code of a vector operation to be used when
6086 vectorizing the operation, if available.
6087 - MULTI_STEP_CVT determines the number of required intermediate steps in
6088 case of multi-step conversion (like int->short->char - in that case
6089 MULTI_STEP_CVT will be 1).
6090 - INTERM_TYPES contains the intermediate type required to perform the
6091 narrowing operation (short in the above example). */
6094 supportable_narrowing_operation (enum tree_code code,
6095 tree vectype_out, tree vectype_in,
6096 enum tree_code *code1, int *multi_step_cvt,
6097 VEC (tree, heap) **interm_types)
6099 enum machine_mode vec_mode;
6100 enum insn_code icode1;
6101 optab optab1, interm_optab;
6102 tree vectype = vectype_in;
6103 tree narrow_vectype = vectype_out;
6105 tree intermediate_type, prev_type;
6111 c1 = VEC_PACK_TRUNC_EXPR;
6114 case FIX_TRUNC_EXPR:
6115 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6119 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6120 tree code and optabs used for computing the operation. */
6127 if (code == FIX_TRUNC_EXPR)
6128 /* The signedness is determined from output operand. */
6129 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6131 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6136 vec_mode = TYPE_MODE (vectype);
6137 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6140 /* Check if it's a multi-step conversion that can be done using intermediate
6142 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6144 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6147 prev_type = vectype;
6148 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6149 intermediate steps in promotion sequence. We try
6150 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6152 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6153 for (i = 0; i < 3; i++)
6155 intermediate_mode = insn_data[icode1].operand[0].mode;
6156 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6157 TYPE_UNSIGNED (prev_type));
6158 interm_optab = optab_for_tree_code (c1, intermediate_type,
6161 || ((icode1 = optab_handler (optab1, prev_mode))
6162 == CODE_FOR_nothing)
6163 || insn_data[icode1].operand[0].mode != intermediate_mode
6164 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6165 == CODE_FOR_nothing))
6168 VEC_quick_push (tree, *interm_types, intermediate_type);
6169 (*multi_step_cvt)++;
6171 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6174 prev_type = intermediate_type;
6175 prev_mode = intermediate_mode;