/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside a pattern; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }
  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
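/* For illustration: in a loop such as

       for (i = 0; i < N; i++)
         {
           a[i] = b[i] + 1;   <-- alters memory (has a vdef): relevant
           t = b[i] * 2;
         }
       last = t;              <-- 't' is used after the loop: live

   the store to a[i] is relevant because it changes memory, and the def
   of 't' is live because its value escapes the loop.  */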
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;
  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }
  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
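/* For illustration: given 'x_1 = a[i_2]', the use of 'i_2' only indexes
   the array, so this function returns false for it; given the store
   'a[i_2] = x_1', the use of 'x_1' is the stored value itself, so the
   function returns true for 'x_1'.  */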
static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant

   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
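/* For illustration of case 3: in a nest such as

       for (i = ...)              <-- outer loop
         {
           d = ...;               <-- DEF_STMT in the outer loop
           for (j = ...)          <-- inner loop
             ... = ... d ...;     <-- STMT uses 'd' in the inner loop
         }

   DEF_STMT and STMT are in different loops of the nest (case 3a), so the
   relevance recorded for DEF_STMT is translated to the outer loop's terms
   rather than copied from STMT unchanged.  */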
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }
  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }
  /* 2. Process worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }
      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
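      /* For illustration: in a reduction such as

             s = 0;
             for (i = 0; i < N; i++)
               s += a[i];

         only the final value of 's' is used after the loop, so the partial
         sums may be computed in any order; stmts feeding only this
         computation are marked vect_used_by_reduction.  */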
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }
      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_cost_strided_group_size.

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */
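/* For illustration: for an interleaved pair of stores such as

       a[2*i]     = x;    <-- first stmt of the group, GROUP_SIZE == 2
       a[2*i + 1] = y;

   the first stmt yields 2 and the second yields 1, so the group size is
   accounted for exactly once when costs are summed over the group.  */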
static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost.

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);
  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }
  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }
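  /* For illustration: with GROUP_SIZE == 4 and NCOPIES == 1, the permutation
     above is costed as 1 * log2(4) * 4 == 8 vector stmts, on top of the cost
     of the stores themselves.  */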
  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                          vectype,
                                                          DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost.

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;
  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }
  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }
  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
                      ((!STMT_VINFO_STRIDED_ACCESS (stmt_info))
                       || group_size > 1 || slp_node),
                      &inside_cost, &outside_cost);
  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                          vectype,
                                                          DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */
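        /* For illustration: when ADD_REALIGN_COST is set, the one-off cost
           charged below is two vector stmts (the address load and the
           initial load), plus one more if a mask must be built via
           builtin_mask_for_load; each copy then pays one vector load and
           one realignment stmt inside the loop.  */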
        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;
  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
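/* For illustration: when vectorizing 'x = a[i] + c' with 'c' constant,
   the operand 'a[i]' is defined inside the loop, so its vector def is
   taken from the vectorized load recorded in STMT_VINFO_VEC_STMT of the
   defining stmt, whereas 'c' gets a new 'vect_cst_ = {c,c,...,c}' built
   by vect_init_vector.  */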
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }
  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }
    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }
    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }
    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }
    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */
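/* For illustration: on targets that provide vectorized math routines,
   a call such as 'y = sqrt (x)' can be mapped by the target hook to a
   builtin that computes a whole vector of square roots at once; when
   the hook returns NULL_TREE the call is left unvectorized.  */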
static tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;
  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;
  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }
  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));
  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
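  /* For illustration: with a vectorization factor of 8 and 4-element
     input vectors (nunits_in == 4), NCOPIES is 8/4 == 2, i.e. two
     vectorized calls are emitted per scalar call; for NARROW the
     division uses nunits_out, since each call consumes two input
     vectors to produce one output vector.  */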
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }
          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }
          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);
  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
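/* For illustration: widening eight 16-bit elements to eight 32-bit
   elements with 128-bit vectors yields two result vectors of four
   elements each; this helper is invoked once per half, with CODE (or
   DECL, for a target builtin) selecting the lo/hi variant.  */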
static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree rhs_type;
  tree builtin_decl;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  VEC(tree,heap) *dummy = NULL;
  int dummy_int;
  /* Is STMT a vectorizable conversion?  */

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;
  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }
  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }
  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");
  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code,
                                                  vectype_out, vectype_in);
          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;
2126 /* Multiple types in SLP are handled by creating the appropriate number of
2127 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2128 case of SLP. */
2129 if (slp_node || PURE_SLP_STMT (stmt_info))
2132 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2134 gcc_assert (ncopies >= 1);
2136 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2139 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2142 /* Is vectorizable assignment? */
2143 if (!is_gimple_assign (stmt))
2146 scalar_dest = gimple_assign_lhs (stmt);
2147 if (TREE_CODE (scalar_dest) != SSA_NAME)
2150 code = gimple_assign_rhs_code (stmt);
2151 if (gimple_assign_single_p (stmt)
2152 || code == PAREN_EXPR
2153 || CONVERT_EXPR_CODE_P (code))
2154 op = gimple_assign_rhs1 (stmt);
2158 if (code == VIEW_CONVERT_EXPR)
2159 op = TREE_OPERAND (op, 0);
2161 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2162 &def_stmt, &def, &dt[0], &vectype_in))
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "use not simple.");
2169 /* We can handle NOP_EXPR conversions that do not change the number
2170 of elements or the vector size. */
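/* E.g. (editor's illustration): unsigned int -> int on V4SI, or a
   VIEW_CONVERT_EXPR between V4SI and V4SF, keeps both the element
   count (4) and the vector size (16 bytes), so it is accepted here. */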
2171 if ((CONVERT_EXPR_CODE_P (code)
2172 || code == VIEW_CONVERT_EXPR)
2174 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2175 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2176 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2179 /* We do not handle bit-precision changes. */
2180 if ((CONVERT_EXPR_CODE_P (code)
2181 || code == VIEW_CONVERT_EXPR)
2182 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2183 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2184 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2185 || ((TYPE_PRECISION (TREE_TYPE (op))
2186 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2187 /* But a conversion that does not change the bit-pattern is ok. */
2188 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2189 > TYPE_PRECISION (TREE_TYPE (op)))
2190 && TYPE_UNSIGNED (TREE_TYPE (op))))
2192 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "type conversion to/from bit-precision "
2194 "unsupported.");
2198 if (!vec_stmt) /* transformation not required. */
2200 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "=== vectorizable_assignment ===");
2203 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "transform assignment.");
2212 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2215 for (j = 0; j < ncopies; j++)
2219 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2221 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2223 /* Arguments are ready. Create the new vector stmt. */
2224 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2226 if (CONVERT_EXPR_CODE_P (code)
2227 || code == VIEW_CONVERT_EXPR)
2228 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2229 new_stmt = gimple_build_assign (vec_dest, vop);
2230 new_temp = make_ssa_name (vec_dest, new_stmt);
2231 gimple_assign_set_lhs (new_stmt, new_temp);
2232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2234 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2241 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2243 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2245 prev_stmt_info = vinfo_for_stmt (new_stmt);
2248 VEC_free (tree, heap, vec_oprnds);
2253 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2254 either as shift by a scalar or by a vector. */
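/* Usage sketch (editor's note; the caller shown is hypothetical):

     if (vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node))
       ...replace a division by a power of two with a shift...

   i.e., pattern recognizers can query this before committing to a
   shift-based rewrite. */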
2257 vect_supportable_shift (enum tree_code code, tree scalar_type)
2260 enum machine_mode vec_mode;
2265 vectype = get_vectype_for_scalar_type (scalar_type);
2269 optab = optab_for_tree_code (code, vectype, optab_scalar);
2271 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2273 optab = optab_for_tree_code (code, vectype, optab_vector);
2275 || (optab_handler (optab, TYPE_MODE (vectype))
2276 == CODE_FOR_nothing))
2280 vec_mode = TYPE_MODE (vectype);
2281 icode = (int) optab_handler (optab, vec_mode);
2282 if (icode == CODE_FOR_nothing)
2289 /* Function vectorizable_shift.
2291 Check if STMT performs a shift operation that can be vectorized.
2292 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2293 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2294 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
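/* Illustrative example (editor's note, V8HI assumed): the loop

     for (i = 0; i < n; i++)
       a[i] = b[i] << 3;

   is vectorized below as vect_a = vect_b << 3, using the target's
   vector-by-scalar shift optab when available, or by building a
   vector of shift counts for the vector-by-vector optab. */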
2297 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2298 gimple *vec_stmt, slp_tree slp_node)
2302 tree op0, op1 = NULL;
2303 tree vec_oprnd1 = NULL_TREE;
2304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 enum tree_code code;
2308 enum machine_mode vec_mode;
2312 enum machine_mode optab_op2_mode;
2315 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2316 gimple new_stmt = NULL;
2317 stmt_vec_info prev_stmt_info;
2323 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2326 bool scalar_shift_arg = true;
2327 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2330 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2333 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2336 /* Is STMT a vectorizable binary/unary operation? */
2337 if (!is_gimple_assign (stmt))
2340 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2343 code = gimple_assign_rhs_code (stmt);
2345 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2346 || code == RROTATE_EXPR))
2349 scalar_dest = gimple_assign_lhs (stmt);
2350 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2351 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2352 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "bit-precision shifts not supported.");
2359 op0 = gimple_assign_rhs1 (stmt);
2360 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2361 &def_stmt, &def, &dt[0], &vectype))
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "use not simple.");
2367 /* If op0 is an external or constant def use a vector type with
2368 the same size as the output vector type. */
2370 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2372 gcc_assert (vectype);
2375 if (vect_print_dump_info (REPORT_DETAILS))
2377 fprintf (vect_dump, "no vectype for scalar type ");
2378 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2384 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2385 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2386 if (nunits_out != nunits_in)
2389 op1 = gimple_assign_rhs2 (stmt);
2390 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2392 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "use not simple.");
2398 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2402 /* Multiple types in SLP are handled by creating the appropriate number of
2403 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2404 case of SLP. */
2405 if (slp_node || PURE_SLP_STMT (stmt_info))
2408 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2410 gcc_assert (ncopies >= 1);
2412 /* Determine whether the shift amount is a vector, or scalar. If the
2413 shift/rotate amount is a vector, use the vector/vector shift optabs. */
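/* E.g. (editor's illustration): in a[i] << b[i] the shift count is a
   loop-internal def, so the vector/vector form is required; in
   a[i] << k with loop-invariant K the scalar form can be used. */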
2415 if (dt[1] == vect_internal_def && !slp_node)
2416 scalar_shift_arg = false;
2417 else if (dt[1] == vect_constant_def
2418 || dt[1] == vect_external_def
2419 || dt[1] == vect_internal_def)
2421 /* In SLP, need to check whether the shift count is the same,
2422 in loops if it is a constant or invariant, it is always
2423 a scalar shift. */
2426 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2429 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2430 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2431 scalar_shift_arg = false;
2436 if (vect_print_dump_info (REPORT_DETAILS))
2437 fprintf (vect_dump, "operand mode requires invariant argument.");
2441 /* Vector shifted by vector. */
2442 if (!scalar_shift_arg)
2444 optab = optab_for_tree_code (code, vectype, optab_vector);
2445 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "vector/vector shift/rotate found.");
2448 /* See if the machine has a vector shifted by scalar insn and if not
2449 then see if it has a vector shifted by vector insn. */
2452 optab = optab_for_tree_code (code, vectype, optab_scalar);
2454 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2456 if (vect_print_dump_info (REPORT_DETAILS))
2457 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2461 optab = optab_for_tree_code (code, vectype, optab_vector);
2463 && (optab_handler (optab, TYPE_MODE (vectype))
2464 != CODE_FOR_nothing))
2466 scalar_shift_arg = false;
2468 if (vect_print_dump_info (REPORT_DETAILS))
2469 fprintf (vect_dump, "vector/vector shift/rotate found.");
2471 /* Unlike the other binary operators, shifts/rotates have
2472 the rhs being int, instead of the same type as the lhs,
2473 so make sure the scalar is the right type if we are
2474 dealing with vectors of short/char. */
2475 if (dt[1] == vect_constant_def)
2476 op1 = fold_convert (TREE_TYPE (vectype), op1);
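/* E.g. (editor's illustration): for a V8HI shift the GIMPLE constant
   count 3 has type int; the fold_convert above narrows it to the
   element type short so a vector of counts can be built from it. */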
2481 /* Supportable by target? */
2484 if (vect_print_dump_info (REPORT_DETAILS))
2485 fprintf (vect_dump, "no optab.");
2488 vec_mode = TYPE_MODE (vectype);
2489 icode = (int) optab_handler (optab, vec_mode);
2490 if (icode == CODE_FOR_nothing)
2492 if (vect_print_dump_info (REPORT_DETAILS))
2493 fprintf (vect_dump, "op not supported by target.");
2494 /* Check only during analysis. */
2495 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2496 || (vf < vect_min_worthwhile_factor (code)
2499 if (vect_print_dump_info (REPORT_DETAILS))
2500 fprintf (vect_dump, "proceeding using word mode.");
2503 /* Worthwhile without SIMD support? Check only during analysis. */
2504 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2505 && vf < vect_min_worthwhile_factor (code)
2508 if (vect_print_dump_info (REPORT_DETAILS))
2509 fprintf (vect_dump, "not worthwhile without SIMD support.");
2513 if (!vec_stmt) /* transformation not required. */
2515 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2516 if (vect_print_dump_info (REPORT_DETAILS))
2517 fprintf (vect_dump, "=== vectorizable_shift ===");
2518 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2524 if (vect_print_dump_info (REPORT_DETAILS))
2525 fprintf (vect_dump, "transform binary/unary operation.");
2528 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2530 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2531 created in the previous stages of the recursion, so no allocation is
2532 needed, except for the case of shift with scalar shift argument. In that
2533 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2534 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2535 In case of loop-based vectorization we allocate VECs of size 1. We
2536 allocate VEC_OPRNDS1 only in case of binary operation. */
2539 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2540 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2542 else if (scalar_shift_arg)
2543 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2545 prev_stmt_info = NULL;
2546 for (j = 0; j < ncopies; j++)
2551 if (scalar_shift_arg)
2553 /* Vector shl and shr insn patterns can be defined with scalar
2554 operand 2 (shift operand). In this case, use constant or loop
2555 invariant op1 directly, without extending it to vector mode
2556 first. */
2557 optab_op2_mode = insn_data[icode].operand[2].mode;
2558 if (!VECTOR_MODE_P (optab_op2_mode))
2560 if (vect_print_dump_info (REPORT_DETAILS))
2561 fprintf (vect_dump, "operand 1 using scalar mode.");
2563 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2566 /* Store vec_oprnd1 for every vector stmt to be created
2567 for SLP_NODE. We check during the analysis that all
2568 the shift arguments are the same.
2569 TODO: Allow different constants for different vector
2570 stmts generated for an SLP instance. */
2571 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2572 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2577 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2578 (a special case for certain kind of vector shifts); otherwise,
2579 operand 1 should be of a vector type (the usual case). */
2581 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2584 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2588 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2590 /* Arguments are ready. Create the new vector stmt. */
2591 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2593 vop1 = VEC_index (tree, vec_oprnds1, i);
2594 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2595 new_temp = make_ssa_name (vec_dest, new_stmt);
2596 gimple_assign_set_lhs (new_stmt, new_temp);
2597 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2599 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2606 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2608 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2609 prev_stmt_info = vinfo_for_stmt (new_stmt);
2612 VEC_free (tree, heap, vec_oprnds0);
2613 VEC_free (tree, heap, vec_oprnds1);
2619 /* Function vectorizable_operation.
2621 Check if STMT performs a binary, unary or ternary operation that can
2622 be vectorized.
2623 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2624 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2625 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
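/* Editor's sketch (V4SI assumed): with VF = 4,

     S1: z_1 = x_2 + y_3;              becomes
     VS1: vz_1 = vx_2 + vy_3;

   a ternary rhs (e.g. a fused multiply-add, where the target supports
   one) takes the VEC_OPRNDS2 path below instead. */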
2628 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2629 gimple *vec_stmt, slp_tree slp_node)
2633 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2636 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2637 enum tree_code code;
2638 enum machine_mode vec_mode;
2645 enum vect_def_type dt[3]
2646 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2647 gimple new_stmt = NULL;
2648 stmt_vec_info prev_stmt_info;
2654 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2655 tree vop0, vop1, vop2;
2656 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2659 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2662 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2665 /* Is STMT a vectorizable binary/unary operation? */
2666 if (!is_gimple_assign (stmt))
2669 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2672 code = gimple_assign_rhs_code (stmt);
2674 /* For pointer addition, we should use the normal plus for
2675 the vector addition. */
2676 if (code == POINTER_PLUS_EXPR)
2679 /* Support only unary or binary operations. */
2680 op_type = TREE_CODE_LENGTH (code);
2681 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2685 op_type);
2689 scalar_dest = gimple_assign_lhs (stmt);
2690 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2692 /* Most operations cannot handle bit-precision types without extra
2693 truncations. */
2694 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2695 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2696 /* Exception are bitwise binary operations. */
2697 && code != BIT_IOR_EXPR
2698 && code != BIT_XOR_EXPR
2699 && code != BIT_AND_EXPR)
2701 if (vect_print_dump_info (REPORT_DETAILS))
2702 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2706 op0 = gimple_assign_rhs1 (stmt);
2707 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2708 &def_stmt, &def, &dt[0], &vectype))
2710 if (vect_print_dump_info (REPORT_DETAILS))
2711 fprintf (vect_dump, "use not simple.");
2714 /* If op0 is an external or constant def use a vector type with
2715 the same size as the output vector type. */
2717 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2719 gcc_assert (vectype);
2722 if (vect_print_dump_info (REPORT_DETAILS))
2724 fprintf (vect_dump, "no vectype for scalar type ");
2725 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2731 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2732 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2733 if (nunits_out != nunits_in)
2736 if (op_type == binary_op || op_type == ternary_op)
2738 op1 = gimple_assign_rhs2 (stmt);
2739 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2742 if (vect_print_dump_info (REPORT_DETAILS))
2743 fprintf (vect_dump, "use not simple.");
2747 if (op_type == ternary_op)
2749 op2 = gimple_assign_rhs3 (stmt);
2750 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2753 if (vect_print_dump_info (REPORT_DETAILS))
2754 fprintf (vect_dump, "use not simple.");
2760 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2764 /* Multiple types in SLP are handled by creating the appropriate number of
2765 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2766 case of SLP. */
2767 if (slp_node || PURE_SLP_STMT (stmt_info))
2770 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2772 gcc_assert (ncopies >= 1);
2774 /* Shifts are handled in vectorizable_shift (). */
2775 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2776 || code == RROTATE_EXPR)
2779 optab = optab_for_tree_code (code, vectype, optab_default);
2781 /* Supportable by target? */
2784 if (vect_print_dump_info (REPORT_DETAILS))
2785 fprintf (vect_dump, "no optab.");
2788 vec_mode = TYPE_MODE (vectype);
2789 icode = (int) optab_handler (optab, vec_mode);
2790 if (icode == CODE_FOR_nothing)
2792 if (vect_print_dump_info (REPORT_DETAILS))
2793 fprintf (vect_dump, "op not supported by target.");
2794 /* Check only during analysis. */
2795 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2796 || (vf < vect_min_worthwhile_factor (code)
2799 if (vect_print_dump_info (REPORT_DETAILS))
2800 fprintf (vect_dump, "proceeding using word mode.");
2803 /* Worthwhile without SIMD support? Check only during analysis. */
2804 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2805 && vf < vect_min_worthwhile_factor (code)
2808 if (vect_print_dump_info (REPORT_DETAILS))
2809 fprintf (vect_dump, "not worthwhile without SIMD support.");
2813 if (!vec_stmt) /* transformation not required. */
2815 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2816 if (vect_print_dump_info (REPORT_DETAILS))
2817 fprintf (vect_dump, "=== vectorizable_operation ===");
2818 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2824 if (vect_print_dump_info (REPORT_DETAILS))
2825 fprintf (vect_dump, "transform binary/unary operation.");
2828 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2830 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2831 created in the previous stages of the recursion, so no allocation is
2832 needed, except for the case of shift with scalar shift argument. In that
2833 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2834 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2835 In case of loop-based vectorization we allocate VECs of size 1. We
2836 allocate VEC_OPRNDS1 for a binary/ternary operation, and VEC_OPRNDS2 for a ternary operation. */
2839 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2840 if (op_type == binary_op || op_type == ternary_op)
2841 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2842 if (op_type == ternary_op)
2843 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2846 /* In case the vectorization factor (VF) is bigger than the number
2847 of elements that we can fit in a vectype (nunits), we have to generate
2848 more than one vector stmt - i.e - we need to "unroll" the
2849 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2850 from one copy of the vector stmt to the next, in the field
2851 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2852 stages to find the correct vector defs to be used when vectorizing
2853 stmts that use the defs of the current stmt. The example below
2854 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2855 we need to create 4 vectorized stmts):
2857 before vectorization:
2858 RELATED_STMT VEC_STMT
2862 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2864 RELATED_STMT VEC_STMT
2865 VS1_0: vx0 = memref0 VS1_1 -
2866 VS1_1: vx1 = memref1 VS1_2 -
2867 VS1_2: vx2 = memref2 VS1_3 -
2868 VS1_3: vx3 = memref3 - -
2869 S1: x = load - VS1_0
2872 step2: vectorize stmt S2 (done here):
2873 To vectorize stmt S2 we first need to find the relevant vector
2874 def for the first operand 'x'. This is, as usual, obtained from
2875 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2876 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2877 relevant vector def 'vx0'. Having found 'vx0' we can generate
2878 the vector stmt VS2_0, and as usual, record it in the
2879 STMT_VINFO_VEC_STMT of stmt S2.
2880 When creating the second copy (VS2_1), we obtain the relevant vector
2881 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2882 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2883 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2884 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2885 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2886 chain of stmts and pointers:
2887 RELATED_STMT VEC_STMT
2888 VS1_0: vx0 = memref0 VS1_1 -
2889 VS1_1: vx1 = memref1 VS1_2 -
2890 VS1_2: vx2 = memref2 VS1_3 -
2891 VS1_3: vx3 = memref3 - -
2892 S1: x = load - VS1_0
2893 VS2_0: vz0 = vx0 + v1 VS2_1 -
2894 VS2_1: vz1 = vx1 + v1 VS2_2 -
2895 VS2_2: vz2 = vx2 + v1 VS2_3 -
2896 VS2_3: vz3 = vx3 + v1 - -
2897 S2: z = x + 1 - VS2_0 */
2899 prev_stmt_info = NULL;
2900 for (j = 0; j < ncopies; j++)
2905 if (op_type == binary_op || op_type == ternary_op)
2906 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2909 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2911 if (op_type == ternary_op)
2913 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2914 VEC_quick_push (tree, vec_oprnds2,
2915 vect_get_vec_def_for_operand (op2, stmt, NULL));
2920 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2921 if (op_type == ternary_op)
2923 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2924 VEC_quick_push (tree, vec_oprnds2,
2925 vect_get_vec_def_for_stmt_copy (dt[2],
2930 /* Arguments are ready. Create the new vector stmt. */
2931 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2933 vop1 = ((op_type == binary_op || op_type == ternary_op)
2934 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2935 vop2 = ((op_type == ternary_op)
2936 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2937 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2938 vop0, vop1, vop2);
2939 new_temp = make_ssa_name (vec_dest, new_stmt);
2940 gimple_assign_set_lhs (new_stmt, new_temp);
2941 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2943 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2950 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2952 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2953 prev_stmt_info = vinfo_for_stmt (new_stmt);
2956 VEC_free (tree, heap, vec_oprnds0);
2958 VEC_free (tree, heap, vec_oprnds1);
2960 VEC_free (tree, heap, vec_oprnds2);
2966 /* Get vectorized definitions for loop-based vectorization. For the first
2967 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2968 scalar operand), and for the rest we get a copy with
2969 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2970 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2971 The vectors are collected into VEC_OPRNDS. */
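/* E.g. (editor's illustration): a caller doing a two-level narrowing
   passes MULTI_STEP_CVT = vect_pow2 (1) - 1 = 1, so one recursive call
   is made and 2 + 2 = 4 vector defs end up in VEC_OPRNDS. */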
2974 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2975 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2979 /* Get first vector operand. */
2980 /* All the vector operands except the very first one (that is scalar oprnd)
2981 are stmt copies. */
2982 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2983 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2985 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2987 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2989 /* Get second vector operand. */
2990 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2991 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2995 /* For conversion in multiple steps, continue to get operands
2996 recursively. */
2997 if (multi_step_cvt)
2998 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3002 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3003 For multi-step conversions store the resulting vectors and call the function
3004 recursively. */
3007 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3008 int multi_step_cvt, gimple stmt,
3009 VEC (tree, heap) *vec_dsts,
3010 gimple_stmt_iterator *gsi,
3011 slp_tree slp_node, enum tree_code code,
3012 stmt_vec_info *prev_stmt_info)
3015 tree vop0, vop1, new_tmp, vec_dest;
3017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3019 vec_dest = VEC_pop (tree, vec_dsts);
3021 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3023 /* Create demotion operation. */
3024 vop0 = VEC_index (tree, *vec_oprnds, i);
3025 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3026 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3027 new_tmp = make_ssa_name (vec_dest, new_stmt);
3028 gimple_assign_set_lhs (new_stmt, new_tmp);
3029 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3032 /* Store the resulting vector for next recursive call. */
3033 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3036 /* This is the last step of the conversion sequence. Store the
3037 vectors in SLP_NODE or in vector info of the scalar statement
3038 (or in STMT_VINFO_RELATED_STMT chain). */
3040 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3043 if (!*prev_stmt_info)
3044 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3046 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3048 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3053 /* For multi-step demotion operations we first generate demotion operations
3054 from the source type to the intermediate types, and then combine the
3055 results (stored in VEC_OPRNDS) in demotion operation to the destination
3056 type. */
3059 /* At each level of recursion we have half of the operands we had at the
3060 previous level. */
3061 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3062 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3063 stmt, vec_dsts, gsi, slp_node,
3064 code, prev_stmt_info);
3069 /* Function vectorizable_type_demotion
3071 Check if STMT performs a binary or unary operation that involves
3072 type demotion, and if it can be vectorized.
3073 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3074 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3075 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
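/* Illustrative example (editor's note, 128-bit vectors assumed):
   demoting int to char goes through one intermediate type (short):
   four V4SI operands are packed pairwise into two V8HI vectors, which
   are packed into the single V16QI result, each step using a narrowing
   operation such as VEC_PACK_TRUNC_EXPR where the target supports it. */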
3078 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3079 gimple *vec_stmt, slp_tree slp_node)
3084 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3085 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3086 enum tree_code code, code1 = ERROR_MARK;
3089 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3090 stmt_vec_info prev_stmt_info;
3097 int multi_step_cvt = 0;
3098 VEC (tree, heap) *vec_oprnds0 = NULL;
3099 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3100 tree last_oprnd, intermediate_type;
3101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3103 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3106 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3109 /* Is STMT a vectorizable type-demotion operation? */
3110 if (!is_gimple_assign (stmt))
3113 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3116 code = gimple_assign_rhs_code (stmt);
3117 if (!CONVERT_EXPR_CODE_P (code))
3120 scalar_dest = gimple_assign_lhs (stmt);
3121 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3123 /* Check the operands of the operation. */
3124 op0 = gimple_assign_rhs1 (stmt);
3125 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3126 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3127 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3128 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
3131 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3132 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3134 || ((TYPE_PRECISION (TREE_TYPE (op0))
3135 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3137 if (vect_print_dump_info (REPORT_DETAILS))
3138 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3142 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3143 &def_stmt, &def, &dt[0], &vectype_in))
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "use not simple.");
3149 /* If op0 is an external def use a vector type with the
3150 same size as the output vector type if possible. */
3152 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3154 gcc_assert (vectype_in);
3157 if (vect_print_dump_info (REPORT_DETAILS))
3159 fprintf (vect_dump, "no vectype for scalar type ");
3160 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3166 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3167 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3168 if (nunits_in >= nunits_out)
3171 /* Multiple types in SLP are handled by creating the appropriate number of
3172 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3173 case of SLP. */
3174 if (slp_node || PURE_SLP_STMT (stmt_info))
3177 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3178 gcc_assert (ncopies >= 1);
3180 /* Supportable by target? */
3181 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3182 &code1, &multi_step_cvt, &interm_types))
3185 if (!vec_stmt) /* transformation not required. */
3187 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3188 if (vect_print_dump_info (REPORT_DETAILS))
3189 fprintf (vect_dump, "=== vectorizable_demotion ===");
3190 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3195 if (vect_print_dump_info (REPORT_DETAILS))
3196 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3197 ncopies);
3199 /* In case of multi-step demotion, we first generate demotion operations to
3200 the intermediate types, and then from those types to the final one.
3201 We create vector destinations for the intermediate type (TYPES) received
3202 from supportable_narrowing_operation, and store them in the correct order
3203 for future use in vect_create_vectorized_demotion_stmts(). */
3205 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3207 vec_dsts = VEC_alloc (tree, heap, 1);
3209 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3210 VEC_quick_push (tree, vec_dsts, vec_dest);
3214 for (i = VEC_length (tree, interm_types) - 1;
3215 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3217 vec_dest = vect_create_destination_var (scalar_dest,
3219 VEC_quick_push (tree, vec_dsts, vec_dest);
3223 /* In case the vectorization factor (VF) is bigger than the number
3224 of elements that we can fit in a vectype (nunits), we have to generate
3225 more than one vector stmt - i.e - we need to "unroll" the
3226 vector stmt by a factor VF/nunits. */
3228 prev_stmt_info = NULL;
3229 for (j = 0; j < ncopies; j++)
3233 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3236 VEC_free (tree, heap, vec_oprnds0);
3237 vec_oprnds0 = VEC_alloc (tree, heap,
3238 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3239 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3240 vect_pow2 (multi_step_cvt) - 1);
3243 /* Arguments are ready. Create the new vector stmts. */
3244 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3245 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3246 multi_step_cvt, stmt, tmp_vec_dsts,
3247 gsi, slp_node, code1,
3251 VEC_free (tree, heap, vec_oprnds0);
3252 VEC_free (tree, heap, vec_dsts);
3253 VEC_free (tree, heap, tmp_vec_dsts);
3254 VEC_free (tree, heap, interm_types);
3256 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3261 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3262 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3263 the resulting vectors and call the function recursively. */
3266 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3267 VEC (tree, heap) **vec_oprnds1,
3268 int multi_step_cvt, gimple stmt,
3269 VEC (tree, heap) *vec_dsts,
3270 gimple_stmt_iterator *gsi,
3271 slp_tree slp_node, enum tree_code code1,
3272 enum tree_code code2, tree decl1,
3273 tree decl2, int op_type,
3274 stmt_vec_info *prev_stmt_info)
3277 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3278 gimple new_stmt1, new_stmt2;
3279 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3280 VEC (tree, heap) *vec_tmp;
3282 vec_dest = VEC_pop (tree, vec_dsts);
3283 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3285 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3287 if (op_type == binary_op)
3288 vop1 = VEC_index (tree, *vec_oprnds1, i);
3292 /* Generate the two halves of promotion operation. */
3293 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3294 op_type, vec_dest, gsi, stmt);
3295 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3296 op_type, vec_dest, gsi, stmt);
3297 if (is_gimple_call (new_stmt1))
3299 new_tmp1 = gimple_call_lhs (new_stmt1);
3300 new_tmp2 = gimple_call_lhs (new_stmt2);
3304 new_tmp1 = gimple_assign_lhs (new_stmt1);
3305 new_tmp2 = gimple_assign_lhs (new_stmt2);
3310 /* Store the results for the recursive call. */
3311 VEC_quick_push (tree, vec_tmp, new_tmp1);
3312 VEC_quick_push (tree, vec_tmp, new_tmp2);
3316 /* Last step of the promotion sequence - store the results. */
3319 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3320 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3324 if (!*prev_stmt_info)
3325 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3327 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3329 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3330 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3331 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3338 /* For a multi-step promotion operation we call the function
3339 recursively for every stage. We start from the input type,
3340 create promotion operations to the intermediate types, and then
3341 create promotions to the output type. */
3342 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3343 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3344 multi_step_cvt - 1, stmt,
3345 vec_dsts, gsi, slp_node, code1,
3346 code2, decl1, decl2, op_type,
3350 VEC_free (tree, heap, vec_tmp);
3354 /* Function vectorizable_type_promotion
3356 Check if STMT performs a binary or unary operation that involves
3357 type promotion, and if it can be vectorized.
3358 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3359 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3360 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
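/* Illustrative example (editor's note, V8HI -> V4SI assumed): widening
   short to int splits each source vector into two halves,

     VS1: vx_lo = VEC_UNPACK_LO_EXPR <vs_1>
     VS2: vx_hi = VEC_UNPACK_HI_EXPR <vs_1>

   so every scalar stmt copy yields two vector stmts, which is why the
   cost below is modeled with 2*ncopies. */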
3363 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3364 gimple *vec_stmt, slp_tree slp_node)
3368 tree op0, op1 = NULL;
3369 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3370 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3371 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3372 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3373 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3377 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3378 stmt_vec_info prev_stmt_info;
3385 tree intermediate_type = NULL_TREE;
3386 int multi_step_cvt = 0;
3387 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3388 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3389 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3392 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3395 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3398 /* Is STMT a vectorizable type-promotion operation? */
3399 if (!is_gimple_assign (stmt))
3402 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3405 code = gimple_assign_rhs_code (stmt);
3406 if (!CONVERT_EXPR_CODE_P (code)
3407 && code != WIDEN_MULT_EXPR
3408 && code != WIDEN_LSHIFT_EXPR)
3411 scalar_dest = gimple_assign_lhs (stmt);
3412 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3414 /* Check the operands of the operation. */
3415 op0 = gimple_assign_rhs1 (stmt);
3416 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3417 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3418 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3419 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3420 && CONVERT_EXPR_CODE_P (code))))
3423 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3424 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3425 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3426 || ((TYPE_PRECISION (TREE_TYPE (op0))
3427 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3429 if (vect_print_dump_info (REPORT_DETAILS))
3430 fprintf (vect_dump, "type promotion to/from bit-precision "
3431 "unsupported.");
3435 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3436 &def_stmt, &def, &dt[0], &vectype_in))
3438 if (vect_print_dump_info (REPORT_DETAILS))
3439 fprintf (vect_dump, "use not simple.");
3443 op_type = TREE_CODE_LENGTH (code);
3444 if (op_type == binary_op)
3448 op1 = gimple_assign_rhs2 (stmt);
3449 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3451 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3452 OP1. */
3453 if (CONSTANT_CLASS_P (op0))
3454 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3455 &def_stmt, &def, &dt[1], &vectype_in);
3457 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3462 if (vect_print_dump_info (REPORT_DETAILS))
3463 fprintf (vect_dump, "use not simple.");
3469 /* If op0 is an external or constant def use a vector type with
3470 the same size as the output vector type. */
3472 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3474 gcc_assert (vectype_in);
3477 if (vect_print_dump_info (REPORT_DETAILS))
3479 fprintf (vect_dump, "no vectype for scalar type ");
3480 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3486 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3487 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3488 if (nunits_in <= nunits_out)
3491 /* Multiple types in SLP are handled by creating the appropriate number of
3492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3493 case of SLP. */
3494 if (slp_node || PURE_SLP_STMT (stmt_info))
3497 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3499 gcc_assert (ncopies >= 1);
3501 /* Supportable by target? */
3502 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3503 &decl1, &decl2, &code1, &code2,
3504 &multi_step_cvt, &interm_types))
3507 /* Binary widening operation can only be supported directly by the
3508 architecture. */
3509 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3511 if (!vec_stmt) /* transformation not required. */
3513 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3514 if (vect_print_dump_info (REPORT_DETAILS))
3515 fprintf (vect_dump, "=== vectorizable_promotion ===");
3516 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3522 if (vect_print_dump_info (REPORT_DETAILS))
3523 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3524 ncopies);
3526 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3528 if (CONSTANT_CLASS_P (op0))
3529 op0 = fold_convert (TREE_TYPE (op1), op0);
3530 else if (CONSTANT_CLASS_P (op1))
3531 op1 = fold_convert (TREE_TYPE (op0), op1);
3535 /* In case of multi-step promotion, we first generate promotion operations
3536 to the intermediate types, and then from those types to the final one.
3537 We store vector destination in VEC_DSTS in the correct order for
3538 recursive creation of promotion operations in
3539 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3540 according to TYPES received from supportable_widening_operation(). */
3542 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3544 vec_dsts = VEC_alloc (tree, heap, 1);
3546 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3547 VEC_quick_push (tree, vec_dsts, vec_dest);
3551 for (i = VEC_length (tree, interm_types) - 1;
3552 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3554 vec_dest = vect_create_destination_var (scalar_dest,
3556 VEC_quick_push (tree, vec_dsts, vec_dest);
3562 vec_oprnds0 = VEC_alloc (tree, heap,
3563 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3564 if (op_type == binary_op)
3565 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3567 else if (code == WIDEN_LSHIFT_EXPR)
3568 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3570 /* In case the vectorization factor (VF) is bigger than the number
3571 of elements that we can fit in a vectype (nunits), we have to generate
3572 more than one vector stmt - i.e - we need to "unroll" the
3573 vector stmt by a factor VF/nunits. */
3575 prev_stmt_info = NULL;
3576 for (j = 0; j < ncopies; j++)
3583 if (code == WIDEN_LSHIFT_EXPR)
3586 /* Store vec_oprnd1 for every vector stmt to be created
3587 for SLP_NODE. We check during the analysis that all
3588 the shift arguments are the same. */
3589 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3590 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3592 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
3596 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3601 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3602 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3603 if (op_type == binary_op)
3605 if (code == WIDEN_LSHIFT_EXPR)
3608 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3609 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3615 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3616 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3617 if (op_type == binary_op)
3619 if (code == WIDEN_LSHIFT_EXPR)
3622 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3623 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3627 /* Arguments are ready. Create the new vector stmts. */
3628 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3629 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3630 multi_step_cvt, stmt,
3632 gsi, slp_node, code1, code2,
3633 decl1, decl2, op_type,
3637 VEC_free (tree, heap, vec_dsts);
3638 VEC_free (tree, heap, tmp_vec_dsts);
3639 VEC_free (tree, heap, interm_types);
3640 VEC_free (tree, heap, vec_oprnds0);
3641 VEC_free (tree, heap, vec_oprnds1);
3643 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3648 /* Function vectorizable_store.
3650 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3651 can be vectorized.
3652 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3653 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3654 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
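/* Editor's sketch of the generated code (V4SI assumed): a store
   a[i] = x_1 in a VF = 4 loop becomes

     MEM_REF[(int *)ptr] = vx_1;

   with the data-ref pointer bumped by the vector size between copies;
   interleaved groups additionally pass their operands through
   vect_permute_store_chain (see the comment further down). */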
3657 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3663 tree vec_oprnd = NULL_TREE;
3664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3665 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3666 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3669 struct loop *loop = NULL;
3670 enum machine_mode vec_mode;
3672 enum dr_alignment_support alignment_support_scheme;
3675 enum vect_def_type dt;
3676 stmt_vec_info prev_stmt_info = NULL;
3677 tree dataref_ptr = NULL_TREE;
3678 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3681 gimple next_stmt, first_stmt = NULL;
3682 bool strided_store = false;
3683 bool store_lanes_p = false;
3684 unsigned int group_size, i;
3685 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3687 VEC(tree,heap) *vec_oprnds = NULL;
3688 bool slp = (slp_node != NULL);
3689 unsigned int vec_num;
3690 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3694 loop = LOOP_VINFO_LOOP (loop_vinfo);
3696 /* Multiple types in SLP are handled by creating the appropriate number of
3697 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3698 case of SLP. */
3699 if (slp || PURE_SLP_STMT (stmt_info))
3702 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3704 gcc_assert (ncopies >= 1);
3706 /* FORNOW. This restriction should be relaxed. */
3707 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3709 if (vect_print_dump_info (REPORT_DETAILS))
3710 fprintf (vect_dump, "multiple types in nested loop.");
3714 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3717 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3720 /* Is vectorizable store? */
3722 if (!is_gimple_assign (stmt))
3725 scalar_dest = gimple_assign_lhs (stmt);
3726 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3727 && is_pattern_stmt_p (stmt_info))
3728 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3729 if (TREE_CODE (scalar_dest) != ARRAY_REF
3730 && TREE_CODE (scalar_dest) != INDIRECT_REF
3731 && TREE_CODE (scalar_dest) != COMPONENT_REF
3732 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3733 && TREE_CODE (scalar_dest) != REALPART_EXPR
3734 && TREE_CODE (scalar_dest) != MEM_REF)
3737 gcc_assert (gimple_assign_single_p (stmt));
3738 op = gimple_assign_rhs1 (stmt);
3739 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3741 if (vect_print_dump_info (REPORT_DETAILS))
3742 fprintf (vect_dump, "use not simple.");
3746 elem_type = TREE_TYPE (vectype);
3747 vec_mode = TYPE_MODE (vectype);
3749 /* FORNOW. In some cases can vectorize even if data-type not supported
3750 (e.g. - array initialization with 0). */
3751 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3754 if (!STMT_VINFO_DATA_REF (stmt_info))
3757 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3759 if (vect_print_dump_info (REPORT_DETAILS))
3760 fprintf (vect_dump, "negative step for store.");
3764 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3766 strided_store = true;
3767 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3768 if (!slp && !PURE_SLP_STMT (stmt_info))
3770 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3771 if (vect_store_lanes_supported (vectype, group_size))
3772 store_lanes_p = true;
3773 else if (!vect_strided_store_supported (vectype, group_size))
3777 if (first_stmt == stmt)
3779 /* STMT is the leader of the group. Check the operands of all the
3780 stmts of the group. */
3781 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3784 gcc_assert (gimple_assign_single_p (next_stmt));
3785 op = gimple_assign_rhs1 (next_stmt);
3786 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3789 if (vect_print_dump_info (REPORT_DETAILS))
3790 fprintf (vect_dump, "use not simple.");
3793 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3798 if (!vec_stmt) /* transformation not required. */
3800 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3801 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3809 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3810 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3812 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3815 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3817 /* We vectorize all the stmts of the interleaving group when we
3818 reach the last stmt in the group. */
3819 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3820 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3829 strided_store = false;
3830 /* VEC_NUM is the number of vect stmts to be created for this
3832 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3833 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3834 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3837 /* VEC_NUM is the number of vect stmts to be created for this
3839 vec_num = group_size;
3845 group_size = vec_num = 1;
3848 if (vect_print_dump_info (REPORT_DETAILS))
3849 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3851 dr_chain = VEC_alloc (tree, heap, group_size);
3852 oprnds = VEC_alloc (tree, heap, group_size);
3854 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3855 gcc_assert (alignment_support_scheme);
3856 /* Targets with store-lane instructions must not require explicit
3857 realignment. */
3858 gcc_assert (!store_lanes_p
3859 || alignment_support_scheme == dr_aligned
3860 || alignment_support_scheme == dr_unaligned_supported);
3863 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3865 aggr_type = vectype;
3867 /* In case the vectorization factor (VF) is bigger than the number
3868 of elements that we can fit in a vectype (nunits), we have to generate
3869 more than one vector stmt - i.e - we need to "unroll" the
3870 vector stmt by a factor VF/nunits. For more details see documentation in
3871 vect_get_vec_def_for_stmt_copy. */
3873 /* In case of interleaving (non-unit strided access):
3875 S1: &base + 2 = x2
3876 S2: &base = x0
3877 S3: &base + 1 = x1
3878 S4: &base + 3 = x3
3880 We create vectorized stores starting from base address (the access of the
3881 first stmt in the chain (S2 in the above example), when the last store stmt
3882 of the chain (S4) is reached:
3884 VS1: &base = vx2
3885 VS2: &base + vec_size*1 = vx0
3886 VS3: &base + vec_size*2 = vx1
3887 VS4: &base + vec_size*3 = vx3
3889 Then permutation statements are generated:
3891 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3892 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3895 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3896 (the order of the data-refs in the output of vect_permute_store_chain
3897 corresponds to the order of scalar stmts in the interleaving chain - see
3898 the documentation of vect_permute_store_chain()).
3900 In case of both multiple types and interleaving, the above vector stores and
3901 permutation stmts are created for every copy. The result vector stmts are
3902 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3903 STMT_VINFO_RELATED_STMT for the next copies.
3906 prev_stmt_info = NULL;
3907 for (j = 0; j < ncopies; j++)
3916 /* Get vectorized arguments for SLP_NODE. */
3917 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3920 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3924 /* For interleaved stores we collect vectorized defs for all the
3925 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3926 used as an input to vect_permute_store_chain(), and OPRNDS as
3927 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3929 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3930 OPRNDS are of size 1. */
3931 next_stmt = first_stmt;
3932 for (i = 0; i < group_size; i++)
3934 /* Since gaps are not supported for interleaved stores,
3935 GROUP_SIZE is the exact number of stmts in the chain.
3936 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3937 there is no interleaving, GROUP_SIZE is 1, and only one
3938 iteration of the loop will be executed. */
3939 gcc_assert (next_stmt
3940 && gimple_assign_single_p (next_stmt));
3941 op = gimple_assign_rhs1 (next_stmt);
3943 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3945 VEC_quick_push(tree, dr_chain, vec_oprnd);
3946 VEC_quick_push(tree, oprnds, vec_oprnd);
3947 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3951 /* We should have caught mismatched types earlier. */
3952 gcc_assert (useless_type_conversion_p (vectype,
3953 TREE_TYPE (vec_oprnd)));
3954 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3955 NULL_TREE, &dummy, gsi,
3956 &ptr_incr, false, &inv_p);
3957 gcc_assert (bb_vinfo || !inv_p);
3961 /* For interleaved stores we created vectorized defs for all the
3962 defs stored in OPRNDS in the previous iteration (previous copy).
3963 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3964 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3966 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3967 OPRNDS are of size 1. */
3968 for (i = 0; i < group_size; i++)
3970 op = VEC_index (tree, oprnds, i);
3971 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3973 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3974 VEC_replace(tree, dr_chain, i, vec_oprnd);
3975 VEC_replace(tree, oprnds, i, vec_oprnd);
3977 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3978 TYPE_SIZE_UNIT (aggr_type));
3985 /* Combine all the vectors into an array. */
3986 vec_array = create_vector_array (vectype, vec_num);
3987 for (i = 0; i < vec_num; i++)
3989 vec_oprnd = VEC_index (tree, dr_chain, i);
3990 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3993 /* Emit:
3994 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3995 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3996 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3997 gimple_call_set_lhs (new_stmt, data_ref);
3998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999 mark_symbols_for_renaming (new_stmt);
4006 result_chain = VEC_alloc (tree, heap, group_size);
4008 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4012 next_stmt = first_stmt;
4013 for (i = 0; i < vec_num; i++)
4015 struct ptr_info_def *pi;
4018 /* Bump the vector pointer. */
4019 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4023 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4024 else if (strided_store)
4025 /* For strided stores vectorized defs are interleaved in
4026 vect_permute_store_chain(). */
4027 vec_oprnd = VEC_index (tree, result_chain, i);
4029 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4030 build_int_cst (reference_alias_ptr_type
4031 (DR_REF (first_dr)), 0));
4032 pi = get_ptr_info (dataref_ptr);
4033 pi->align = TYPE_ALIGN_UNIT (vectype);
4034 if (aligned_access_p (first_dr))
4036 else if (DR_MISALIGNMENT (first_dr) == -1)
4038 TREE_TYPE (data_ref)
4039 = build_aligned_type (TREE_TYPE (data_ref),
4040 TYPE_ALIGN (elem_type));
4041 pi->align = TYPE_ALIGN_UNIT (elem_type);
4046 TREE_TYPE (data_ref)
4047 = build_aligned_type (TREE_TYPE (data_ref),
4048 TYPE_ALIGN (elem_type));
4049 pi->misalign = DR_MISALIGNMENT (first_dr);
4052 /* Arguments are ready. Create the new vector stmt. */
4053 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4055 mark_symbols_for_renaming (new_stmt);
4060 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4071 prev_stmt_info = vinfo_for_stmt (new_stmt);
4075 VEC_free (tree, heap, dr_chain);
4076 VEC_free (tree, heap, oprnds);
4078 VEC_free (tree, heap, result_chain);
4080 VEC_free (tree, heap, vec_oprnds);
4085 /* Given a vector type VECTYPE, return the mask that implements
4086 a permutation reversing the order of the vector elements.
4087 If that is impossible to do, return NULL. */
4091 perm_mask_for_reverse (tree vectype)
4093 tree mask_element_type, mask_type, mask_vec = NULL;
4096 if (!can_vec_perm_expr_p (vectype, NULL_TREE))
4097 return NULL;
4099 mask_element_type
4100 = lang_hooks.types.type_for_size
4101 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4102 mask_type = get_vectype_for_scalar_type (mask_element_type);
4103 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4105 for (i = 0; i < nunits; i++)
4106 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4107 mask_vec = build_vector (mask_type, mask_vec);
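/* E.g. (editor's illustration): for V4SI the selector built above is
   {3, 2, 1, 0}; a VEC_PERM_EXPR with that mask maps {a, b, c, d} to
   {d, c, b, a}. */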
4109 if (!can_vec_perm_expr_p (vectype, mask_vec))
4115 /* Given a vector variable X that was generated for the scalar LHS of
4116 STMT, generate instructions to reverse the vector elements of X,
4117 insert them at *GSI and return the permuted vector variable. */
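/* Usage sketch (editor's note): vectorizable_load below uses this for
   negative-step accesses; the vector is loaded from the lower-addressed
   end of the access and then reversed to restore the original element
   order. */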
4120 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4122 tree vectype = TREE_TYPE (x);
4123 tree mask_vec, perm_dest, data_ref;
4126 mask_vec = perm_mask_for_reverse (vectype);
4128 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4130 /* Generate the permute statement. */
4131 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4133 data_ref = make_ssa_name (perm_dest, perm_stmt);
4134 gimple_set_lhs (perm_stmt, data_ref);
4135 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4140 /* vectorizable_load.
4142 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4143 can be vectorized.
4144 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4145 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4146 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4149 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4150 slp_tree slp_node, slp_instance slp_node_instance)
4153 tree vec_dest = NULL;
4154 tree data_ref = NULL;
4155 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4156 stmt_vec_info prev_stmt_info;
4157 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4158 struct loop *loop = NULL;
4159 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4160 bool nested_in_vect_loop = false;
4161 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4162 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4165 enum machine_mode mode;
4166 gimple new_stmt = NULL;
4168 enum dr_alignment_support alignment_support_scheme;
4169 tree dataref_ptr = NULL_TREE;
4171 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4173 int i, j, group_size;
4174 tree msq = NULL_TREE, lsq;
4175 tree offset = NULL_TREE;
4176 tree realignment_token = NULL_TREE;
4178 VEC(tree,heap) *dr_chain = NULL;
4179 bool strided_load = false;
4180 bool load_lanes_p = false;
4184 bool compute_in_loop = false;
4185 struct loop *at_loop;
4187 bool slp = (slp_node != NULL);
4188 bool slp_perm = false;
4189 enum tree_code code;
4190 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4196 loop = LOOP_VINFO_LOOP (loop_vinfo);
4197 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4198 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4203 /* Multiple types in SLP are handled by creating the appropriate number of
4204 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */
4206 if (slp || PURE_SLP_STMT (stmt_info))
4209 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
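/* E.g. a vectorization factor of 8 with 4 elements per vector gives
   ncopies = 2: each scalar stmt needs two vector copies.  */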
4211 gcc_assert (ncopies >= 1);
4213 /* FORNOW. This restriction should be relaxed. */
4214 if (nested_in_vect_loop && ncopies > 1)
4216 if (vect_print_dump_info (REPORT_DETAILS))
4217 fprintf (vect_dump, "multiple types in nested loop.");
4221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4227 /* Is vectorizable load? */
4228 if (!is_gimple_assign (stmt))
4231 scalar_dest = gimple_assign_lhs (stmt);
4232 if (TREE_CODE (scalar_dest) != SSA_NAME)
4235 code = gimple_assign_rhs_code (stmt);
4236 if (code != ARRAY_REF
4237 && code != INDIRECT_REF
4238 && code != COMPONENT_REF
4239 && code != IMAGPART_EXPR
4240 && code != REALPART_EXPR
4242 && TREE_CODE_CLASS (code) != tcc_declaration)
4245 if (!STMT_VINFO_DATA_REF (stmt_info))
4248 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4249 if (negative && ncopies > 1)
4251 if (vect_print_dump_info (REPORT_DETAILS))
4252 fprintf (vect_dump, "multiple types with negative step.");
4256 elem_type = TREE_TYPE (vectype);
4257 mode = TYPE_MODE (vectype);
4259 /* FORNOW. In some cases can vectorize even if data-type not supported
4260 (e.g. - data copies). */
4261 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4263 if (vect_print_dump_info (REPORT_DETAILS))
4264 fprintf (vect_dump, "Aligned load, but unsupported type.");
4268 /* Check if the load is a part of an interleaving chain. */
4269 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4271 strided_load = true;
4273 gcc_assert (! nested_in_vect_loop);
4275 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4276 if (!slp && !PURE_SLP_STMT (stmt_info))
4278 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4279 if (vect_load_lanes_supported (vectype, group_size))
4280 load_lanes_p = true;
4281 else if (!vect_strided_load_supported (vectype, group_size))
4288 gcc_assert (!strided_load);
4289 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4290 if (alignment_support_scheme != dr_aligned
4291 && alignment_support_scheme != dr_unaligned_supported)
4293 if (vect_print_dump_info (REPORT_DETAILS))
4294 fprintf (vect_dump, "negative step but alignment required.");
4297 if (!perm_mask_for_reverse (vectype))
4299 if (vect_print_dump_info (REPORT_DETAILS))
4300 fprintf (vect_dump, "negative step and reversing not supported.");
4305 if (!vec_stmt) /* transformation not required. */
4307 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4308 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4312 if (vect_print_dump_info (REPORT_DETAILS))
4313 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4319 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4321 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4322 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4323 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4325 /* Check if the chain of loads is already vectorized. */
4326 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4328 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4331 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4332 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4334 /* VEC_NUM is the number of vect stmts to be created for this group. */
4337 strided_load = false;
4338 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4339 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4343 vec_num = group_size;
4349 group_size = vec_num = 1;
4352 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4353 gcc_assert (alignment_support_scheme);
4354 /* Targets with load-lane instructions must not require explicit realignment. */
4356 gcc_assert (!load_lanes_p
4357 || alignment_support_scheme == dr_aligned
4358 || alignment_support_scheme == dr_unaligned_supported);
4360 /* In case the vectorization factor (VF) is bigger than the number
4361 of elements that we can fit in a vectype (nunits), we have to generate
4362 more than one vector stmt - i.e - we need to "unroll" the
4363 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4364 from one copy of the vector stmt to the next, in the field
4365 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4366 stages to find the correct vector defs to be used when vectorizing
4367 stmts that use the defs of the current stmt. The example below
4368 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4369 need to create 4 vectorized stmts):
4371 before vectorization:
4372 RELATED_STMT VEC_STMT
4376 step 1: vectorize stmt S1:
4377 We first create the vector stmt VS1_0, and, as usual, record a
4378 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4379 Next, we create the vector stmt VS1_1, and record a pointer to
4380 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4381 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4383 RELATED_STMT VEC_STMT
4384 VS1_0: vx0 = memref0 VS1_1 -
4385 VS1_1: vx1 = memref1 VS1_2 -
4386 VS1_2: vx2 = memref2 VS1_3 -
4387 VS1_3: vx3 = memref3 - -
4388 S1: x = load - VS1_0
4391 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4392 information we recorded in the RELATED_STMT field is used to vectorize stmts that use the defs of the current stmt. */
4395 /* In case of interleaving (non-unit strided access):
4402 Vectorized loads are created in the order of memory accesses
4403 starting from the access of the first stmt of the chain:
4406 VS2: vx1 = &base + vec_size*1
4407 VS3: vx2 = &base + vec_size*2
4408 VS4: vx3 = &base + vec_size*3
4410 Then permutation statements are generated:
4412 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4413 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4416 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4417 (the order of the data-refs in the output of vect_permute_load_chain
4418 corresponds to the order of scalar stmts in the interleaving chain - see
4419 the documentation of vect_permute_load_chain()).
4420 The generation of permutation stmts and recording them in
4421 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4423 In case of both multiple types and interleaving, the vector loads and
4424 permutation stmts above are created for every copy. The result vector
4425 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4426 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4428 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4429 on a target that supports unaligned accesses (dr_unaligned_supported)
4430 we generate the following code:
4434 p = p + indx * vectype_size;
4439 Otherwise, the data reference is potentially unaligned on a target that
4440 does not support unaligned accesses (dr_explicit_realign_optimized) -
4441 then generate the following code, in which the data in each iteration is
4442 obtained by two vector loads, one from the previous iteration, and one
4443 from the current iteration:
4445 msq_init = *(floor(p1))
4446 p2 = initial_addr + VS - 1;
4447 realignment_token = call target_builtin;
4450 p2 = p2 + indx * vectype_size
4452 vec_dest = realign_load (msq, lsq, realignment_token)
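/* MSQ is the vector loaded from the rounded-down address in the
   previous iteration and LSQ the one loaded in the current iteration;
   REALIGN_LOAD combines the two into the value an unaligned load
   would have produced.  */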
4457 /* If the misalignment remains the same throughout the execution of the
4458 loop, we can create the init_addr and permutation mask at the loop
4459 preheader. Otherwise, it needs to be created inside the loop.
4460 This can only occur when vectorizing memory accesses in the inner-loop
4461 nested within an outer-loop that is being vectorized. */
4463 if (loop && nested_in_vect_loop_p (loop, stmt)
4464 && (TREE_INT_CST_LOW (DR_STEP (dr))
4465 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4467 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4468 compute_in_loop = true;
4471 if ((alignment_support_scheme == dr_explicit_realign_optimized
4472 || alignment_support_scheme == dr_explicit_realign)
4473 && !compute_in_loop)
4475 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4476 alignment_support_scheme, NULL_TREE,
4478 if (alignment_support_scheme == dr_explicit_realign_optimized)
4480 phi = SSA_NAME_DEF_STMT (msq);
4481 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4488 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4491 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4493 aggr_type = vectype;
4495 prev_stmt_info = NULL;
4496 for (j = 0; j < ncopies; j++)
4498 /* 1. Create the vector or array pointer update chain. */
4500 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4501 offset, &dummy, gsi,
4502 &ptr_incr, false, &inv_p);
4504 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4505 TYPE_SIZE_UNIT (aggr_type));
4507 if (strided_load || slp_perm)
4508 dr_chain = VEC_alloc (tree, heap, vec_num);
4514 vec_array = create_vector_array (vectype, vec_num);
4517 /* Emit: VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4518 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4519 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4520 gimple_call_set_lhs (new_stmt, vec_array);
4521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4522 mark_symbols_for_renaming (new_stmt);
4524 /* Extract each vector into an SSA_NAME. */
4525 for (i = 0; i < vec_num; i++)
4527 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4529 VEC_quick_push (tree, dr_chain, new_temp);
4532 /* Record the mapping between SSA_NAMEs and statements. */
4533 vect_record_strided_load_vectors (stmt, dr_chain);
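/* With LOAD_LANES the de-interleaving is done by the load-lanes
   instruction itself, so only the extraction into SSA_NAMEs above is
   needed; the non-lane path below has to permute explicitly via
   vect_transform_strided_load instead.  */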
4537 for (i = 0; i < vec_num; i++)
4540 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4543 /* 2. Create the vector-load in the loop. */
4544 switch (alignment_support_scheme)
4547 case dr_unaligned_supported:
4549 struct ptr_info_def *pi;
4551 = build2 (MEM_REF, vectype, dataref_ptr,
4552 build_int_cst (reference_alias_ptr_type
4553 (DR_REF (first_dr)), 0));
4554 pi = get_ptr_info (dataref_ptr);
4555 pi->align = TYPE_ALIGN_UNIT (vectype);
4556 if (alignment_support_scheme == dr_aligned)
4558 gcc_assert (aligned_access_p (first_dr));
4561 else if (DR_MISALIGNMENT (first_dr) == -1)
4563 TREE_TYPE (data_ref)
4564 = build_aligned_type (TREE_TYPE (data_ref),
4565 TYPE_ALIGN (elem_type));
4566 pi->align = TYPE_ALIGN_UNIT (elem_type);
4571 TREE_TYPE (data_ref)
4572 = build_aligned_type (TREE_TYPE (data_ref),
4573 TYPE_ALIGN (elem_type));
4574 pi->misalign = DR_MISALIGNMENT (first_dr);
4578 case dr_explicit_realign:
4583 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4585 if (compute_in_loop)
4586 msq = vect_setup_realignment (first_stmt, gsi,
4588 dr_explicit_realign,
4591 new_stmt = gimple_build_assign_with_ops
4592 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4594 (TREE_TYPE (dataref_ptr),
4595 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4596 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4597 gimple_assign_set_lhs (new_stmt, ptr);
4598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4600 = build2 (MEM_REF, vectype, ptr,
4601 build_int_cst (reference_alias_ptr_type
4602 (DR_REF (first_dr)), 0));
4603 vec_dest = vect_create_destination_var (scalar_dest,
4605 new_stmt = gimple_build_assign (vec_dest, data_ref);
4606 new_temp = make_ssa_name (vec_dest, new_stmt);
4607 gimple_assign_set_lhs (new_stmt, new_temp);
4608 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4609 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4613 bump = size_binop (MULT_EXPR, vs_minus_1,
4614 TYPE_SIZE_UNIT (elem_type));
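/* The second load must cover the last byte of the unaligned access:
   advance the pointer by VS - 1 elements before rounding it down
   again.  */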
4615 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4616 new_stmt = gimple_build_assign_with_ops
4617 (BIT_AND_EXPR, NULL_TREE, ptr,
4620 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4621 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4622 gimple_assign_set_lhs (new_stmt, ptr);
4623 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4625 = build2 (MEM_REF, vectype, ptr,
4626 build_int_cst (reference_alias_ptr_type
4627 (DR_REF (first_dr)), 0));
4630 case dr_explicit_realign_optimized:
4631 new_stmt = gimple_build_assign_with_ops
4632 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4634 (TREE_TYPE (dataref_ptr),
4635 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4636 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4638 gimple_assign_set_lhs (new_stmt, new_temp);
4639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4641 = build2 (MEM_REF, vectype, new_temp,
4642 build_int_cst (reference_alias_ptr_type
4643 (DR_REF (first_dr)), 0));
4648 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4649 new_stmt = gimple_build_assign (vec_dest, data_ref);
4650 new_temp = make_ssa_name (vec_dest, new_stmt);
4651 gimple_assign_set_lhs (new_stmt, new_temp);
4652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4653 mark_symbols_for_renaming (new_stmt);
4655 /* 3. Handle explicit realignment if necessary/supported.
4657 Create in loop: vec_dest = realign_load (msq, lsq, realignment_token) */
4658 if (alignment_support_scheme == dr_explicit_realign_optimized
4659 || alignment_support_scheme == dr_explicit_realign)
4661 lsq = gimple_assign_lhs (new_stmt);
4662 if (!realignment_token)
4663 realignment_token = dataref_ptr;
4664 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4666 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4669 new_temp = make_ssa_name (vec_dest, new_stmt);
4670 gimple_assign_set_lhs (new_stmt, new_temp);
4671 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4673 if (alignment_support_scheme == dr_explicit_realign_optimized)
4676 if (i == vec_num - 1 && j == ncopies - 1)
4677 add_phi_arg (phi, lsq,
4678 loop_latch_edge (containing_loop),
4684 /* 4. Handle invariant-load. */
4685 if (inv_p && !bb_vinfo)
4688 gimple_stmt_iterator gsi2 = *gsi;
4689 gcc_assert (!strided_load);
4691 vec_inv = build_vector_from_val (vectype, scalar_dest);
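/* Splat the invariant scalar across all vector lanes; e.g. a loaded
   value a becomes {a, a, a, a} for a four-element vector.  */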
4692 new_temp = vect_init_vector (stmt, vec_inv,
4694 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4699 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4700 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4703 /* Collect vector loads and later create their permutation in
4704 vect_transform_strided_load (). */
4705 if (strided_load || slp_perm)
4706 VEC_quick_push (tree, dr_chain, new_temp);
4708 /* Store vector loads in the corresponding SLP_NODE. */
4709 if (slp && !slp_perm)
4710 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4715 if (slp && !slp_perm)
4720 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4721 slp_node_instance, false))
4723 VEC_free (tree, heap, dr_chain);
4732 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4733 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4741 prev_stmt_info = vinfo_for_stmt (new_stmt);
4745 VEC_free (tree, heap, dr_chain);
4751 /* Function vect_is_simple_cond.
4754 LOOP - the loop that is being vectorized.
4755 COND - Condition that is checked for simple use.
4758 *COMP_VECTYPE - the vector type for the comparison.
4760 Returns whether a COND can be vectorized. Checks whether
4761 condition operands are supportable using vect_is_simple_use. */
4764 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4768 enum vect_def_type dt;
4769 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4771 if (!COMPARISON_CLASS_P (cond))
4774 lhs = TREE_OPERAND (cond, 0);
4775 rhs = TREE_OPERAND (cond, 1);
4777 if (TREE_CODE (lhs) == SSA_NAME)
4779 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4780 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4784 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4785 && TREE_CODE (lhs) != FIXED_CST)
4788 if (TREE_CODE (rhs) == SSA_NAME)
4790 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4791 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4795 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4796 && TREE_CODE (rhs) != FIXED_CST)
4799 *comp_vectype = vectype1 ? vectype1 : vectype2;
4803 /* vectorizable_condition.
4805 Check if STMT is a conditional modify expression that can be vectorized.
4806 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4807 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4810 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4811 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4812 the else clause if it is 2).
4814 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
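/* For instance (illustrative GIMPLE), a scalar statement
     x = a < b ? c : d;
   is replaced by
     vx = VEC_COND_EXPR <va < vb, vc, vd>;
   built by the code below.  */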
4817 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4818 gimple *vec_stmt, tree reduc_def, int reduc_index)
4820 tree scalar_dest = NULL_TREE;
4821 tree vec_dest = NULL_TREE;
4822 tree cond_expr, then_clause, else_clause;
4823 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4824 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4826 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4827 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4828 tree vec_compare, vec_cond_expr;
4830 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4832 enum vect_def_type dt, dts[4];
4833 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4834 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4835 enum tree_code code;
4836 stmt_vec_info prev_stmt_info = NULL;
4839 /* FORNOW: unsupported in basic block SLP. */
4840 gcc_assert (loop_vinfo);
4842 /* FORNOW: SLP not supported. */
4843 if (STMT_SLP_TYPE (stmt_info))
4846 gcc_assert (ncopies >= 1);
4847 if (reduc_index && ncopies > 1)
4848 return false; /* FORNOW */
4850 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4853 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4854 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4858 /* FORNOW: not yet supported. */
4859 if (STMT_VINFO_LIVE_P (stmt_info))
4861 if (vect_print_dump_info (REPORT_DETAILS))
4862 fprintf (vect_dump, "value used after loop.");
4866 /* Is vectorizable conditional operation? */
4867 if (!is_gimple_assign (stmt))
4870 code = gimple_assign_rhs_code (stmt);
4872 if (code != COND_EXPR)
4875 cond_expr = gimple_assign_rhs1 (stmt);
4876 then_clause = gimple_assign_rhs2 (stmt);
4877 else_clause = gimple_assign_rhs3 (stmt);
4879 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
4883 if (TREE_CODE (then_clause) == SSA_NAME)
4885 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4886 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4887 &then_def_stmt, &def, &dt))
4890 else if (TREE_CODE (then_clause) != INTEGER_CST
4891 && TREE_CODE (then_clause) != REAL_CST
4892 && TREE_CODE (then_clause) != FIXED_CST)
4895 if (TREE_CODE (else_clause) == SSA_NAME)
4897 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4898 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4899 &else_def_stmt, &def, &dt))
4902 else if (TREE_CODE (else_clause) != INTEGER_CST
4903 && TREE_CODE (else_clause) != REAL_CST
4904 && TREE_CODE (else_clause) != FIXED_CST)
4909 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4910 return expand_vec_cond_expr_p (vectype, comp_vectype);
4916 scalar_dest = gimple_assign_lhs (stmt);
4917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4919 /* Handle cond expr. */
4920 for (j = 0; j < ncopies; j++)
4927 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4929 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4930 NULL, >emp, &def, &dts[0]);
4932 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4934 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4935 NULL, >emp, &def, &dts[1]);
4936 if (reduc_index == 1)
4937 vec_then_clause = reduc_def;
4940 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4942 vect_is_simple_use (then_clause, loop_vinfo,
4943 NULL, >emp, &def, &dts[2]);
4945 if (reduc_index == 2)
4946 vec_else_clause = reduc_def;
4949 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4951 vect_is_simple_use (else_clause, loop_vinfo,
4952 NULL, >emp, &def, &dts[3]);
4957 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4958 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4959 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4961 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4965 /* Arguments are ready. Create the new vector stmt. */
4966 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4967 vec_cond_lhs, vec_cond_rhs);
4968 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4969 vec_compare, vec_then_clause, vec_else_clause);
4971 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4972 new_temp = make_ssa_name (vec_dest, new_stmt);
4973 gimple_assign_set_lhs (new_stmt, new_temp);
4974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4976 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4978 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4980 prev_stmt_info = vinfo_for_stmt (new_stmt);
4987 /* Make sure the statement is vectorizable. */
4990 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4993 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4994 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4996 tree scalar_type, vectype;
4997 gimple pattern_stmt, pattern_def_stmt;
4999 if (vect_print_dump_info (REPORT_DETAILS))
5001 fprintf (vect_dump, "==> examining statement: ");
5002 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5005 if (gimple_has_volatile_ops (stmt))
5007 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5008 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5013 /* Skip stmts that do not need to be vectorized. In loops this is expected to include:
5015 - the COND_EXPR which is the loop exit condition
5016 - any LABEL_EXPRs in the loop
5017 - computations that are used only for array indexing or loop control.
5018 In basic blocks we only analyze statements that are a part of some SLP
5019 instance, therefore, all the statements are relevant.
5021 A pattern statement needs to be analyzed instead of the original statement
5022 if the original statement is not relevant. Otherwise, we analyze both statements. */
5025 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5026 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5027 && !STMT_VINFO_LIVE_P (stmt_info))
5029 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5031 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5032 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5034 /* Analyze PATTERN_STMT instead of the original stmt. */
5035 stmt = pattern_stmt;
5036 stmt_info = vinfo_for_stmt (pattern_stmt);
5037 if (vect_print_dump_info (REPORT_DETAILS))
5039 fprintf (vect_dump, "==> examining pattern statement: ");
5040 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5045 if (vect_print_dump_info (REPORT_DETAILS))
5046 fprintf (vect_dump, "irrelevant.");
5051 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5053 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5054 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5056 /* Analyze PATTERN_STMT too. */
5057 if (vect_print_dump_info (REPORT_DETAILS))
5059 fprintf (vect_dump, "==> examining pattern statement: ");
5060 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5063 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5067 if (is_pattern_stmt_p (stmt_info)
5068 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5069 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5070 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5072 /* Analyze def stmt of STMT if it's a pattern stmt. */
5073 if (vect_print_dump_info (REPORT_DETAILS))
5075 fprintf (vect_dump, "==> examining pattern def statement: ");
5076 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5079 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5084 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5086 case vect_internal_def:
5089 case vect_reduction_def:
5090 case vect_nested_cycle:
5091 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5092 || relevance == vect_used_in_outer_by_reduction
5093 || relevance == vect_unused_in_scope));
5096 case vect_induction_def:
5097 case vect_constant_def:
5098 case vect_external_def:
5099 case vect_unknown_def_type:
5106 gcc_assert (PURE_SLP_STMT (stmt_info));
5108 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5109 if (vect_print_dump_info (REPORT_DETAILS))
5111 fprintf (vect_dump, "get vectype for scalar type: ");
5112 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5115 vectype = get_vectype_for_scalar_type (scalar_type);
5118 if (vect_print_dump_info (REPORT_DETAILS))
5120 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5121 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5126 if (vect_print_dump_info (REPORT_DETAILS))
5128 fprintf (vect_dump, "vectype: ");
5129 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5132 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5135 if (STMT_VINFO_RELEVANT_P (stmt_info))
5137 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5138 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5139 *need_to_vectorize = true;
5144 && (STMT_VINFO_RELEVANT_P (stmt_info)
5145 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5146 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5147 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5148 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5149 || vectorizable_shift (stmt, NULL, NULL, NULL)
5150 || vectorizable_operation (stmt, NULL, NULL, NULL)
5151 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5152 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5153 || vectorizable_call (stmt, NULL, NULL)
5154 || vectorizable_store (stmt, NULL, NULL, NULL)
5155 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5156 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5160 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5161 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5162 || vectorizable_shift (stmt, NULL, NULL, node)
5163 || vectorizable_operation (stmt, NULL, NULL, node)
5164 || vectorizable_assignment (stmt, NULL, NULL, node)
5165 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5166 || vectorizable_store (stmt, NULL, NULL, node));
5171 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5173 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5174 fprintf (vect_dump, "supported: ");
5175 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5184 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5185 need extra handling, except for vectorizable reductions. */
5186 if (STMT_VINFO_LIVE_P (stmt_info)
5187 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5188 ok = vectorizable_live_operation (stmt, NULL, NULL);
5192 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5194 fprintf (vect_dump, "not vectorized: live stmt not ");
5195 fprintf (vect_dump, "supported: ");
5196 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5206 /* Function vect_transform_stmt.
5208 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5211 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5212 bool *strided_store, slp_tree slp_node,
5213 slp_instance slp_node_instance)
5215 bool is_store = false;
5216 gimple vec_stmt = NULL;
5217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5220 switch (STMT_VINFO_TYPE (stmt_info))
5222 case type_demotion_vec_info_type:
5223 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5227 case type_promotion_vec_info_type:
5228 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5232 case type_conversion_vec_info_type:
5233 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5237 case induc_vec_info_type:
5238 gcc_assert (!slp_node);
5239 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5243 case shift_vec_info_type:
5244 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5248 case op_vec_info_type:
5249 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5253 case assignment_vec_info_type:
5254 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5258 case load_vec_info_type:
5259 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5264 case store_vec_info_type:
5265 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5267 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5269 /* In case of interleaving, the whole chain is vectorized when the
5270 last store in the chain is reached. Store stmts before the last
5271 one are skipped, and their vec_stmt_info shouldn't be freed meanwhile. */
5273 *strided_store = true;
5274 if (STMT_VINFO_VEC_STMT (stmt_info))
5281 case condition_vec_info_type:
5282 gcc_assert (!slp_node);
5283 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5287 case call_vec_info_type:
5288 gcc_assert (!slp_node);
5289 done = vectorizable_call (stmt, gsi, &vec_stmt);
5290 stmt = gsi_stmt (*gsi);
5293 case reduc_vec_info_type:
5294 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5299 if (!STMT_VINFO_LIVE_P (stmt_info))
5301 if (vect_print_dump_info (REPORT_DETAILS))
5302 fprintf (vect_dump, "stmt not supported.");
5307 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5308 is being vectorized, but outside the immediately enclosing loop. */
5310 && STMT_VINFO_LOOP_VINFO (stmt_info)
5311 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5312 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5313 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5314 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5315 || STMT_VINFO_RELEVANT (stmt_info) ==
5316 vect_used_in_outer_by_reduction))
5318 struct loop *innerloop = LOOP_VINFO_LOOP (
5319 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5320 imm_use_iterator imm_iter;
5321 use_operand_p use_p;
5325 if (vect_print_dump_info (REPORT_DETAILS))
5326 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5328 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5329 (to be used when vectorizing outer-loop stmts that use the DEF of STMT). */
5331 if (gimple_code (stmt) == GIMPLE_PHI)
5332 scalar_dest = PHI_RESULT (stmt);
5334 scalar_dest = gimple_assign_lhs (stmt);
5336 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5338 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5340 exit_phi = USE_STMT (use_p);
5341 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5346 /* Handle stmts whose DEF is used outside the loop-nest that is
5347 being vectorized. */
5348 if (STMT_VINFO_LIVE_P (stmt_info)
5349 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5351 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5356 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5362 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
5366 vect_remove_stores (gimple first_stmt)
5368 gimple next = first_stmt;
5370 gimple_stmt_iterator next_si;
5374 /* Free the attached stmt_vec_info and remove the stmt. */
5375 next_si = gsi_for_stmt (next);
5376 gsi_remove (&next_si, true);
5377 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5378 free_stmt_vec_info (next);
5384 /* Function new_stmt_vec_info.
5386 Create and initialize a new stmt_vec_info struct for STMT. */
5389 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5390 bb_vec_info bb_vinfo)
5393 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5395 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5396 STMT_VINFO_STMT (res) = stmt;
5397 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5398 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5399 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5400 STMT_VINFO_LIVE_P (res) = false;
5401 STMT_VINFO_VECTYPE (res) = NULL;
5402 STMT_VINFO_VEC_STMT (res) = NULL;
5403 STMT_VINFO_VECTORIZABLE (res) = true;
5404 STMT_VINFO_IN_PATTERN_P (res) = false;
5405 STMT_VINFO_RELATED_STMT (res) = NULL;
5406 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5407 STMT_VINFO_DATA_REF (res) = NULL;
5409 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5410 STMT_VINFO_DR_OFFSET (res) = NULL;
5411 STMT_VINFO_DR_INIT (res) = NULL;
5412 STMT_VINFO_DR_STEP (res) = NULL;
5413 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5415 if (gimple_code (stmt) == GIMPLE_PHI
5416 && is_loop_header_bb_p (gimple_bb (stmt)))
5417 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5419 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5421 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5422 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5423 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5424 STMT_SLP_TYPE (res) = loop_vect;
5425 GROUP_FIRST_ELEMENT (res) = NULL;
5426 GROUP_NEXT_ELEMENT (res) = NULL;
5427 GROUP_SIZE (res) = 0;
5428 GROUP_STORE_COUNT (res) = 0;
5429 GROUP_GAP (res) = 0;
5430 GROUP_SAME_DR_STMT (res) = NULL;
5431 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5437 /* Create the vector in which stmt_vec_info structs are saved. */
5440 init_stmt_vec_info_vec (void)
5442 gcc_assert (!stmt_vec_info_vec);
5443 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5447 /* Free the vector holding stmt_vec_info structs. */
5450 free_stmt_vec_info_vec (void)
5452 gcc_assert (stmt_vec_info_vec);
5453 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5457 /* Free stmt vectorization related info. */
5460 free_stmt_vec_info (gimple stmt)
5462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5467 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5468 set_vinfo_for_stmt (stmt, NULL);
5473 /* Function get_vectype_for_scalar_type_and_size.
5475 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
5479 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5481 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5482 enum machine_mode simd_mode;
5483 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5490 /* We can't build a vector type of elements with alignment bigger than their size. */
5492 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5495 /* For vector types of elements whose mode precision doesn't
5496 match their type's precision we use an element type of mode
5497 precision. The vectorization routines will have to make sure
5498 they support the proper result truncation/extension. */
5499 if (INTEGRAL_TYPE_P (scalar_type)
5500 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5501 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5502 TYPE_UNSIGNED (scalar_type));
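/* E.g. a Boolean type with 1-bit precision but 8-bit QImode is
   replaced by an 8-bit integer type here.  */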
5504 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5505 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5508 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5509 When the component mode passes the above test simply use a type
5510 corresponding to that mode. The theory is that any use that
5511 would cause problems with this will disable vectorization anyway. */
5512 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5513 && !INTEGRAL_TYPE_P (scalar_type)
5514 && !POINTER_TYPE_P (scalar_type))
5515 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5517 /* If no size was supplied use the mode the target prefers. Otherwise
5518 lookup a vector mode of the specified size. */
5520 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5522 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5523 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
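/* E.g. a 16-byte SIMD mode with 4-byte elements gives nunits = 4.  */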
5527 vectype = build_vector_type (scalar_type, nunits);
5528 if (vect_print_dump_info (REPORT_DETAILS))
5530 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5531 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5537 if (vect_print_dump_info (REPORT_DETAILS))
5539 fprintf (vect_dump, "vectype: ");
5540 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5543 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5544 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5546 if (vect_print_dump_info (REPORT_DETAILS))
5547 fprintf (vect_dump, "mode not supported by target.");
5554 unsigned int current_vector_size;
5556 /* Function get_vectype_for_scalar_type.
5558 Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
5562 get_vectype_for_scalar_type (tree scalar_type)
5565 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5566 current_vector_size);
5568 && current_vector_size == 0)
5569 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5573 /* Function get_same_sized_vectype
5575 Returns a vector type corresponding to SCALAR_TYPE of the same size as
5576 VECTOR_TYPE if supported by the target. */
5579 get_same_sized_vectype (tree scalar_type, tree vector_type)
5581 return get_vectype_for_scalar_type_and_size
5582 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5585 /* Function vect_is_simple_use.
5588 LOOP_VINFO - the vect info of the loop that is being vectorized.
5589 BB_VINFO - the vect info of the basic block that is being vectorized.
5590 OPERAND - operand of a stmt in the loop or bb.
5591 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5593 Returns whether a stmt with OPERAND can be vectorized.
5594 For loops, supportable operands are constants, loop invariants, and operands
5595 that are defined by the current iteration of the loop. Unsupportable
5596 operands are those that are defined by a previous iteration of the loop (as
5597 is the case in reduction/induction computations).
5598 For basic blocks, supportable operands are constants and bb invariants.
5599 For now, operands defined outside the basic block are not supported. */
5602 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5603 bb_vec_info bb_vinfo, gimple *def_stmt,
5604 tree *def, enum vect_def_type *dt)
5607 stmt_vec_info stmt_vinfo;
5608 struct loop *loop = NULL;
5611 loop = LOOP_VINFO_LOOP (loop_vinfo);
5616 if (vect_print_dump_info (REPORT_DETAILS))
5618 fprintf (vect_dump, "vect_is_simple_use: operand ");
5619 print_generic_expr (vect_dump, operand, TDF_SLIM);
5622 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5624 *dt = vect_constant_def;
5628 if (is_gimple_min_invariant (operand))
5631 *dt = vect_external_def;
5635 if (TREE_CODE (operand) == PAREN_EXPR)
5637 if (vect_print_dump_info (REPORT_DETAILS))
5638 fprintf (vect_dump, "non-associatable copy.");
5639 operand = TREE_OPERAND (operand, 0);
5642 if (TREE_CODE (operand) != SSA_NAME)
5644 if (vect_print_dump_info (REPORT_DETAILS))
5645 fprintf (vect_dump, "not ssa-name.");
5649 *def_stmt = SSA_NAME_DEF_STMT (operand);
5650 if (*def_stmt == NULL)
5652 if (vect_print_dump_info (REPORT_DETAILS))
5653 fprintf (vect_dump, "no def_stmt.");
5657 if (vect_print_dump_info (REPORT_DETAILS))
5659 fprintf (vect_dump, "def_stmt: ");
5660 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5663 /* Empty stmt is expected only in case of a function argument.
5664 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5665 if (gimple_nop_p (*def_stmt))
5668 *dt = vect_external_def;
5672 bb = gimple_bb (*def_stmt);
5674 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5675 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5676 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5677 *dt = vect_external_def;
5680 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5681 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5684 if (*dt == vect_unknown_def_type)
5686 if (vect_print_dump_info (REPORT_DETAILS))
5687 fprintf (vect_dump, "Unsupported pattern.");
5691 if (vect_print_dump_info (REPORT_DETAILS))
5692 fprintf (vect_dump, "type of def: %d.", *dt);
5694 switch (gimple_code (*def_stmt))
5697 *def = gimple_phi_result (*def_stmt);
5701 *def = gimple_assign_lhs (*def_stmt);
5705 *def = gimple_call_lhs (*def_stmt);
5710 if (vect_print_dump_info (REPORT_DETAILS))
5711 fprintf (vect_dump, "unsupported defining stmt: ");
5718 /* Function vect_is_simple_use_1.
5720 Same as vect_is_simple_use but also determines the vector operand
5721 type of OPERAND and stores it to *VECTYPE. If the definition of
5722 OPERAND is vect_uninitialized_def, vect_constant_def or
5723 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5724 is responsible for computing the best suited vector type for the operand.
5728 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5729 bb_vec_info bb_vinfo, gimple *def_stmt,
5730 tree *def, enum vect_def_type *dt, tree *vectype)
5732 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5735 /* Now get a vector type if the def is internal, otherwise supply
5736 NULL_TREE and leave it up to the caller to figure out a proper
5737 type for the use stmt. */
5738 if (*dt == vect_internal_def
5739 || *dt == vect_induction_def
5740 || *dt == vect_reduction_def
5741 || *dt == vect_double_reduction_def
5742 || *dt == vect_nested_cycle)
5744 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5746 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5747 && !STMT_VINFO_RELEVANT (stmt_info)
5748 && !STMT_VINFO_LIVE_P (stmt_info))
5749 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5751 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5752 gcc_assert (*vectype != NULL_TREE);
5754 else if (*dt == vect_uninitialized_def
5755 || *dt == vect_constant_def
5756 || *dt == vect_external_def)
5757 *vectype = NULL_TREE;
5765 /* Function supportable_widening_operation
5767 Check whether an operation represented by the code CODE is a
5768 widening operation that is supported by the target platform in
5769 vector form (i.e., when operating on arguments of type VECTYPE_IN
5770 producing a result of type VECTYPE_OUT).
5772 Widening operations we currently support are NOP (CONVERT), FLOAT,
5773 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
5774 by the target platform either directly (via vector tree-codes), or via
5778 - CODE1 and CODE2 are codes of vector operations to be used when
5779 vectorizing the operation, if available.
5780 - DECL1 and DECL2 are decls of target builtin functions to be used
5781 when vectorizing the operation, if available. In this case,
5782 CODE1 and CODE2 are CALL_EXPR.
5783 - MULTI_STEP_CVT determines the number of required intermediate steps in
5784 case of multi-step conversion (like char->short->int - in that case
5785 MULTI_STEP_CVT will be 1).
5786 - INTERM_TYPES contains the intermediate type required to perform the
5787 widening operation (short in the above example). */
5790 supportable_widening_operation (enum tree_code code, gimple stmt,
5791 tree vectype_out, tree vectype_in,
5792 tree *decl1, tree *decl2,
5793 enum tree_code *code1, enum tree_code *code2,
5794 int *multi_step_cvt,
5795 VEC (tree, heap) **interm_types)
5797 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5798 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5799 struct loop *vect_loop = NULL;
5801 enum machine_mode vec_mode;
5802 enum insn_code icode1, icode2;
5803 optab optab1, optab2;
5804 tree vectype = vectype_in;
5805 tree wide_vectype = vectype_out;
5806 enum tree_code c1, c2;
5809 vect_loop = LOOP_VINFO_LOOP (loop_info);
5811 /* The result of a vectorized widening operation usually requires two vectors
5812 (because the widened results do not fit into one vector). The generated
5813 vector results would normally be expected to be generated in the same
5814 order as in the original scalar computation, i.e. if 8 results are
5815 generated in each vector iteration, they are to be organized as follows:
5816 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5818 However, in the special case that the result of the widening operation is
5819 used in a reduction computation only, the order doesn't matter (because
5820 when vectorizing a reduction we change the order of the computation).
5821 Some targets can take advantage of this and generate more efficient code.
5822 For example, targets like Altivec, that support widen_mult using a sequence
5823 of {mult_even,mult_odd} generate the following vectors:
5824 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5826 When vectorizing outer-loops, we execute the inner-loop sequentially
5827 (each vectorized inner-loop iteration contributes to VF outer-loop
5828 iterations in parallel). We therefore don't allow changing the order
5829 of the computation in the inner-loop during outer-loop vectorization. */
5832 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5833 && !nested_in_vect_loop_p (vect_loop, stmt))
5839 && code == WIDEN_MULT_EXPR
5840 && targetm.vectorize.builtin_mul_widen_even
5841 && targetm.vectorize.builtin_mul_widen_even (vectype)
5842 && targetm.vectorize.builtin_mul_widen_odd
5843 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5845 if (vect_print_dump_info (REPORT_DETAILS))
5846 fprintf (vect_dump, "Unordered widening operation detected.");
5848 *code1 = *code2 = CALL_EXPR;
5849 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5850 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5856 case WIDEN_MULT_EXPR:
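/* The HI/LO variant pairing depends on endianness: on big-endian
   targets the first scalar elements occupy the "high" part of the
   vector, on little-endian targets the "low" part, so the two codes
   are swapped accordingly.  */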
5857 if (BYTES_BIG_ENDIAN)
5859 c1 = VEC_WIDEN_MULT_HI_EXPR;
5860 c2 = VEC_WIDEN_MULT_LO_EXPR;
5864 c2 = VEC_WIDEN_MULT_HI_EXPR;
5865 c1 = VEC_WIDEN_MULT_LO_EXPR;
5869 case WIDEN_LSHIFT_EXPR:
5870 if (BYTES_BIG_ENDIAN)
5872 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5873 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5877 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5878 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
5883 if (BYTES_BIG_ENDIAN)
5885 c1 = VEC_UNPACK_HI_EXPR;
5886 c2 = VEC_UNPACK_LO_EXPR;
5890 c2 = VEC_UNPACK_HI_EXPR;
5891 c1 = VEC_UNPACK_LO_EXPR;
5896 if (BYTES_BIG_ENDIAN)
5898 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5899 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5903 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5904 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5908 case FIX_TRUNC_EXPR:
5909 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5910 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5911 computing the operation. */
5918 if (code == FIX_TRUNC_EXPR)
5920 /* The signedness is determined from output operand. */
5921 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5922 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5926 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5927 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5930 if (!optab1 || !optab2)
5933 vec_mode = TYPE_MODE (vectype);
5934 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5935 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5938 /* Check if it's a multi-step conversion that can be done using intermediate types. */
5940 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5941 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5944 tree prev_type = vectype, intermediate_type;
5945 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5946 optab optab3, optab4;
5948 if (!CONVERT_EXPR_CODE_P (code))
5954 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5955 intermediate steps in the promotion sequence. We try
5956 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not. */
5958 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5959 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
5961 intermediate_mode = insn_data[icode1].operand[0].mode;
5962 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5963 TYPE_UNSIGNED (prev_type));
5964 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5965 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5967 if (!optab3 || !optab4
5968 || ((icode1 = optab_handler (optab1, prev_mode))
5969 == CODE_FOR_nothing)
5970 || insn_data[icode1].operand[0].mode != intermediate_mode
5971 || ((icode2 = optab_handler (optab2, prev_mode))
5972 == CODE_FOR_nothing)
5973 || insn_data[icode2].operand[0].mode != intermediate_mode
5974 || ((icode1 = optab_handler (optab3, intermediate_mode))
5975 == CODE_FOR_nothing)
5976 || ((icode2 = optab_handler (optab4, intermediate_mode))
5977 == CODE_FOR_nothing))
5980 VEC_quick_push (tree, *interm_types, intermediate_type);
5981 (*multi_step_cvt)++;
5983 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5984 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5987 prev_type = intermediate_type;
5988 prev_mode = intermediate_mode;
6000 /* Function supportable_narrowing_operation
6002 Check whether an operation represented by the code CODE is a
6003 narrowing operation that is supported by the target platform in
6004 vector form (i.e., when operating on arguments of type VECTYPE_IN
6005 and producing a result of type VECTYPE_OUT).
6007 Narrowing operations we currently support are NOP (CONVERT) and
6008 FIX_TRUNC. This function checks if these operations are supported by
6009 the target platform directly via vector tree-codes.
6012 - CODE1 is the code of a vector operation to be used when
6013 vectorizing the operation, if available.
6014 - MULTI_STEP_CVT determines the number of required intermediate steps in
6015 case of multi-step conversion (like int->short->char - in that case
6016 MULTI_STEP_CVT will be 1).
6017 - INTERM_TYPES contains the intermediate type required to perform the
6018 narrowing operation (short in the above example). */
6021 supportable_narrowing_operation (enum tree_code code,
6022 tree vectype_out, tree vectype_in,
6023 enum tree_code *code1, int *multi_step_cvt,
6024 VEC (tree, heap) **interm_types)
6026 enum machine_mode vec_mode;
6027 enum insn_code icode1;
6028 optab optab1, interm_optab;
6029 tree vectype = vectype_in;
6030 tree narrow_vectype = vectype_out;
6032 tree intermediate_type, prev_type;
6038 c1 = VEC_PACK_TRUNC_EXPR;
6041 case FIX_TRUNC_EXPR:
6042 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6046 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6047 tree code and optabs used for computing the operation. */
6054 if (code == FIX_TRUNC_EXPR)
6055 /* The signedness is determined from output operand. */
6056 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6058 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6063 vec_mode = TYPE_MODE (vectype);
6064 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6067 /* Check if it's a multi-step conversion that can be done using intermediate types. */
6069 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6071 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6074 prev_type = vectype;
6075 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6076 intermediate steps in the narrowing sequence. We try
6077 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6079 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6080 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6082 intermediate_mode = insn_data[icode1].operand[0].mode;
6083 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6084 TYPE_UNSIGNED (prev_type));
6085 interm_optab = optab_for_tree_code (c1, intermediate_type,
6088 || ((icode1 = optab_handler (optab1, prev_mode))
6089 == CODE_FOR_nothing)
6090 || insn_data[icode1].operand[0].mode != intermediate_mode
6091 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6092 == CODE_FOR_nothing))
6095 VEC_quick_push (tree, *interm_types, intermediate_type);
6096 (*multi_step_cvt)++;
6098 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6101 prev_type = intermediate_type;
6102 prev_mode = intermediate_mode;