/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
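
/* For illustration (an assumed example, not from the original sources):
   in the loop

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- relevant: has a vdef (alters memory)
         sum = sum + b[i];   <-- live: sum is used after the loop
       }
     ... = sum;

   vect_stmt_relevant_p sets *relevant = vect_used_in_scope for the store
   and *live_p = true for the summation, so both statements seed the
   worklist in vect_mark_stmts_to_be_vectorized.  */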
/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
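
/* For illustration (an assumed example, not from the original sources):
   given

     S1: x_2 = a[i_1]
     S2: b[i_1] = x_2

   the use of i_1 in S1 and S2 only indexes an array, so
   exist_non_indexing_operands_for_use_p (i_1, S2) returns false, while
   for the stored value exist_non_indexing_operands_for_use_p (x_2, S2)
   returns true: S2 is of the form 'ARRAY_REF = var' and var is x_2.  */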
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");

                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
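
/* For illustration (an assumed example, not from the original sources):
   when the reduction statement of

     for (i = 0; i < n; i++)
       sum_1 = sum_0 + a[i];

   is popped from the worklist, its def_type is vect_reduction_def, so
   the statements feeding it (e.g. the load of a[i]) are marked
   vect_used_by_reduction rather than vect_used_in_scope; stmts used only
   by a reduction may later have the order of their results relaxed.  */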
/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
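
/* For illustration (assumed numbers, not from the original sources):
   for a statement with ncopies = 2 whose second operand is loop
   invariant (dt[1] == vect_external_def), on a target where
   vect_get_stmt_cost (vector_stmt) == 1 the model above computes
   inside_cost = 2 * 1 = 2 and outside_cost = 1 for the single
   invariant broadcast hoisted out of the loop.  */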
/* Function vect_cost_strided_group_size.

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost.

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
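
/* For illustration (assumed numbers, not from the original sources):
   a permute-and-store of a group of 4 interleaved stores with
   ncopies = 1 and a vector_stmt cost of 1 is charged
   1 * log2(4) * 4 * 1 = 8 high/low interleave operations on top of
   the cost of the vector stores computed by vect_get_store_cost.  */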
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                 vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost.

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                           vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
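
/* For illustration (assumed unit costs, not from the original sources):
   with ncopies = 1 and all statement costs equal to 1, the schemes
   above charge per copy: 1 for dr_aligned, 2 * 1 + 1 = 3 for
   dr_explicit_realign (plus 1 if the target has a mask-for-load
   builtin), and 1 + 1 = 2 inside the loop for
   dr_explicit_realign_optimized, with 2 (or 3) more outside the loop
   when ADD_REALIGN_COST is true.  */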
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
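
/* For illustration (an assumed example, not from the original sources):
   vectorizing 'a[i] = x + 3' with a V4SI vector type, the constant
   operand leads to an init stmt in the loop preheader such as

     vect_cst_.5 = { 3, 3, 3, 3 };

   and the returned SSA name of vect_cst_.5 is then used as the vector
   operand inside the loop.  */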
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
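
/* For illustration (an assumed example, not from the original sources):
   for 'z = x + 1' where x is defined inside the loop by S1 'x = b[i]',
   case 3 applies and the returned def is the lhs of
   STMT_VINFO_VEC_STMT (S1), e.g. the vx.0 produced by the vectorized
   load; the constant 1 goes through case 1 and becomes the vector
   {1,1,...,1} built in the preheader by vect_init_vector.  */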
/* Function vect_get_vec_def_for_stmt_copy.

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:      vectorized into:           STMT_VINFO_RELATED_STMT

   S1: x = load      VS1.0: vx.0 = memref0      VS1.1
                     VS1.1: vx.1 = memref1      VS1.2
                     VS1.2: vx.2 = memref2      VS1.3
                     VS1.3: vx.3 = memref3      -

   S2: z = x + ...   VSnew.0: vz0 = vx.0 + ...  VSnew.1
                     VSnew.1: vz1 = vx.1 + ...  VSnew.2
                     VSnew.2: vz2 = vx.2 + ...  VSnew.3
                     VSnew.3: vz3 = vx.3 + ...  -

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
   To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

   To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

   For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def )
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
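
/* For illustration (an assumed target scenario, not from the original
   sources): for a call to the const builtin copysign in a loop
   vectorized with V2DF, the hook may hand back the decl of a target
   builtin computing copysign on V2DF; vectorizable_call below then
   replaces the scalar call with a call to that decl.  If the target
   provides no such builtin, the hook returns NULL_TREE and the call
   blocks vectorization.  */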
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half.

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
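
/* For illustration (an assumed example, not from the original sources):
   widening one V8HI operand into two V4SI results is done by calling
   this function twice, once with the hi-part code and once with the
   lo-part code (e.g. VEC_UNPACK_HI_EXPR and VEC_UNPACK_LO_EXPR), which
   is how vectorizable_conversion uses it in its WIDEN case below.  */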
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  VEC(tree,heap) *dummy = NULL;
  int dummy_int;

  /* Is STMT a vectorizable conversion?  */

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  if ((modifier == NONE
       && !supportable_convert_operation (code, vectype_out, vectype_in,
                                          &decl1, &code1))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}
2101 /* Function vectorizable_assignment.
2103 Check if STMT performs an assignment (copy) that can be vectorized.
2104 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2105 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2106 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2109 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2110 gimple *vec_stmt, slp_tree slp_node)
2115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2116 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2122 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2125 VEC(tree,heap) *vec_oprnds = NULL;
2127 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2128 gimple new_stmt = NULL;
2129 stmt_vec_info prev_stmt_info = NULL;
2130 enum tree_code code;
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2136 if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2141 gcc_assert (ncopies >= 1);
2143 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2146 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2149 /* Is vectorizable assignment? */
2150 if (!is_gimple_assign (stmt))
2153 scalar_dest = gimple_assign_lhs (stmt);
2154 if (TREE_CODE (scalar_dest) != SSA_NAME)
2157 code = gimple_assign_rhs_code (stmt);
2158 if (gimple_assign_single_p (stmt)
2159 || code == PAREN_EXPR
2160 || CONVERT_EXPR_CODE_P (code))
2161 op = gimple_assign_rhs1 (stmt);
2165 if (code == VIEW_CONVERT_EXPR)
2166 op = TREE_OPERAND (op, 0);
2168 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2169 &def_stmt, &def, &dt[0], &vectype_in))
2171 if (vect_print_dump_info (REPORT_DETAILS))
2172 fprintf (vect_dump, "use not simple.");
2176 /* We can handle NOP_EXPR conversions that do not change the number
2177 of elements or the vector size. */
2178 if ((CONVERT_EXPR_CODE_P (code)
2179 || code == VIEW_CONVERT_EXPR)
&& (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2182 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2183 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2186 /* We do not handle bit-precision changes. */
2187 if ((CONVERT_EXPR_CODE_P (code)
2188 || code == VIEW_CONVERT_EXPR)
2189 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2190 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2191 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2192 || ((TYPE_PRECISION (TREE_TYPE (op))
2193 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2194 /* But a conversion that does not change the bit-pattern is ok. */
2195 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2196 > TYPE_PRECISION (TREE_TYPE (op)))
2197 && TYPE_UNSIGNED (TREE_TYPE (op))))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "type conversion to/from bit-precision "
2205 if (!vec_stmt) /* transformation not required. */
2207 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "=== vectorizable_assignment ===");
2210 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2215 if (vect_print_dump_info (REPORT_DETAILS))
2216 fprintf (vect_dump, "transform assignment.");
2219 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2222 for (j = 0; j < ncopies; j++)
2226 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2228 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
/* Arguments are ready.  Create the new vector stmt.  */
2231 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2233 if (CONVERT_EXPR_CODE_P (code)
2234 || code == VIEW_CONVERT_EXPR)
2235 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2236 new_stmt = gimple_build_assign (vec_dest, vop);
2237 new_temp = make_ssa_name (vec_dest, new_stmt);
2238 gimple_assign_set_lhs (new_stmt, new_temp);
2239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2241 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2248 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2250 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2252 prev_stmt_info = vinfo_for_stmt (new_stmt);
2255 VEC_free (tree, heap, vec_oprnds);
2260 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2261 either as shift by a scalar or by a vector. */
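/* E.g., vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node)
   returns true if the target provides either a vector/scalar or a
   vector/vector right-shift pattern for the corresponding vector mode
   (a sketch; the vector type comes from get_vectype_for_scalar_type).  */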
2264 vect_supportable_shift (enum tree_code code, tree scalar_type)
2267 enum machine_mode vec_mode;
2272 vectype = get_vectype_for_scalar_type (scalar_type);
2276 optab = optab_for_tree_code (code, vectype, optab_scalar);
if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2280 optab = optab_for_tree_code (code, vectype, optab_vector);
if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
2283 == CODE_FOR_nothing))
2287 vec_mode = TYPE_MODE (vectype);
2288 icode = (int) optab_handler (optab, vec_mode);
2289 if (icode == CODE_FOR_nothing)
2296 /* Function vectorizable_shift.
2298 Check if STMT performs a shift operation that can be vectorized.
2299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
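/* For example (a sketch), in

        for (i = 0; i < N; i++)
          a[i] = b[i] << 3;

   the shift amount is loop-invariant, so if the target's shift pattern
   accepts a scalar operand 2 a single vector/scalar shift

        VS1: vect_a.0 = vect_b.1 << 3;

   is emitted; otherwise the constant is first broadcast into the
   invariant vector {3, 3, 3, 3} and a vector/vector shift is used.  */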
2304 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2305 gimple *vec_stmt, slp_tree slp_node)
2309 tree op0, op1 = NULL;
2310 tree vec_oprnd1 = NULL_TREE;
2311 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2314 enum tree_code code;
2315 enum machine_mode vec_mode;
2319 enum machine_mode optab_op2_mode;
2322 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2323 gimple new_stmt = NULL;
2324 stmt_vec_info prev_stmt_info;
2331 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2334 bool scalar_shift_arg = true;
2335 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2341 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2344 /* Is STMT a vectorizable binary/unary operation? */
2345 if (!is_gimple_assign (stmt))
2348 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2351 code = gimple_assign_rhs_code (stmt);
2353 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2354 || code == RROTATE_EXPR))
2357 scalar_dest = gimple_assign_lhs (stmt);
2358 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2359 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2360 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2362 if (vect_print_dump_info (REPORT_DETAILS))
2363 fprintf (vect_dump, "bit-precision shifts not supported.");
2367 op0 = gimple_assign_rhs1 (stmt);
2368 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2369 &def_stmt, &def, &dt[0], &vectype))
2371 if (vect_print_dump_info (REPORT_DETAILS))
2372 fprintf (vect_dump, "use not simple.");
2375 /* If op0 is an external or constant def use a vector type with
2376 the same size as the output vector type. */
if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2380 gcc_assert (vectype);
2383 if (vect_print_dump_info (REPORT_DETAILS))
2385 fprintf (vect_dump, "no vectype for scalar type ");
2386 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2392 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2393 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2394 if (nunits_out != nunits_in)
2397 op1 = gimple_assign_rhs2 (stmt);
2398 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2399 &dt[1], &op1_vectype))
2401 if (vect_print_dump_info (REPORT_DETAILS))
2402 fprintf (vect_dump, "use not simple.");
2407 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2414 if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2419 gcc_assert (ncopies >= 1);
2421 /* Determine whether the shift amount is a vector, or scalar. If the
2422 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2424 if (dt[1] == vect_internal_def && !slp_node)
2425 scalar_shift_arg = false;
2426 else if (dt[1] == vect_constant_def
2427 || dt[1] == vect_external_def
2428 || dt[1] == vect_internal_def)
/* In SLP, we need to check whether the shift count is the same;
         in loops, if it is a constant or invariant, it is always
         a scalar shift.  */
2435 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2438 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2439 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2440 scalar_shift_arg = false;
2445 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "operand mode requires invariant argument.");
2450 /* Vector shifted by vector. */
2451 if (!scalar_shift_arg)
2453 optab = optab_for_tree_code (code, vectype, optab_vector);
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "vector/vector shift/rotate found.");
if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
2458 if (op1_vectype == NULL_TREE
2459 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
2461 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "unusable type for last operand in"
2463 " vector/vector shift/rotate.");
2467 /* See if the machine has a vector shifted by scalar insn and if not
2468 then see if it has a vector shifted by vector insn. */
2471 optab = optab_for_tree_code (code, vectype, optab_scalar);
if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2475 if (vect_print_dump_info (REPORT_DETAILS))
2476 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2480 optab = optab_for_tree_code (code, vectype, optab_vector);
if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
2483 != CODE_FOR_nothing))
2485 scalar_shift_arg = false;
2487 if (vect_print_dump_info (REPORT_DETAILS))
2488 fprintf (vect_dump, "vector/vector shift/rotate found.");
2490 /* Unlike the other binary operators, shifts/rotates have
2491 the rhs being int, instead of the same type as the lhs,
2492 so make sure the scalar is the right type if we are
2493 dealing with vectors of long long/long/short/char. */
2494 if (dt[1] == vect_constant_def)
2495 op1 = fold_convert (TREE_TYPE (vectype), op1);
2496 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
2500 && TYPE_MODE (TREE_TYPE (vectype))
2501 != TYPE_MODE (TREE_TYPE (op1)))
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "unusable type for last operand in"
2505 " vector/vector shift/rotate.");
2508 if (vec_stmt && !slp_node)
2510 op1 = fold_convert (TREE_TYPE (vectype), op1);
2511 op1 = vect_init_vector (stmt, op1,
2512 TREE_TYPE (vectype), NULL);
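          /* E.g., when a V2DI vector is shifted by the int constant 3, op1
             is fold_convert'ed to long long and broadcast by
             vect_init_vector into the invariant vector {3, 3} (a sketch of
             the conversion above).  */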
2519 /* Supportable by target? */
2522 if (vect_print_dump_info (REPORT_DETAILS))
2523 fprintf (vect_dump, "no optab.");
2526 vec_mode = TYPE_MODE (vectype);
2527 icode = (int) optab_handler (optab, vec_mode);
2528 if (icode == CODE_FOR_nothing)
2530 if (vect_print_dump_info (REPORT_DETAILS))
2531 fprintf (vect_dump, "op not supported by target.");
2532 /* Check only during analysis. */
2533 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|| (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
2537 if (vect_print_dump_info (REPORT_DETAILS))
2538 fprintf (vect_dump, "proceeding using word mode.");
2541 /* Worthwhile without SIMD support? Check only during analysis. */
2542 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
&& vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
2546 if (vect_print_dump_info (REPORT_DETAILS))
2547 fprintf (vect_dump, "not worthwhile without SIMD support.");
2551 if (!vec_stmt) /* transformation not required. */
2553 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2554 if (vect_print_dump_info (REPORT_DETAILS))
2555 fprintf (vect_dump, "=== vectorizable_shift ===");
2556 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "transform binary/unary operation.");
2566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2568 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2569 created in the previous stages of the recursion, so no allocation is
2570 needed, except for the case of shift with scalar shift argument. In that
2571 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2572 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2573 In case of loop-based vectorization we allocate VECs of size 1. We
2574 allocate VEC_OPRNDS1 only in case of binary operation. */
2577 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2578 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2580 else if (scalar_shift_arg)
2581 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2583 prev_stmt_info = NULL;
2584 for (j = 0; j < ncopies; j++)
2589 if (scalar_shift_arg)
2591 /* Vector shl and shr insn patterns can be defined with scalar
2592 operand 2 (shift operand). In this case, use constant or loop
invariant op1 directly, without extending it to vector mode
             first.  */
2595 optab_op2_mode = insn_data[icode].operand[2].mode;
2596 if (!VECTOR_MODE_P (optab_op2_mode))
2598 if (vect_print_dump_info (REPORT_DETAILS))
2599 fprintf (vect_dump, "operand 1 using scalar mode.");
vec_oprnd1 = op1;
              VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2604 /* Store vec_oprnd1 for every vector stmt to be created
2605 for SLP_NODE. We check during the analysis that all
2606 the shift arguments are the same.
2607 TODO: Allow different constants for different vector
2608 stmts generated for an SLP instance. */
2609 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2610 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2615 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2616 (a special case for certain kind of vector shifts); otherwise,
2617 operand 1 should be of a vector type (the usual case). */
if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
2626 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2628 /* Arguments are ready. Create the new vector stmt. */
2629 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2631 vop1 = VEC_index (tree, vec_oprnds1, i);
2632 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2633 new_temp = make_ssa_name (vec_dest, new_stmt);
2634 gimple_assign_set_lhs (new_stmt, new_temp);
2635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2637 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2644 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2646 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2647 prev_stmt_info = vinfo_for_stmt (new_stmt);
2650 VEC_free (tree, heap, vec_oprnds0);
2651 VEC_free (tree, heap, vec_oprnds1);
2657 /* Function vectorizable_operation.
2659 Check if STMT performs a binary, unary or ternary operation that can
2661 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2662 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2663 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
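/* For example (an illustrative sketch, assuming V4SI vectors), the
   scalar statement

        S1: z_1 = x_2 + y_3;

   is vectorized into

        VS1: vect_z.0 = vect_x.1 + vect_y.2;

   Ternary rhs codes (e.g. FMA_EXPR) are emitted the same way through
   gimple_build_assign_with_ops3.  */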
2666 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2667 gimple *vec_stmt, slp_tree slp_node)
2671 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2675 enum tree_code code;
2676 enum machine_mode vec_mode;
2683 enum vect_def_type dt[3]
2684 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2685 gimple new_stmt = NULL;
2686 stmt_vec_info prev_stmt_info;
2692 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2693 tree vop0, vop1, vop2;
2694 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2697 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2700 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2703 /* Is STMT a vectorizable binary/unary operation? */
2704 if (!is_gimple_assign (stmt))
2707 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2710 code = gimple_assign_rhs_code (stmt);
2712 /* For pointer addition, we should use the normal plus for
2713 the vector addition. */
2714 if (code == POINTER_PLUS_EXPR)
/* Support only unary, binary, or ternary operations.  */
2718 op_type = TREE_CODE_LENGTH (code);
2719 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2727 scalar_dest = gimple_assign_lhs (stmt);
2728 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
/* Most operations cannot handle bit-precision types without extra
     truncations.  */
2732 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2733 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
/* Exceptions are the bitwise binary operations.  */
2735 && code != BIT_IOR_EXPR
2736 && code != BIT_XOR_EXPR
2737 && code != BIT_AND_EXPR)
2739 if (vect_print_dump_info (REPORT_DETAILS))
2740 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2744 op0 = gimple_assign_rhs1 (stmt);
2745 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2746 &def_stmt, &def, &dt[0], &vectype))
2748 if (vect_print_dump_info (REPORT_DETAILS))
2749 fprintf (vect_dump, "use not simple.");
2752 /* If op0 is an external or constant def use a vector type with
2753 the same size as the output vector type. */
if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2757 gcc_assert (vectype);
2760 if (vect_print_dump_info (REPORT_DETAILS))
2762 fprintf (vect_dump, "no vectype for scalar type ");
2763 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2769 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2770 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2771 if (nunits_out != nunits_in)
2774 if (op_type == binary_op || op_type == ternary_op)
2776 op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
2780 if (vect_print_dump_info (REPORT_DETAILS))
2781 fprintf (vect_dump, "use not simple.");
2785 if (op_type == ternary_op)
2787 op2 = gimple_assign_rhs3 (stmt);
if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[2]))
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "use not simple.");
2798 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2805 if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2810 gcc_assert (ncopies >= 1);
2812 /* Shifts are handled in vectorizable_shift (). */
2813 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2814 || code == RROTATE_EXPR)
2817 optab = optab_for_tree_code (code, vectype, optab_default);
2819 /* Supportable by target? */
2822 if (vect_print_dump_info (REPORT_DETAILS))
2823 fprintf (vect_dump, "no optab.");
2826 vec_mode = TYPE_MODE (vectype);
2827 icode = (int) optab_handler (optab, vec_mode);
2828 if (icode == CODE_FOR_nothing)
2830 if (vect_print_dump_info (REPORT_DETAILS))
2831 fprintf (vect_dump, "op not supported by target.");
2832 /* Check only during analysis. */
2833 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|| (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
2837 if (vect_print_dump_info (REPORT_DETAILS))
2838 fprintf (vect_dump, "proceeding using word mode.");
2841 /* Worthwhile without SIMD support? Check only during analysis. */
2842 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
&& vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
2846 if (vect_print_dump_info (REPORT_DETAILS))
2847 fprintf (vect_dump, "not worthwhile without SIMD support.");
2851 if (!vec_stmt) /* transformation not required. */
2853 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2854 if (vect_print_dump_info (REPORT_DETAILS))
2855 fprintf (vect_dump, "=== vectorizable_operation ===");
2856 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2862 if (vect_print_dump_info (REPORT_DETAILS))
2863 fprintf (vect_dump, "transform binary/unary operation.");
2866 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2868 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2869 created in the previous stages of the recursion, so no allocation is
2870 needed, except for the case of shift with scalar shift argument. In that
2871 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2872 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2873 In case of loop-based vectorization we allocate VECs of size 1. We
2874 allocate VEC_OPRNDS1 only in case of binary operation. */
2877 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2878 if (op_type == binary_op || op_type == ternary_op)
2879 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2880 if (op_type == ternary_op)
2881 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2884 /* In case the vectorization factor (VF) is bigger than the number
2885 of elements that we can fit in a vectype (nunits), we have to generate
2886 more than one vector stmt - i.e - we need to "unroll" the
2887 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2888 from one copy of the vector stmt to the next, in the field
2889 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2890 stages to find the correct vector defs to be used when vectorizing
2891 stmts that use the defs of the current stmt. The example below
2892 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2893 we need to create 4 vectorized stmts):
before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = load        -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
2903 VS1_0: vx0 = memref0 VS1_1 -
2904 VS1_1: vx1 = memref1 VS1_2 -
2905 VS1_2: vx2 = memref2 VS1_3 -
2906 VS1_3: vx3 = memref3 - -
2907 S1: x = load - VS1_0
2910 step2: vectorize stmt S2 (done here):
2911 To vectorize stmt S2 we first need to find the relevant vector
2912 def for the first operand 'x'. This is, as usual, obtained from
2913 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2914 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2915 relevant vector def 'vx0'. Having found 'vx0' we can generate
2916 the vector stmt VS2_0, and as usual, record it in the
2917 STMT_VINFO_VEC_STMT of stmt S2.
2918 When creating the second copy (VS2_1), we obtain the relevant vector
2919 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2920 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2921 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2922 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2923 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2924 chain of stmts and pointers:
2925 RELATED_STMT VEC_STMT
2926 VS1_0: vx0 = memref0 VS1_1 -
2927 VS1_1: vx1 = memref1 VS1_2 -
2928 VS1_2: vx2 = memref2 VS1_3 -
2929 VS1_3: vx3 = memref3 - -
2930 S1: x = load - VS1_0
2931 VS2_0: vz0 = vx0 + v1 VS2_1 -
2932 VS2_1: vz1 = vx1 + v1 VS2_2 -
2933 VS2_2: vz2 = vx2 + v1 VS2_3 -
2934 VS2_3: vz3 = vx3 + v1 - -
2935 S2: z = x + 1 - VS2_0 */
2937 prev_stmt_info = NULL;
2938 for (j = 0; j < ncopies; j++)
2943 if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
2949 if (op_type == ternary_op)
2951 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2952 VEC_quick_push (tree, vec_oprnds2,
2953 vect_get_vec_def_for_operand (op2, stmt, NULL));
2958 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2959 if (op_type == ternary_op)
2961 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2962 VEC_quick_push (tree, vec_oprnds2,
vect_get_vec_def_for_stmt_copy (dt[2],
                                                          vec_oprnd));
2968 /* Arguments are ready. Create the new vector stmt. */
2969 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2971 vop1 = ((op_type == binary_op || op_type == ternary_op)
2972 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2973 vop2 = ((op_type == ternary_op)
2974 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
                                                    vop0, vop1, vop2);
2977 new_temp = make_ssa_name (vec_dest, new_stmt);
2978 gimple_assign_set_lhs (new_stmt, new_temp);
2979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2981 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2991 prev_stmt_info = vinfo_for_stmt (new_stmt);
2994 VEC_free (tree, heap, vec_oprnds0);
2996 VEC_free (tree, heap, vec_oprnds1);
2998 VEC_free (tree, heap, vec_oprnds2);
3004 /* Get vectorized definitions for loop-based vectorization. For the first
3005 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3006 scalar operand), and for the rest we get a copy with
3007 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3008 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3009 The vectors are collected into VEC_OPRNDS. */
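/* E.g., for a conversion needing four defs (MULTI_STEP_CVT == 1) the
   collected defs form the chain

        vd.0 = vect_get_vec_def_for_operand (op);
        vd.1 = vect_get_vec_def_for_stmt_copy (dt, vd.0);
        vd.2 = vect_get_vec_def_for_stmt_copy (dt, vd.1);
        vd.3 = vect_get_vec_def_for_stmt_copy (dt, vd.2);

   (a sketch of the recursion below).  */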
3012 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3013 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
3017 /* Get first vector operand. */
/* All the vector operands except the very first one (that is the scalar
     oprnd) are stmt copies.  */
3020 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3021 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3023 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3025 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3027 /* Get second vector operand. */
3028 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3029 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
3036 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3040 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
For multi-step conversions store the resulting vectors and call the function
   recursively.  */
3045 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3046 int multi_step_cvt, gimple stmt,
3047 VEC (tree, heap) *vec_dsts,
3048 gimple_stmt_iterator *gsi,
3049 slp_tree slp_node, enum tree_code code,
3050 stmt_vec_info *prev_stmt_info)
3053 tree vop0, vop1, new_tmp, vec_dest;
3055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3057 vec_dest = VEC_pop (tree, vec_dsts);
3059 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3061 /* Create demotion operation. */
3062 vop0 = VEC_index (tree, *vec_oprnds, i);
3063 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3064 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3065 new_tmp = make_ssa_name (vec_dest, new_stmt);
3066 gimple_assign_set_lhs (new_stmt, new_tmp);
3067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3070 /* Store the resulting vector for next recursive call. */
3071 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3074 /* This is the last step of the conversion sequence. Store the
3075 vectors in SLP_NODE or in vector info of the scalar statement
3076 (or in STMT_VINFO_RELATED_STMT chain). */
3078 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3081 if (!*prev_stmt_info)
3082 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3084 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3086 *prev_stmt_info = vinfo_for_stmt (new_stmt);
/* For multi-step demotion operations we first generate demotion
         operations from the source type to the intermediate types, and
         then combine the results (stored in VEC_OPRNDS) in a demotion
         operation to the destination type.  */
      /* At each level of recursion we have half of the operands we had at
         the previous level.  */
3099 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3100 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3101 stmt, vec_dsts, gsi, slp_node,
3102 code, prev_stmt_info);
3107 /* Function vectorizable_type_demotion
3109 Check if STMT performs a binary or unary operation that involves
3110 type demotion, and if it can be vectorized.
3111 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3112 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3113 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
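/* For example (a sketch, assuming 128-bit vectors), a short -> char
   demotion packs two V8HI operands into one V16QI result:

        VS1: vc0 = VEC_PACK_TRUNC_EXPR <vs0, vs1>;

   An int -> char demotion needs an intermediate step through V8HI
   (multi_step_cvt == 1), provided the target implements vec_pack_trunc
   for both modes.  */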
3116 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3117 gimple *vec_stmt, slp_tree slp_node)
3122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3124 enum tree_code code, code1 = ERROR_MARK;
3127 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3128 stmt_vec_info prev_stmt_info;
3135 int multi_step_cvt = 0;
3136 VEC (tree, heap) *vec_oprnds0 = NULL;
3137 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3138 tree last_oprnd, intermediate_type;
3139 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3147 /* Is STMT a vectorizable type-demotion operation? */
3148 if (!is_gimple_assign (stmt))
3151 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3154 code = gimple_assign_rhs_code (stmt);
3155 if (!CONVERT_EXPR_CODE_P (code))
3158 scalar_dest = gimple_assign_lhs (stmt);
3159 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3161 /* Check the operands of the operation. */
3162 op0 = gimple_assign_rhs1 (stmt);
3163 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3164 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3165 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3166 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
3169 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3170 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3171 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3172 || ((TYPE_PRECISION (TREE_TYPE (op0))
3173 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3175 if (vect_print_dump_info (REPORT_DETAILS))
3176 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3180 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3181 &def_stmt, &def, &dt[0], &vectype_in))
3183 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (vect_dump, "use not simple.");
3187 /* If op0 is an external def use a vector type with the
3188 same size as the output vector type if possible. */
if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3192 gcc_assert (vectype_in);
3195 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "no vectype for scalar type ");
3198 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3204 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3205 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3206 if (nunits_in >= nunits_out)
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3212 if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3216 gcc_assert (ncopies >= 1);
3218 /* Supportable by target? */
3219 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3220 &code1, &multi_step_cvt, &interm_types))
3223 if (!vec_stmt) /* transformation not required. */
3225 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3226 if (vect_print_dump_info (REPORT_DETAILS))
3227 fprintf (vect_dump, "=== vectorizable_demotion ===");
3228 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3233 if (vect_print_dump_info (REPORT_DETAILS))
3234 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3237 /* In case of multi-step demotion, we first generate demotion operations to
the intermediate types, and then from those types to the final one.
3239 We create vector destinations for the intermediate type (TYPES) received
3240 from supportable_narrowing_operation, and store them in the correct order
3241 for future use in vect_create_vectorized_demotion_stmts(). */
3243 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3245 vec_dsts = VEC_alloc (tree, heap, 1);
3247 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3248 VEC_quick_push (tree, vec_dsts, vec_dest);
3252 for (i = VEC_length (tree, interm_types) - 1;
3253 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
vec_dest = vect_create_destination_var (scalar_dest,
                                                intermediate_type);
3257 VEC_quick_push (tree, vec_dsts, vec_dest);
3261 /* In case the vectorization factor (VF) is bigger than the number
3262 of elements that we can fit in a vectype (nunits), we have to generate
3263 more than one vector stmt - i.e - we need to "unroll" the
3264 vector stmt by a factor VF/nunits. */
3266 prev_stmt_info = NULL;
3267 for (j = 0; j < ncopies; j++)
3271 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3274 VEC_free (tree, heap, vec_oprnds0);
3275 vec_oprnds0 = VEC_alloc (tree, heap,
3276 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3277 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3278 vect_pow2 (multi_step_cvt) - 1);
3281 /* Arguments are ready. Create the new vector stmts. */
3282 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3283 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3284 multi_step_cvt, stmt, tmp_vec_dsts,
gsi, slp_node, code1,
                                             &prev_stmt_info);
3289 VEC_free (tree, heap, vec_oprnds0);
3290 VEC_free (tree, heap, vec_dsts);
3291 VEC_free (tree, heap, tmp_vec_dsts);
3292 VEC_free (tree, heap, interm_types);
3294 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3299 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3300 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3301 the resulting vectors and call the function recursively. */
3304 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3305 VEC (tree, heap) **vec_oprnds1,
3306 int multi_step_cvt, gimple stmt,
3307 VEC (tree, heap) *vec_dsts,
3308 gimple_stmt_iterator *gsi,
3309 slp_tree slp_node, enum tree_code code1,
3310 enum tree_code code2, tree decl1,
3311 tree decl2, int op_type,
3312 stmt_vec_info *prev_stmt_info)
3315 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3316 gimple new_stmt1, new_stmt2;
3317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3318 VEC (tree, heap) *vec_tmp;
3320 vec_dest = VEC_pop (tree, vec_dsts);
3321 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3323 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3325 if (op_type == binary_op)
vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;
/* Generate the two halves of the promotion operation.  */
3331 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3332 op_type, vec_dest, gsi, stmt);
3333 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3334 op_type, vec_dest, gsi, stmt);
3335 if (is_gimple_call (new_stmt1))
3337 new_tmp1 = gimple_call_lhs (new_stmt1);
3338 new_tmp2 = gimple_call_lhs (new_stmt2);
3342 new_tmp1 = gimple_assign_lhs (new_stmt1);
3343 new_tmp2 = gimple_assign_lhs (new_stmt2);
3348 /* Store the results for the recursive call. */
3349 VEC_quick_push (tree, vec_tmp, new_tmp1);
3350 VEC_quick_push (tree, vec_tmp, new_tmp2);
/* Last step of the promotion sequence - store the results.  */
3357 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3358 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3362 if (!*prev_stmt_info)
3363 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3365 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3367 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3368 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3369 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
/* For a multi-step promotion operation we call the function recursively
         for every stage.  We start from the input type, create promotion
         operations to the intermediate types, and then create promotions
         to the output type.  */
3380 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                              multi_step_cvt - 1, stmt,
                                              vec_dsts, gsi, slp_node, code1,
                                              code2, decl1, decl2, op_type,
                                              prev_stmt_info);
3388 VEC_free (tree, heap, vec_tmp);
3392 /* Function vectorizable_type_promotion
3394 Check if STMT performs a binary or unary operation that involves
3395 type promotion, and if it can be vectorized.
3396 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3397 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3398 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
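/* For example (a sketch, assuming 128-bit vectors), a char -> int
   promotion unpacks one V16QI operand into two V8HI halves,

        VS1: vs_lo = VEC_UNPACK_LO_EXPR <vc0>;
        VS2: vs_hi = VEC_UNPACK_HI_EXPR <vc0>;

   and each half again into two V4SI vectors (multi_step_cvt == 1 with
   intermediate type V8HI), provided the target implements vec_unpack
   for both modes.  */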
3401 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3402 gimple *vec_stmt, slp_tree slp_node)
3406 tree op0, op1 = NULL;
3407 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3409 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3410 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3411 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3415 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3416 stmt_vec_info prev_stmt_info;
3423 tree intermediate_type = NULL_TREE;
3424 int multi_step_cvt = 0;
3425 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3426 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3427 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3430 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3433 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3436 /* Is STMT a vectorizable type-promotion operation? */
3437 if (!is_gimple_assign (stmt))
3440 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3443 code = gimple_assign_rhs_code (stmt);
3444 if (!CONVERT_EXPR_CODE_P (code)
3445 && code != WIDEN_MULT_EXPR
3446 && code != WIDEN_LSHIFT_EXPR)
3449 scalar_dest = gimple_assign_lhs (stmt);
3450 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3452 /* Check the operands of the operation. */
3453 op0 = gimple_assign_rhs1 (stmt);
3454 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3455 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3456 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3457 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3458 && CONVERT_EXPR_CODE_P (code))))
3461 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3462 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3464 || ((TYPE_PRECISION (TREE_TYPE (op0))
3465 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3467 if (vect_print_dump_info (REPORT_DETAILS))
3468 fprintf (vect_dump, "type promotion to/from bit-precision "
3473 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3474 &def_stmt, &def, &dt[0], &vectype_in))
3476 if (vect_print_dump_info (REPORT_DETAILS))
3477 fprintf (vect_dump, "use not simple.");
3481 op_type = TREE_CODE_LENGTH (code);
3482 if (op_type == binary_op)
3486 op1 = gimple_assign_rhs2 (stmt);
3487 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
             OP1.  */
3491 if (CONSTANT_CLASS_P (op0))
3492 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3493 &def_stmt, &def, &dt[1], &vectype_in);
ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                     &dt[1]);
3500 if (vect_print_dump_info (REPORT_DETAILS))
3501 fprintf (vect_dump, "use not simple.");
3507 /* If op0 is an external or constant def use a vector type with
3508 the same size as the output vector type. */
if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3512 gcc_assert (vectype_in);
3515 if (vect_print_dump_info (REPORT_DETAILS))
3517 fprintf (vect_dump, "no vectype for scalar type ");
3518 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3524 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3525 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3526 if (nunits_in <= nunits_out)
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3532 if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3537 gcc_assert (ncopies >= 1);
3539 /* Supportable by target? */
3540 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3541 &decl1, &decl2, &code1, &code2,
3542 &multi_step_cvt, &interm_types))
/* Binary widening operation can only be supported directly by the
     architecture.  */
3547 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3549 if (!vec_stmt) /* transformation not required. */
3551 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3552 if (vect_print_dump_info (REPORT_DETAILS))
3553 fprintf (vect_dump, "=== vectorizable_promotion ===");
3554 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3560 if (vect_print_dump_info (REPORT_DETAILS))
3561 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3564 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3566 if (CONSTANT_CLASS_P (op0))
3567 op0 = fold_convert (TREE_TYPE (op1), op0);
3568 else if (CONSTANT_CLASS_P (op1))
3569 op1 = fold_convert (TREE_TYPE (op0), op1);
/* In case of multi-step promotion, we first generate promotion operations
     to the intermediate types, and then from those types to the final one.
     We store the vector destinations in VEC_DSTS in the correct order for
     recursive creation of promotion operations in
     vect_create_vectorized_promotion_stmts().  Vector destinations are
     created according to TYPES received from
     supportable_widening_operation().  */
3580 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3582 vec_dsts = VEC_alloc (tree, heap, 1);
3584 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3585 VEC_quick_push (tree, vec_dsts, vec_dest);
3589 for (i = VEC_length (tree, interm_types) - 1;
3590 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
vec_dest = vect_create_destination_var (scalar_dest,
                                                intermediate_type);
3594 VEC_quick_push (tree, vec_dsts, vec_dest);
3600 vec_oprnds0 = VEC_alloc (tree, heap,
3601 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3602 if (op_type == binary_op)
3603 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3605 else if (code == WIDEN_LSHIFT_EXPR)
3606 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3608 /* In case the vectorization factor (VF) is bigger than the number
3609 of elements that we can fit in a vectype (nunits), we have to generate
3610 more than one vector stmt - i.e - we need to "unroll" the
3611 vector stmt by a factor VF/nunits. */
3613 prev_stmt_info = NULL;
3614 for (j = 0; j < ncopies; j++)
3621 if (code == WIDEN_LSHIFT_EXPR)
vec_oprnd1 = op1;
                  /* Store vec_oprnd1 for every vector stmt to be created
3625 for SLP_NODE. We check during the analysis that all
3626 the shift arguments are the same. */
3627 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3628 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
                               -1);
vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
                             &vec_oprnds1, -1);
3639 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3640 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3641 if (op_type == binary_op)
3643 if (code == WIDEN_LSHIFT_EXPR)
vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3647 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3653 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3654 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3655 if (op_type == binary_op)
3657 if (code == WIDEN_LSHIFT_EXPR)
vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
3661 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3665 /* Arguments are ready. Create the new vector stmts. */
3666 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3667 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
multi_step_cvt, stmt,
                                              tmp_vec_dsts,
                                              gsi, slp_node, code1, code2,
                                              decl1, decl2, op_type,
                                              &prev_stmt_info);
3675 VEC_free (tree, heap, vec_dsts);
3676 VEC_free (tree, heap, tmp_vec_dsts);
3677 VEC_free (tree, heap, interm_types);
3678 VEC_free (tree, heap, vec_oprnds0);
3679 VEC_free (tree, heap, vec_oprnds1);
3681 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3686 /* Function vectorizable_store.
Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
3690 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3691 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3692 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
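/* For example (a sketch, assuming V4SI vectors), the scalar store

        S1: a[i] = x_1;

   becomes, per copy, the vector store

        VS1: MEM_REF <vector(4) int> [dataref_ptr] = vect_x.0;

   with DATAREF_PTR bumped by 16 bytes between copies (the contiguous,
   non-interleaved case).  */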
3695 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3701 tree vec_oprnd = NULL_TREE;
3702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3703 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3704 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3706 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3707 struct loop *loop = NULL;
3708 enum machine_mode vec_mode;
3710 enum dr_alignment_support alignment_support_scheme;
3713 enum vect_def_type dt;
3714 stmt_vec_info prev_stmt_info = NULL;
3715 tree dataref_ptr = NULL_TREE;
3716 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3719 gimple next_stmt, first_stmt = NULL;
3720 bool strided_store = false;
3721 bool store_lanes_p = false;
3722 unsigned int group_size, i;
3723 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3725 VEC(tree,heap) *vec_oprnds = NULL;
3726 bool slp = (slp_node != NULL);
3727 unsigned int vec_num;
3728 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3732 loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3737 if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3742 gcc_assert (ncopies >= 1);
3744 /* FORNOW. This restriction should be relaxed. */
3745 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3747 if (vect_print_dump_info (REPORT_DETAILS))
3748 fprintf (vect_dump, "multiple types in nested loop.");
3752 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3755 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3758 /* Is vectorizable store? */
3760 if (!is_gimple_assign (stmt))
3763 scalar_dest = gimple_assign_lhs (stmt);
3764 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3765 && is_pattern_stmt_p (stmt_info))
3766 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3767 if (TREE_CODE (scalar_dest) != ARRAY_REF
3768 && TREE_CODE (scalar_dest) != INDIRECT_REF
3769 && TREE_CODE (scalar_dest) != COMPONENT_REF
3770 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3771 && TREE_CODE (scalar_dest) != REALPART_EXPR
3772 && TREE_CODE (scalar_dest) != MEM_REF)
3775 gcc_assert (gimple_assign_single_p (stmt));
3776 op = gimple_assign_rhs1 (stmt);
3777 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3779 if (vect_print_dump_info (REPORT_DETAILS))
3780 fprintf (vect_dump, "use not simple.");
3784 elem_type = TREE_TYPE (vectype);
3785 vec_mode = TYPE_MODE (vectype);
3787 /* FORNOW. In some cases can vectorize even if data-type not supported
3788 (e.g. - array initialization with 0). */
3789 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3792 if (!STMT_VINFO_DATA_REF (stmt_info))
3795 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3797 if (vect_print_dump_info (REPORT_DETAILS))
3798 fprintf (vect_dump, "negative step for store.");
3802 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3804 strided_store = true;
3805 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3806 if (!slp && !PURE_SLP_STMT (stmt_info))
3808 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3809 if (vect_store_lanes_supported (vectype, group_size))
3810 store_lanes_p = true;
3811 else if (!vect_strided_store_supported (vectype, group_size))
3815 if (first_stmt == stmt)
3817 /* STMT is the leader of the group. Check the operands of all the
3818 stmts of the group. */
3819 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3822 gcc_assert (gimple_assign_single_p (next_stmt));
3823 op = gimple_assign_rhs1 (next_stmt);
if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                   &def, &dt))
3827 if (vect_print_dump_info (REPORT_DETAILS))
3828 fprintf (vect_dump, "use not simple.");
3831 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3836 if (!vec_stmt) /* transformation not required. */
3838 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3839 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3847 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3848 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3850 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3853 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3855 /* We vectorize all the stmts of the interleaving group when we
3856 reach the last stmt in the group. */
3857 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3858 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3867 strided_store = false;
/* VEC_NUM is the number of vect stmts to be created for this
         SLP group.  */
3870 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3871 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3872 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
/* VEC_NUM is the number of vect stmts to be created for this
       group.  */
3877 vec_num = group_size;
3883 group_size = vec_num = 1;
3886 if (vect_print_dump_info (REPORT_DETAILS))
3887 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3889 dr_chain = VEC_alloc (tree, heap, group_size);
3890 oprnds = VEC_alloc (tree, heap, group_size);
3892 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3893 gcc_assert (alignment_support_scheme);
/* Targets with store-lane instructions must not require explicit
     realignment.  */
3896 gcc_assert (!store_lanes_p
3897 || alignment_support_scheme == dr_aligned
3898 || alignment_support_scheme == dr_unaligned_supported);
3901 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3903 aggr_type = vectype;
3905 /* In case the vectorization factor (VF) is bigger than the number
3906 of elements that we can fit in a vectype (nunits), we have to generate
3907 more than one vector stmt - i.e - we need to "unroll" the
3908 vector stmt by a factor VF/nunits. For more details see documentation in
3909 vect_get_vec_def_for_copy_stmt. */
/* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3
3927 Then permutation statements are generated:
3929 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3930 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3933 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3934 (the order of the data-refs in the output of vect_permute_store_chain
3935 corresponds to the order of scalar stmts in the interleaving chain - see
3936 the documentation of vect_permute_store_chain()).
In case of both multiple types and interleaving, the above vector stores
     and permutation stmts are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
3944 prev_stmt_info = NULL;
3945 for (j = 0; j < ncopies; j++)
3954 /* Get vectorized arguments for SLP_NODE. */
vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
                               NULL, -1);
3958 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3962 /* For interleaved stores we collect vectorized defs for all the
3963 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3964 used as an input to vect_permute_store_chain(), and OPRNDS as
3965 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3967 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3968 OPRNDS are of size 1. */
3969 next_stmt = first_stmt;
3970 for (i = 0; i < group_size; i++)
3972 /* Since gaps are not supported for interleaved stores,
3973 GROUP_SIZE is the exact number of stmts in the chain.
3974 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3975 there is no interleaving, GROUP_SIZE is 1, and only one
3976 iteration of the loop will be executed. */
3977 gcc_assert (next_stmt
3978 && gimple_assign_single_p (next_stmt));
3979 op = gimple_assign_rhs1 (next_stmt);
vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                         NULL);
3983 VEC_quick_push(tree, dr_chain, vec_oprnd);
3984 VEC_quick_push(tree, oprnds, vec_oprnd);
3985 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
/* We should have caught mismatched types earlier.  */
3990 gcc_assert (useless_type_conversion_p (vectype,
3991 TREE_TYPE (vec_oprnd)));
3992 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3993 NULL_TREE, &dummy, gsi,
3994 &ptr_incr, false, &inv_p);
3995 gcc_assert (bb_vinfo || !inv_p);
3999 /* For interleaved stores we created vectorized defs for all the
4000 defs stored in OPRNDS in the previous iteration (previous copy).
4001 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4002 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4004 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4005 OPRNDS are of size 1. */
4006 for (i = 0; i < group_size; i++)
4008 op = VEC_index (tree, oprnds, i);
vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
                              &dt);
4011 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4012 VEC_replace(tree, dr_chain, i, vec_oprnd);
4013 VEC_replace(tree, oprnds, i, vec_oprnd);
4015 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4016 TYPE_SIZE_UNIT (aggr_type));
4023 /* Combine all the vectors into an array. */
4024 vec_array = create_vector_array (vectype, vec_num);
4025 for (i = 0; i < vec_num; i++)
4027 vec_oprnd = VEC_index (tree, dr_chain, i);
4028 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
/* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
4033 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4034 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4035 gimple_call_set_lhs (new_stmt, data_ref);
4036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4037 mark_symbols_for_renaming (new_stmt);
4044 result_chain = VEC_alloc (tree, heap, group_size);
vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                    &result_chain);
4050 next_stmt = first_stmt;
4051 for (i = 0; i < vec_num; i++)
4053 struct ptr_info_def *pi;
4056 /* Bump the vector pointer. */
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                           stmt, NULL_TREE);
4061 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4062 else if (strided_store)
4063 /* For strided stores vectorized defs are interleaved in
4064 vect_permute_store_chain(). */
4065 vec_oprnd = VEC_index (tree, result_chain, i);
4067 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4068 build_int_cst (reference_alias_ptr_type
4069 (DR_REF (first_dr)), 0));
4070 pi = get_ptr_info (dataref_ptr);
4071 pi->align = TYPE_ALIGN_UNIT (vectype);
4072 if (aligned_access_p (first_dr))
4074 else if (DR_MISALIGNMENT (first_dr) == -1)
4076 TREE_TYPE (data_ref)
4077 = build_aligned_type (TREE_TYPE (data_ref),
4078 TYPE_ALIGN (elem_type));
4079 pi->align = TYPE_ALIGN_UNIT (elem_type);
4084 TREE_TYPE (data_ref)
4085 = build_aligned_type (TREE_TYPE (data_ref),
4086 TYPE_ALIGN (elem_type));
4087 pi->misalign = DR_MISALIGNMENT (first_dr);
4090 /* Arguments are ready. Create the new vector stmt. */
4091 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4092 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4093 mark_symbols_for_renaming (new_stmt);
4098 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4106 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4108 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4109 prev_stmt_info = vinfo_for_stmt (new_stmt);
4113 VEC_free (tree, heap, dr_chain);
4114 VEC_free (tree, heap, oprnds);
4116 VEC_free (tree, heap, result_chain);
4118 VEC_free (tree, heap, vec_oprnds);
/* Given a vector type VECTYPE, return the constant mask vector that
   implements reversal of the vector elements.  If that is impossible
   to do, return NULL.  */
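/* E.g., for V4SI the required selector is {3, 2, 1, 0}; if the target's
   can_vec_perm_p accepts it, the constant mask vector {3, 2, 1, 0} is
   returned (a sketch).  */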
4129 perm_mask_for_reverse (tree vectype)
4131 tree mask_elt_type, mask_type, mask_vec;
4135 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4136 sel = XALLOCAVEC (unsigned char, nunits);
4138 for (i = 0; i < nunits; ++i)
4139 sel[i] = nunits - 1 - i;
4141 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4145 = lang_hooks.types.type_for_size
4146 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4147 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4150 for (i = 0; i < nunits; i++)
4151 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
4152 mask_vec = build_vector (mask_type, mask_vec);
4157 /* Given a vector variable X that was generated for the scalar LHS of
4158 STMT, generate instructions to reverse the vector elements of X,
4159 insert them at *GSI, and return the permuted vector variable. */
4162 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4164 tree vectype = TREE_TYPE (x);
4165 tree mask_vec, perm_dest, data_ref;
4168 mask_vec = perm_mask_for_reverse (vectype);
4170 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4172 /* Generate the permute statement. */
4173 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4175 data_ref = make_ssa_name (perm_dest, perm_stmt);
4176 gimple_set_lhs (perm_stmt, data_ref);
4177 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
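/* Illustrative sketch (not from the original sources): a scalar model of
   the reversal implemented by perm_mask_for_reverse and
   reverse_vec_elements above.  The selector is { nunits-1, ..., 1, 0 },
   and a VEC_PERM_EXPR with that selector yields result[i] = input[sel[i]].
   The helper below is invented for the example.  */

static void
example_reverse_elements (const int *in, int *out, unsigned nunits)
{
  unsigned i;

  for (i = 0; i < nunits; i++)
    out[i] = in[nunits - 1 - i];	/* sel[i] = nunits - 1 - i.  */
}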
4182 /* vectorizable_load.
4184 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
4186 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4187 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4188 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4191 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4192 slp_tree slp_node, slp_instance slp_node_instance)
4195 tree vec_dest = NULL;
4196 tree data_ref = NULL;
4197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4198 stmt_vec_info prev_stmt_info;
4199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4200 struct loop *loop = NULL;
4201 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4202 bool nested_in_vect_loop = false;
4203 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4204 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4207 enum machine_mode mode;
4208 gimple new_stmt = NULL;
4210 enum dr_alignment_support alignment_support_scheme;
4211 tree dataref_ptr = NULL_TREE;
4213 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4215 int i, j, group_size;
4216 tree msq = NULL_TREE, lsq;
4217 tree offset = NULL_TREE;
4218 tree realignment_token = NULL_TREE;
4220 VEC(tree,heap) *dr_chain = NULL;
4221 bool strided_load = false;
4222 bool load_lanes_p = false;
4226 bool compute_in_loop = false;
4227 struct loop *at_loop;
4229 bool slp = (slp_node != NULL);
4230 bool slp_perm = false;
4231 enum tree_code code;
4232 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4238 loop = LOOP_VINFO_LOOP (loop_vinfo);
4239 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4240 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4245 /* Multiple types in SLP are handled by creating the appropriate number of
4246 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4248 if (slp || PURE_SLP_STMT (stmt_info))
4251 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4253 gcc_assert (ncopies >= 1);
4255 /* FORNOW. This restriction should be relaxed. */
4256 if (nested_in_vect_loop && ncopies > 1)
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "multiple types in nested loop.");
4263 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4266 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4269 /* Is vectorizable load? */
4270 if (!is_gimple_assign (stmt))
4273 scalar_dest = gimple_assign_lhs (stmt);
4274 if (TREE_CODE (scalar_dest) != SSA_NAME)
4277 code = gimple_assign_rhs_code (stmt);
4278 if (code != ARRAY_REF
4279 && code != INDIRECT_REF
4280 && code != COMPONENT_REF
4281 && code != IMAGPART_EXPR
4282 && code != REALPART_EXPR
4284 && TREE_CODE_CLASS (code) != tcc_declaration)
4287 if (!STMT_VINFO_DATA_REF (stmt_info))
4290 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4291 if (negative && ncopies > 1)
4293 if (vect_print_dump_info (REPORT_DETAILS))
4294 fprintf (vect_dump, "multiple types with negative step.");
4298 elem_type = TREE_TYPE (vectype);
4299 mode = TYPE_MODE (vectype);
4301 /* FORNOW. In some cases we can vectorize even if the data-type is not
4302 supported (e.g. - data copies). */
4303 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4305 if (vect_print_dump_info (REPORT_DETAILS))
4306 fprintf (vect_dump, "Aligned load, but unsupported type.");
4310 /* Check if the load is a part of an interleaving chain. */
4311 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4313 strided_load = true;
4315 gcc_assert (! nested_in_vect_loop);
4317 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4318 if (!slp && !PURE_SLP_STMT (stmt_info))
4320 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4321 if (vect_load_lanes_supported (vectype, group_size))
4322 load_lanes_p = true;
4323 else if (!vect_strided_load_supported (vectype, group_size))
4330 gcc_assert (!strided_load);
4331 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4332 if (alignment_support_scheme != dr_aligned
4333 && alignment_support_scheme != dr_unaligned_supported)
4335 if (vect_print_dump_info (REPORT_DETAILS))
4336 fprintf (vect_dump, "negative step but alignment required.");
4339 if (!perm_mask_for_reverse (vectype))
4341 if (vect_print_dump_info (REPORT_DETAILS))
4342 fprintf (vect_dump, "negative step and reversing not supported.");
4347 if (!vec_stmt) /* transformation not required. */
4349 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4350 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4354 if (vect_print_dump_info (REPORT_DETAILS))
4355 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4361 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4363 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4364 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4365 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4367 /* Check if the chain of loads is already vectorized. */
4368 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4370 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4373 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4374 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4376 /* VEC_NUM is the number of vect stmts to be created for this group. */
4379 strided_load = false;
4380 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4381 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4385 vec_num = group_size;
4391 group_size = vec_num = 1;
4394 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4395 gcc_assert (alignment_support_scheme);
4396 /* Targets with load-lanes instructions must not require explicit realignment. */
4398 gcc_assert (!load_lanes_p
4399 || alignment_support_scheme == dr_aligned
4400 || alignment_support_scheme == dr_unaligned_supported);
4402 /* In case the vectorization factor (VF) is bigger than the number
4403 of elements that we can fit in a vectype (nunits), we have to generate
4404 more than one vector stmt - i.e., we need to "unroll" the
4405 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4406 from one copy of the vector stmt to the next, in the field
4407 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4408 stages to find the correct vector defs to be used when vectorizing
4409 stmts that use the defs of the current stmt. The example below
4410 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4411 need to create 4 vectorized stmts):
4413 before vectorization:
4414 RELATED_STMT VEC_STMT
4418 step 1: vectorize stmt S1:
4419 We first create the vector stmt VS1_0, and, as usual, record a
4420 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4421 Next, we create the vector stmt VS1_1, and record a pointer to
4422 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4423 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4425 RELATED_STMT VEC_STMT
4426 VS1_0: vx0 = memref0 VS1_1 -
4427 VS1_1: vx1 = memref1 VS1_2 -
4428 VS1_2: vx2 = memref2 VS1_3 -
4429 VS1_3: vx3 = memref3 - -
4430 S1: x = load - VS1_0
4433 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4434 information we recorded in the RELATED_STMT field is used to vectorize
4437 /* In case of interleaving (non-unit strided access):
4444 Vectorized loads are created in the order of memory accesses
4445 starting from the access of the first stmt of the chain:
4448 VS2: vx1 = &base + vec_size*1
4449 VS3: vx3 = &base + vec_size*2
4450 VS4: vx4 = &base + vec_size*3
4452 Then permutation statements are generated:
4454 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4455 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4458 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4459 (the order of the data-refs in the output of vect_permute_load_chain
4460 corresponds to the order of scalar stmts in the interleaving chain - see
4461 the documentation of vect_permute_load_chain()).
4462 The generation of permutation stmts and recording them in
4463 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4465 In case of both multiple types and interleaving, the vector loads and
4466 permutation stmts above are created for every copy. The result vector
4467 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4468 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4470 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4471 on a target that supports unaligned accesses (dr_unaligned_supported)
4472 we generate the following code:
4476 p = p + indx * vectype_size;
4481 Otherwise, the data reference is potentially unaligned on a target that
4482 does not support unaligned accesses (dr_explicit_realign_optimized) -
4483 then generate the following code, in which the data in each iteration is
4484 obtained by two vector loads, one from the previous iteration, and one
4485 from the current iteration:
4487 msq_init = *(floor(p1))
4488 p2 = initial_addr + VS - 1;
4489 realignment_token = call target_builtin;
4492 p2 = p2 + indx * vectype_size
4494 vec_dest = realign_load (msq, lsq, realignment_token)
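A concrete example (an assumption for exposition, not taken from this
file): with V4SI vectors and a pointer misaligned by one element,
msq = *(floor(p)) loads {a0,a1,a2,a3} and the next aligned load gives
lsq = {a4,a5,a6,a7}; realign_load, guided by the realignment token,
combines them into the desired {a1,a2,a3,a4}.  In the optimized
scheme the lsq of one iteration is reused as the msq of the next.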
4499 /* If the misalignment remains the same throughout the execution of the
4500 loop, we can create the init_addr and permutation mask at the loop
4501 preheader. Otherwise, it needs to be created inside the loop.
4502 This can only occur when vectorizing memory accesses in the inner-loop
4503 nested within an outer-loop that is being vectorized. */
4505 if (loop && nested_in_vect_loop_p (loop, stmt)
4506 && (TREE_INT_CST_LOW (DR_STEP (dr))
4507 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4509 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4510 compute_in_loop = true;
4513 if ((alignment_support_scheme == dr_explicit_realign_optimized
4514 || alignment_support_scheme == dr_explicit_realign)
4515 && !compute_in_loop)
4517 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4518 alignment_support_scheme, NULL_TREE,
4520 if (alignment_support_scheme == dr_explicit_realign_optimized)
4522 phi = SSA_NAME_DEF_STMT (msq);
4523 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4530 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4533 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4535 aggr_type = vectype;
4537 prev_stmt_info = NULL;
4538 for (j = 0; j < ncopies; j++)
4540 /* 1. Create the vector or array pointer update chain. */
4542 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4543 offset, &dummy, gsi,
4544 &ptr_incr, false, &inv_p);
4546 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4547 TYPE_SIZE_UNIT (aggr_type));
4549 if (strided_load || slp_perm)
4550 dr_chain = VEC_alloc (tree, heap, vec_num);
4556 vec_array = create_vector_array (vectype, vec_num);
4559 /* Emit:  VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4560 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4561 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4562 gimple_call_set_lhs (new_stmt, vec_array);
4563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4564 mark_symbols_for_renaming (new_stmt);
4566 /* Extract each vector into an SSA_NAME. */
4567 for (i = 0; i < vec_num; i++)
4569 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4571 VEC_quick_push (tree, dr_chain, new_temp);
4574 /* Record the mapping between SSA_NAMEs and statements. */
4575 vect_record_strided_load_vectors (stmt, dr_chain);
4579 for (i = 0; i < vec_num; i++)
4582 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4585 /* 2. Create the vector-load in the loop. */
4586 switch (alignment_support_scheme)
4589 case dr_unaligned_supported:
4591 struct ptr_info_def *pi;
4593 = build2 (MEM_REF, vectype, dataref_ptr,
4594 build_int_cst (reference_alias_ptr_type
4595 (DR_REF (first_dr)), 0));
4596 pi = get_ptr_info (dataref_ptr);
4597 pi->align = TYPE_ALIGN_UNIT (vectype);
4598 if (alignment_support_scheme == dr_aligned)
4600 gcc_assert (aligned_access_p (first_dr));
4603 else if (DR_MISALIGNMENT (first_dr) == -1)
4605 TREE_TYPE (data_ref)
4606 = build_aligned_type (TREE_TYPE (data_ref),
4607 TYPE_ALIGN (elem_type));
4608 pi->align = TYPE_ALIGN_UNIT (elem_type);
4613 TREE_TYPE (data_ref)
4614 = build_aligned_type (TREE_TYPE (data_ref),
4615 TYPE_ALIGN (elem_type));
4616 pi->misalign = DR_MISALIGNMENT (first_dr);
4620 case dr_explicit_realign:
4625 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4627 if (compute_in_loop)
4628 msq = vect_setup_realignment (first_stmt, gsi,
4630 dr_explicit_realign,
4633 new_stmt = gimple_build_assign_with_ops
4634 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4636 (TREE_TYPE (dataref_ptr),
4637 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4638 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4639 gimple_assign_set_lhs (new_stmt, ptr);
4640 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4642 = build2 (MEM_REF, vectype, ptr,
4643 build_int_cst (reference_alias_ptr_type
4644 (DR_REF (first_dr)), 0));
4645 vec_dest = vect_create_destination_var (scalar_dest,
4647 new_stmt = gimple_build_assign (vec_dest, data_ref);
4648 new_temp = make_ssa_name (vec_dest, new_stmt);
4649 gimple_assign_set_lhs (new_stmt, new_temp);
4650 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4651 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4655 bump = size_binop (MULT_EXPR, vs_minus_1,
4656 TYPE_SIZE_UNIT (elem_type));
4657 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4658 new_stmt = gimple_build_assign_with_ops
4659 (BIT_AND_EXPR, NULL_TREE, ptr,
4662 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4663 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4664 gimple_assign_set_lhs (new_stmt, ptr);
4665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4667 = build2 (MEM_REF, vectype, ptr,
4668 build_int_cst (reference_alias_ptr_type
4669 (DR_REF (first_dr)), 0));
4672 case dr_explicit_realign_optimized:
4673 new_stmt = gimple_build_assign_with_ops
4674 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4676 (TREE_TYPE (dataref_ptr),
4677 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4678 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4680 gimple_assign_set_lhs (new_stmt, new_temp);
4681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4683 = build2 (MEM_REF, vectype, new_temp,
4684 build_int_cst (reference_alias_ptr_type
4685 (DR_REF (first_dr)), 0));
4690 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4691 new_stmt = gimple_build_assign (vec_dest, data_ref);
4692 new_temp = make_ssa_name (vec_dest, new_stmt);
4693 gimple_assign_set_lhs (new_stmt, new_temp);
4694 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4695 mark_symbols_for_renaming (new_stmt);
4697 /* 3. Handle explicit realignment if necessary/supported.
4699 Create in loop: vec_dest = realign_load (msq, lsq, realignment_token) */
4700 if (alignment_support_scheme == dr_explicit_realign_optimized
4701 || alignment_support_scheme == dr_explicit_realign)
4703 lsq = gimple_assign_lhs (new_stmt);
4704 if (!realignment_token)
4705 realignment_token = dataref_ptr;
4706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4708 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4711 new_temp = make_ssa_name (vec_dest, new_stmt);
4712 gimple_assign_set_lhs (new_stmt, new_temp);
4713 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4715 if (alignment_support_scheme == dr_explicit_realign_optimized)
4718 if (i == vec_num - 1 && j == ncopies - 1)
4719 add_phi_arg (phi, lsq,
4720 loop_latch_edge (containing_loop),
4726 /* 4. Handle invariant-load. */
4727 if (inv_p && !bb_vinfo)
4730 gimple_stmt_iterator gsi2 = *gsi;
4731 gcc_assert (!strided_load);
4734 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4737 tem = fold_convert (TREE_TYPE (vectype), tem);
4738 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4742 vec_inv = build_vector_from_val (vectype, tem);
4743 new_temp = vect_init_vector (stmt, vec_inv,
4745 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4750 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4751 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4754 /* Collect vector loads and later create their permutation in
4755 vect_transform_strided_load (). */
4756 if (strided_load || slp_perm)
4757 VEC_quick_push (tree, dr_chain, new_temp);
4759 /* Store vector loads in the corresponding SLP_NODE. */
4760 if (slp && !slp_perm)
4761 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4766 if (slp && !slp_perm)
4771 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4772 slp_node_instance, false))
4774 VEC_free (tree, heap, dr_chain);
4783 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4784 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4792 prev_stmt_info = vinfo_for_stmt (new_stmt);
4796 VEC_free (tree, heap, dr_chain);
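/* Illustrative sketch (not from the original sources): a scalar model of
   the interleaved-load path above.  GROUP_SIZE * NELEMS contiguous
   elements are read and split back into GROUP_SIZE per-statement streams.
   IFN_LOAD_LANES does this in a single instruction;
   vect_permute_load_chain reconstructs the streams with
   extract-even/extract-odd permutes.  The helper below is invented for
   the example.  */

static void
example_deinterleaved_load (const int *src, int *const *streams,
			    unsigned group_size, unsigned nelems)
{
  unsigned i, s;

  for (i = 0; i < nelems; i++)
    for (s = 0; s < group_size; s++)
      streams[s][i] = src[i * group_size + s];
}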
4802 /* Function vect_is_simple_cond.
4805 LOOP - the loop that is being vectorized.
4806 COND - Condition that is checked for simple use.
4809 *COMP_VECTYPE - the vector type for the comparison.
4811 Returns whether a COND can be vectorized. Checks whether
4812 condition operands are supportable using vect_is_simple_use. */
4815 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4819 enum vect_def_type dt;
4820 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4822 if (!COMPARISON_CLASS_P (cond))
4825 lhs = TREE_OPERAND (cond, 0);
4826 rhs = TREE_OPERAND (cond, 1);
4828 if (TREE_CODE (lhs) == SSA_NAME)
4830 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4831 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4835 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4836 && TREE_CODE (lhs) != FIXED_CST)
4839 if (TREE_CODE (rhs) == SSA_NAME)
4841 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4842 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4846 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4847 && TREE_CODE (rhs) != FIXED_CST)
4850 *comp_vectype = vectype1 ? vectype1 : vectype2;
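/* Illustrative note (an assumption for exposition): for a condition such
   as  a[i] < 5  the LHS is an SSA_NAME defined inside the loop, so it
   supplies a vector type, while the RHS is an INTEGER_CST and supplies
   none; *COMP_VECTYPE is then taken from the SSA_NAME operand, as the
   fallback chain above shows.  */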
4854 /* vectorizable_condition.
4856 Check if STMT is a conditional modify expression that can be vectorized.
4857 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4858 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4861 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4862 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4863 the else clause if it is 2).
4865 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4868 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4869 gimple *vec_stmt, tree reduc_def, int reduc_index)
4871 tree scalar_dest = NULL_TREE;
4872 tree vec_dest = NULL_TREE;
4873 tree cond_expr, then_clause, else_clause;
4874 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4875 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4877 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4878 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4879 tree vec_compare, vec_cond_expr;
4881 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4883 enum vect_def_type dt, dts[4];
4884 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4885 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4886 enum tree_code code;
4887 stmt_vec_info prev_stmt_info = NULL;
4890 /* FORNOW: unsupported in basic block SLP. */
4891 gcc_assert (loop_vinfo);
4893 /* FORNOW: SLP not supported. */
4894 if (STMT_SLP_TYPE (stmt_info))
4897 gcc_assert (ncopies >= 1);
4898 if (reduc_index && ncopies > 1)
4899 return false; /* FORNOW */
4901 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4904 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4905 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4909 /* FORNOW: not yet supported. */
4910 if (STMT_VINFO_LIVE_P (stmt_info))
4912 if (vect_print_dump_info (REPORT_DETAILS))
4913 fprintf (vect_dump, "value used after loop.");
4917 /* Is vectorizable conditional operation? */
4918 if (!is_gimple_assign (stmt))
4921 code = gimple_assign_rhs_code (stmt);
4923 if (code != COND_EXPR)
4926 cond_expr = gimple_assign_rhs1 (stmt);
4927 then_clause = gimple_assign_rhs2 (stmt);
4928 else_clause = gimple_assign_rhs3 (stmt);
4930 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
4934 if (TREE_CODE (then_clause) == SSA_NAME)
4936 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4937 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4938 &then_def_stmt, &def, &dt))
4941 else if (TREE_CODE (then_clause) != INTEGER_CST
4942 && TREE_CODE (then_clause) != REAL_CST
4943 && TREE_CODE (then_clause) != FIXED_CST)
4946 if (TREE_CODE (else_clause) == SSA_NAME)
4948 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4949 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4950 &else_def_stmt, &def, &dt))
4953 else if (TREE_CODE (else_clause) != INTEGER_CST
4954 && TREE_CODE (else_clause) != REAL_CST
4955 && TREE_CODE (else_clause) != FIXED_CST)
4960 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4961 return expand_vec_cond_expr_p (vectype, comp_vectype);
4967 scalar_dest = gimple_assign_lhs (stmt);
4968 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4970 /* Handle cond expr. */
4971 for (j = 0; j < ncopies; j++)
4978 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4980 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4981 NULL, &gtemp, &def, &dts[0]);
4983 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4985 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4986 NULL, &gtemp, &def, &dts[1]);
4987 if (reduc_index == 1)
4988 vec_then_clause = reduc_def;
4991 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4993 vect_is_simple_use (then_clause, loop_vinfo,
4994 NULL, &gtemp, &def, &dts[2]);
4996 if (reduc_index == 2)
4997 vec_else_clause = reduc_def;
5000 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5002 vect_is_simple_use (else_clause, loop_vinfo,
5003 NULL, &gtemp, &def, &dts[3]);
5008 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
5009 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
5010 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5012 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5016 /* Arguments are ready. Create the new vector stmt. */
5017 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5018 vec_cond_lhs, vec_cond_rhs);
5019 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5020 vec_compare, vec_then_clause, vec_else_clause);
5022 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5023 new_temp = make_ssa_name (vec_dest, new_stmt);
5024 gimple_assign_set_lhs (new_stmt, new_temp);
5025 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5027 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5029 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5031 prev_stmt_info = vinfo_for_stmt (new_stmt);
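/* Illustrative sketch (not from the original sources): a scalar model of
   the VEC_COND_EXPR built above.  A scalar statement  x = a < b ? c : d
   becomes an elementwise select between the vectorized then/else clauses
   under the vectorized comparison.  The helper below is invented for the
   example.  */

static void
example_vec_cond (const int *a, const int *b, const int *c, const int *d,
		  int *x, unsigned nunits)
{
  unsigned i;

  for (i = 0; i < nunits; i++)
    x[i] = a[i] < b[i] ? c[i] : d[i];
}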
5038 /* Make sure the statement is vectorizable. */
5041 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5045 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5047 tree scalar_type, vectype;
5048 gimple pattern_stmt, pattern_def_stmt;
5050 if (vect_print_dump_info (REPORT_DETAILS))
5052 fprintf (vect_dump, "==> examining statement: ");
5053 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5056 if (gimple_has_volatile_ops (stmt))
5058 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5059 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5064 /* Skip stmts that do not need to be vectorized. In loops this is expected to include:
5066 - the COND_EXPR which is the loop exit condition
5067 - any LABEL_EXPRs in the loop
5068 - computations that are used only for array indexing or loop control.
5069 In basic blocks we only analyze statements that are a part of some SLP
5070 instance; therefore, all the statements are relevant.
5072 Pattern statements need to be analyzed instead of the original statement
5073 if the original statement is not relevant. Otherwise, we analyze both statements. */
5076 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5077 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5078 && !STMT_VINFO_LIVE_P (stmt_info))
5080 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5082 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5083 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5085 /* Analyze PATTERN_STMT instead of the original stmt. */
5086 stmt = pattern_stmt;
5087 stmt_info = vinfo_for_stmt (pattern_stmt);
5088 if (vect_print_dump_info (REPORT_DETAILS))
5090 fprintf (vect_dump, "==> examining pattern statement: ");
5091 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5096 if (vect_print_dump_info (REPORT_DETAILS))
5097 fprintf (vect_dump, "irrelevant.");
5102 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5104 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5105 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5107 /* Analyze PATTERN_STMT too. */
5108 if (vect_print_dump_info (REPORT_DETAILS))
5110 fprintf (vect_dump, "==> examining pattern statement: ");
5111 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5114 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5118 if (is_pattern_stmt_p (stmt_info)
5119 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5120 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5121 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5123 /* Analyze def stmt of STMT if it's a pattern stmt. */
5124 if (vect_print_dump_info (REPORT_DETAILS))
5126 fprintf (vect_dump, "==> examining pattern def statement: ");
5127 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5130 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5135 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5137 case vect_internal_def:
5140 case vect_reduction_def:
5141 case vect_nested_cycle:
5142 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5143 || relevance == vect_used_in_outer_by_reduction
5144 || relevance == vect_unused_in_scope));
5147 case vect_induction_def:
5148 case vect_constant_def:
5149 case vect_external_def:
5150 case vect_unknown_def_type:
5157 gcc_assert (PURE_SLP_STMT (stmt_info));
5159 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5160 if (vect_print_dump_info (REPORT_DETAILS))
5162 fprintf (vect_dump, "get vectype for scalar type: ");
5163 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5166 vectype = get_vectype_for_scalar_type (scalar_type);
5169 if (vect_print_dump_info (REPORT_DETAILS))
5171 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5172 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5177 if (vect_print_dump_info (REPORT_DETAILS))
5179 fprintf (vect_dump, "vectype: ");
5180 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5183 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5186 if (STMT_VINFO_RELEVANT_P (stmt_info))
5188 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5189 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5190 *need_to_vectorize = true;
5195 && (STMT_VINFO_RELEVANT_P (stmt_info)
5196 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5197 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5198 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5199 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5200 || vectorizable_shift (stmt, NULL, NULL, NULL)
5201 || vectorizable_operation (stmt, NULL, NULL, NULL)
5202 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5203 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5204 || vectorizable_call (stmt, NULL, NULL)
5205 || vectorizable_store (stmt, NULL, NULL, NULL)
5206 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5207 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5211 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5212 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5213 || vectorizable_shift (stmt, NULL, NULL, node)
5214 || vectorizable_operation (stmt, NULL, NULL, node)
5215 || vectorizable_assignment (stmt, NULL, NULL, node)
5216 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5217 || vectorizable_store (stmt, NULL, NULL, node));
5222 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5224 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5225 fprintf (vect_dump, "supported: ");
5226 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5235 /* Stmts that are (also) "live" (i.e., used outside the loop)
5236 need extra handling, except for vectorizable reductions. */
5237 if (STMT_VINFO_LIVE_P (stmt_info)
5238 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5239 ok = vectorizable_live_operation (stmt, NULL, NULL);
5243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5245 fprintf (vect_dump, "not vectorized: live stmt not ");
5246 fprintf (vect_dump, "supported: ");
5247 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5257 /* Function vect_transform_stmt.
5259 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5262 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5263 bool *strided_store, slp_tree slp_node,
5264 slp_instance slp_node_instance)
5266 bool is_store = false;
5267 gimple vec_stmt = NULL;
5268 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5271 switch (STMT_VINFO_TYPE (stmt_info))
5273 case type_demotion_vec_info_type:
5274 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5278 case type_promotion_vec_info_type:
5279 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5283 case type_conversion_vec_info_type:
5284 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5288 case induc_vec_info_type:
5289 gcc_assert (!slp_node);
5290 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5294 case shift_vec_info_type:
5295 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5299 case op_vec_info_type:
5300 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5304 case assignment_vec_info_type:
5305 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5309 case load_vec_info_type:
5310 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5315 case store_vec_info_type:
5316 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5318 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5320 /* In case of interleaving, the whole chain is vectorized when the
5321 last store in the chain is reached. Store stmts before the last
5322 one are skipped, and their vec_stmt_info shouldn't be freed
5324 *strided_store = true;
5325 if (STMT_VINFO_VEC_STMT (stmt_info))
5332 case condition_vec_info_type:
5333 gcc_assert (!slp_node);
5334 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5338 case call_vec_info_type:
5339 gcc_assert (!slp_node);
5340 done = vectorizable_call (stmt, gsi, &vec_stmt);
5341 stmt = gsi_stmt (*gsi);
5344 case reduc_vec_info_type:
5345 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5350 if (!STMT_VINFO_LIVE_P (stmt_info))
5352 if (vect_print_dump_info (REPORT_DETAILS))
5353 fprintf (vect_dump, "stmt not supported.");
5358 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5359 is being vectorized, but outside the immediately enclosing loop. */
5361 && STMT_VINFO_LOOP_VINFO (stmt_info)
5362 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5363 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5364 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5365 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5366 || STMT_VINFO_RELEVANT (stmt_info) ==
5367 vect_used_in_outer_by_reduction))
5369 struct loop *innerloop = LOOP_VINFO_LOOP (
5370 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5371 imm_use_iterator imm_iter;
5372 use_operand_p use_p;
5376 if (vect_print_dump_info (REPORT_DETAILS))
5377 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5379 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5380 (to be used when vectorizing outer-loop stmts that use the DEF of
5382 if (gimple_code (stmt) == GIMPLE_PHI)
5383 scalar_dest = PHI_RESULT (stmt);
5385 scalar_dest = gimple_assign_lhs (stmt);
5387 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5389 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5391 exit_phi = USE_STMT (use_p);
5392 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5397 /* Handle stmts whose DEF is used outside the loop-nest that is
5398 being vectorized. */
5399 if (STMT_VINFO_LIVE_P (stmt_info)
5400 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5402 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5407 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5413 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
5417 vect_remove_stores (gimple first_stmt)
5419 gimple next = first_stmt;
5421 gimple_stmt_iterator next_si;
5425 /* Free the attached stmt_vec_info and remove the stmt. */
5426 next_si = gsi_for_stmt (next);
5427 gsi_remove (&next_si, true);
5428 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5429 free_stmt_vec_info (next);
5435 /* Function new_stmt_vec_info.
5437 Create and initialize a new stmt_vec_info struct for STMT. */
5440 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5441 bb_vec_info bb_vinfo)
5444 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5446 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5447 STMT_VINFO_STMT (res) = stmt;
5448 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5449 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5450 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5451 STMT_VINFO_LIVE_P (res) = false;
5452 STMT_VINFO_VECTYPE (res) = NULL;
5453 STMT_VINFO_VEC_STMT (res) = NULL;
5454 STMT_VINFO_VECTORIZABLE (res) = true;
5455 STMT_VINFO_IN_PATTERN_P (res) = false;
5456 STMT_VINFO_RELATED_STMT (res) = NULL;
5457 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5458 STMT_VINFO_DATA_REF (res) = NULL;
5460 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5461 STMT_VINFO_DR_OFFSET (res) = NULL;
5462 STMT_VINFO_DR_INIT (res) = NULL;
5463 STMT_VINFO_DR_STEP (res) = NULL;
5464 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5466 if (gimple_code (stmt) == GIMPLE_PHI
5467 && is_loop_header_bb_p (gimple_bb (stmt)))
5468 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5470 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5472 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5473 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5474 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5475 STMT_SLP_TYPE (res) = loop_vect;
5476 GROUP_FIRST_ELEMENT (res) = NULL;
5477 GROUP_NEXT_ELEMENT (res) = NULL;
5478 GROUP_SIZE (res) = 0;
5479 GROUP_STORE_COUNT (res) = 0;
5480 GROUP_GAP (res) = 0;
5481 GROUP_SAME_DR_STMT (res) = NULL;
5482 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5488 /* Create the vector that holds stmt_vec_info structs. */
5491 init_stmt_vec_info_vec (void)
5493 gcc_assert (!stmt_vec_info_vec);
5494 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5498 /* Free the vector that holds stmt_vec_info structs. */
5501 free_stmt_vec_info_vec (void)
5503 gcc_assert (stmt_vec_info_vec);
5504 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5508 /* Free stmt vectorization related info. */
5511 free_stmt_vec_info (gimple stmt)
5513 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5518 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5519 set_vinfo_for_stmt (stmt, NULL);
5524 /* Function get_vectype_for_scalar_type_and_size.
5526 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
5530 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5532 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5533 enum machine_mode simd_mode;
5534 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5541 /* We can't build a vector type of elements with alignment bigger than their size. */
5543 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5546 /* For vector types of elements whose mode precision doesn't
5547 match their type's precision we use an element type of mode
5548 precision. The vectorization routines will have to make sure
5549 they support the proper result truncation/extension. */
5550 if (INTEGRAL_TYPE_P (scalar_type)
5551 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5552 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5553 TYPE_UNSIGNED (scalar_type));
5555 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5556 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5559 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5560 When the component mode passes the above test simply use a type
5561 corresponding to that mode. The theory is that any use that
5562 would cause problems with this will disable vectorization anyway. */
5563 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5564 && !INTEGRAL_TYPE_P (scalar_type)
5565 && !POINTER_TYPE_P (scalar_type))
5566 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5568 /* If no size was supplied use the mode the target prefers. Otherwise
5569 look up a vector mode of the specified size. */
5571 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5573 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5574 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5578 vectype = build_vector_type (scalar_type, nunits);
5579 if (vect_print_dump_info (REPORT_DETAILS))
5581 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5582 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5588 if (vect_print_dump_info (REPORT_DETAILS))
5590 fprintf (vect_dump, "vectype: ");
5591 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5594 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5595 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5597 if (vect_print_dump_info (REPORT_DETAILS))
5598 fprintf (vect_dump, "mode not supported by target.");
5605 unsigned int current_vector_size;
5607 /* Function get_vectype_for_scalar_type.
5609 Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
5613 get_vectype_for_scalar_type (tree scalar_type)
5616 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5617 current_vector_size);
5619 && current_vector_size == 0)
5620 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5624 /* Function get_same_sized_vectype
5626 Returns a vector type corresponding to SCALAR_TYPE with the same
5627 size as VECTOR_TYPE, if supported by the target. */
5630 get_same_sized_vectype (tree scalar_type, tree vector_type)
5632 return get_vectype_for_scalar_type_and_size
5633 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
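/* Illustrative sketch (not from the original sources): the unit counts
   used by the routines above reduce to the vector size in bytes divided
   by the element size, e.g. a 16-byte vector of 4-byte ints has 4 units.
   The helper below is invented for the example; it returns 0 for the
   degenerate cases the real code rejects.  */

static unsigned
example_nunits_for_size (unsigned vector_size, unsigned elem_size)
{
  if (elem_size == 0 || vector_size % elem_size != 0)
    return 0;
  return vector_size / elem_size;
}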
5636 /* Function vect_is_simple_use.
5639 LOOP_VINFO - the vect info of the loop that is being vectorized.
5640 BB_VINFO - the vect info of the basic block that is being vectorized.
5641 OPERAND - operand of a stmt in the loop or bb.
5642 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5644 Returns whether a stmt with OPERAND can be vectorized.
5645 For loops, supportable operands are constants, loop invariants, and operands
5646 that are defined by the current iteration of the loop. Unsupportable
5647 operands are those that are defined by a previous iteration of the loop (as
5648 is the case in reduction/induction computations).
5649 For basic blocks, supportable operands are constants and bb invariants.
5650 For now, operands defined outside the basic block are not supported. */
5653 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5654 bb_vec_info bb_vinfo, gimple *def_stmt,
5655 tree *def, enum vect_def_type *dt)
5658 stmt_vec_info stmt_vinfo;
5659 struct loop *loop = NULL;
5662 loop = LOOP_VINFO_LOOP (loop_vinfo);
5667 if (vect_print_dump_info (REPORT_DETAILS))
5669 fprintf (vect_dump, "vect_is_simple_use: operand ");
5670 print_generic_expr (vect_dump, operand, TDF_SLIM);
5673 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5675 *dt = vect_constant_def;
5679 if (is_gimple_min_invariant (operand))
5682 *dt = vect_external_def;
5686 if (TREE_CODE (operand) == PAREN_EXPR)
5688 if (vect_print_dump_info (REPORT_DETAILS))
5689 fprintf (vect_dump, "non-associatable copy.");
5690 operand = TREE_OPERAND (operand, 0);
5693 if (TREE_CODE (operand) != SSA_NAME)
5695 if (vect_print_dump_info (REPORT_DETAILS))
5696 fprintf (vect_dump, "not ssa-name.");
5700 *def_stmt = SSA_NAME_DEF_STMT (operand);
5701 if (*def_stmt == NULL)
5703 if (vect_print_dump_info (REPORT_DETAILS))
5704 fprintf (vect_dump, "no def_stmt.");
5708 if (vect_print_dump_info (REPORT_DETAILS))
5710 fprintf (vect_dump, "def_stmt: ");
5711 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5714 /* Empty stmt is expected only in case of a function argument.
5715 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5716 if (gimple_nop_p (*def_stmt))
5719 *dt = vect_external_def;
5723 bb = gimple_bb (*def_stmt);
5725 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5726 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5727 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5728 *dt = vect_external_def;
5731 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5732 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5735 if (*dt == vect_unknown_def_type)
5737 if (vect_print_dump_info (REPORT_DETAILS))
5738 fprintf (vect_dump, "Unsupported pattern.");
5742 if (vect_print_dump_info (REPORT_DETAILS))
5743 fprintf (vect_dump, "type of def: %d.", *dt);
5745 switch (gimple_code (*def_stmt))
5748 *def = gimple_phi_result (*def_stmt);
5752 *def = gimple_assign_lhs (*def_stmt);
5756 *def = gimple_call_lhs (*def_stmt);
5761 if (vect_print_dump_info (REPORT_DETAILS))
5762 fprintf (vect_dump, "unsupported defining stmt: ");
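/* Illustrative example (an assumption for exposition, not taken from this
   file): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the use of  b[i]  is defined by a load in the current iteration
   (vect_internal_def), the loop-invariant  x  defined before the loop is
   vect_external_def, and a literal constant operand would be
   vect_constant_def.  An operand carried around the loop, as in a
   reduction or induction, gets one of the cycle def types instead.  */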
5769 /* Function vect_is_simple_use_1.
5771 Same as vect_is_simple_use but also determines the vector operand
5772 type of OPERAND and stores it to *VECTYPE. If the definition of
5773 OPERAND is vect_uninitialized_def, vect_constant_def or
5774 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5775 is responsible for computing the best suited vector type for the use. */
5779 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5780 bb_vec_info bb_vinfo, gimple *def_stmt,
5781 tree *def, enum vect_def_type *dt, tree *vectype)
5783 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5786 /* Now get a vector type if the def is internal, otherwise supply
5787 NULL_TREE and leave it up to the caller to figure out a proper
5788 type for the use stmt. */
5789 if (*dt == vect_internal_def
5790 || *dt == vect_induction_def
5791 || *dt == vect_reduction_def
5792 || *dt == vect_double_reduction_def
5793 || *dt == vect_nested_cycle)
5795 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5797 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5798 && !STMT_VINFO_RELEVANT (stmt_info)
5799 && !STMT_VINFO_LIVE_P (stmt_info))
5800 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5802 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5803 gcc_assert (*vectype != NULL_TREE);
5805 else if (*dt == vect_uninitialized_def
5806 || *dt == vect_constant_def
5807 || *dt == vect_external_def)
5808 *vectype = NULL_TREE;
5816 /* Function supportable_widening_operation
5818 Check whether an operation represented by the code CODE is a
5819 widening operation that is supported by the target platform in
5820 vector form (i.e., when operating on arguments of type VECTYPE_IN
5821 producing a result of type VECTYPE_OUT).
5823 Widening operations we currently support are NOP (CONVERT), FLOAT
5824 and WIDEN_MULT. This function checks if these operations are supported
5825 by the target platform either directly (via vector tree-codes), or via
5829 - CODE1 and CODE2 are codes of vector operations to be used when
5830 vectorizing the operation, if available.
5831 - DECL1 and DECL2 are decls of target builtin functions to be used
5832 when vectorizing the operation, if available. In this case,
5833 CODE1 and CODE2 are CALL_EXPR.
5834 - MULTI_STEP_CVT determines the number of required intermediate steps in
5835 case of multi-step conversion (like char->short->int - in that case
5836 MULTI_STEP_CVT will be 1).
5837 - INTERM_TYPES contains the intermediate type required to perform the
5838 widening operation (short in the above example). */
5841 supportable_widening_operation (enum tree_code code, gimple stmt,
5842 tree vectype_out, tree vectype_in,
5843 tree *decl1, tree *decl2,
5844 enum tree_code *code1, enum tree_code *code2,
5845 int *multi_step_cvt,
5846 VEC (tree, heap) **interm_types)
5848 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5849 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5850 struct loop *vect_loop = NULL;
5852 enum machine_mode vec_mode;
5853 enum insn_code icode1, icode2;
5854 optab optab1, optab2;
5855 tree vectype = vectype_in;
5856 tree wide_vectype = vectype_out;
5857 enum tree_code c1, c2;
5860 vect_loop = LOOP_VINFO_LOOP (loop_info);
5862 /* The result of a vectorized widening operation usually requires two vectors
5863 (because the widened results do not fit in one vector). The generated
5864 vector results would normally be expected to be generated in the same
5865 order as in the original scalar computation, i.e. if 8 results are
5866 generated in each vector iteration, they are to be organized as follows:
5867 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5869 However, in the special case that the result of the widening operation is
5870 used in a reduction computation only, the order doesn't matter (because
5871 when vectorizing a reduction we change the order of the computation).
5872 Some targets can take advantage of this and generate more efficient code.
5873 For example, targets like Altivec, that support widen_mult using a sequence
5874 of {mult_even,mult_odd} generate the following vectors:
5875 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5877 When vectorizing outer-loops, we execute the inner-loop sequentially
5878 (each vectorized inner-loop iteration contributes to VF outer-loop
5879 iterations in parallel). We therefore don't allow changing the order
5880 of the computation in the inner-loop during outer-loop vectorization. */
5883 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5884 && !nested_in_vect_loop_p (vect_loop, stmt))
5890 && code == WIDEN_MULT_EXPR
5891 && targetm.vectorize.builtin_mul_widen_even
5892 && targetm.vectorize.builtin_mul_widen_even (vectype)
5893 && targetm.vectorize.builtin_mul_widen_odd
5894 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5896 if (vect_print_dump_info (REPORT_DETAILS))
5897 fprintf (vect_dump, "Unordered widening operation detected.");
5899 *code1 = *code2 = CALL_EXPR;
5900 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5901 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5907 case WIDEN_MULT_EXPR:
5908 if (BYTES_BIG_ENDIAN)
5910 c1 = VEC_WIDEN_MULT_HI_EXPR;
5911 c2 = VEC_WIDEN_MULT_LO_EXPR;
5915 c2 = VEC_WIDEN_MULT_HI_EXPR;
5916 c1 = VEC_WIDEN_MULT_LO_EXPR;
5920 case WIDEN_LSHIFT_EXPR:
5921 if (BYTES_BIG_ENDIAN)
5923 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5924 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5928 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5929 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
5934 if (BYTES_BIG_ENDIAN)
5936 c1 = VEC_UNPACK_HI_EXPR;
5937 c2 = VEC_UNPACK_LO_EXPR;
5941 c2 = VEC_UNPACK_HI_EXPR;
5942 c1 = VEC_UNPACK_LO_EXPR;
5947 if (BYTES_BIG_ENDIAN)
5949 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5950 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5954 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5955 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5959 case FIX_TRUNC_EXPR:
5960 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5961 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5962 computing the operation. */
5969 if (code == FIX_TRUNC_EXPR)
5971 /* The signedness is determined from the output operand. */
5972 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5973 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5977 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5978 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5981 if (!optab1 || !optab2)
5984 vec_mode = TYPE_MODE (vectype);
5985 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5986 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5989 /* Check if it's a multi-step conversion that can be done using intermediate types. */
5991 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5992 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5995 tree prev_type = vectype, intermediate_type;
5996 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5997 optab optab3, optab4;
5999 if (!CONVERT_EXPR_CODE_P (code))
6005 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6006 intermediate steps in the promotion sequence. We try
6007 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not. */
6009 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6010 for (i = 0; i < 3; i++)
6012 intermediate_mode = insn_data[icode1].operand[0].mode;
6013 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6014 TYPE_UNSIGNED (prev_type));
6015 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6016 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6018 if (!optab3 || !optab4
6019 || ((icode1 = optab_handler (optab1, prev_mode))
6020 == CODE_FOR_nothing)
6021 || insn_data[icode1].operand[0].mode != intermediate_mode
6022 || ((icode2 = optab_handler (optab2, prev_mode))
6023 == CODE_FOR_nothing)
6024 || insn_data[icode2].operand[0].mode != intermediate_mode
6025 || ((icode1 = optab_handler (optab3, intermediate_mode))
6026 == CODE_FOR_nothing)
6027 || ((icode2 = optab_handler (optab4, intermediate_mode))
6028 == CODE_FOR_nothing))
6031 VEC_quick_push (tree, *interm_types, intermediate_type);
6032 (*multi_step_cvt)++;
6034 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6035 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6038 prev_type = intermediate_type;
6039 prev_mode = intermediate_mode;
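/* Illustrative sketch (not from the original sources): a scalar model of a
   two-step widening conversion char -> short -> int of the kind validated
   above (MULTI_STEP_CVT == 1, with short as the intermediate type).  In
   vector form each step is a VEC_UNPACK_{LO,HI} pair that turns one input
   vector into two vectors of the wider type; per element it is simply a
   widening conversion applied twice, as modeled below with invented
   names.  */

static void
example_two_step_widen (const signed char *in, int *out, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    {
      short tmp = (short) in[i];	/* step 1: char  -> short.  */
      out[i] = (int) tmp;		/* step 2: short -> int.  */
    }
}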
6051 /* Function supportable_narrowing_operation
6053 Check whether an operation represented by the code CODE is a
6054 narrowing operation that is supported by the target platform in
6055 vector form (i.e., when operating on arguments of type VECTYPE_IN
6056 and producing a result of type VECTYPE_OUT).
6058 Narrowing operations we currently support are NOP (CONVERT) and
6059 FIX_TRUNC. This function checks if these operations are supported by
6060 the target platform directly via vector tree-codes.
6063 - CODE1 is the code of a vector operation to be used when
6064 vectorizing the operation, if available.
6065 - MULTI_STEP_CVT determines the number of required intermediate steps in
6066 case of multi-step conversion (like int->short->char - in that case
6067 MULTI_STEP_CVT will be 1).
6068 - INTERM_TYPES contains the intermediate type required to perform the
6069 narrowing operation (short in the above example). */
6072 supportable_narrowing_operation (enum tree_code code,
6073 tree vectype_out, tree vectype_in,
6074 enum tree_code *code1, int *multi_step_cvt,
6075 VEC (tree, heap) **interm_types)
6077 enum machine_mode vec_mode;
6078 enum insn_code icode1;
6079 optab optab1, interm_optab;
6080 tree vectype = vectype_in;
6081 tree narrow_vectype = vectype_out;
6083 tree intermediate_type, prev_type;
6089 c1 = VEC_PACK_TRUNC_EXPR;
6092 case FIX_TRUNC_EXPR:
6093 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6097 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6098 tree code and optabs used for computing the operation. */
6105 if (code == FIX_TRUNC_EXPR)
6106 /* The signedness is determined from the output operand. */
6107 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6109 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6114 vec_mode = TYPE_MODE (vectype);
6115 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6118 /* Check if it's a multi-step conversion that can be done using intermediate types. */
6120 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6122 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6125 prev_type = vectype;
6126 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6127 intermediate steps in the narrowing sequence. We try
6128 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6130 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6131 for (i = 0; i < 3; i++)
6133 intermediate_mode = insn_data[icode1].operand[0].mode;
6134 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6135 TYPE_UNSIGNED (prev_type));
6136 interm_optab = optab_for_tree_code (c1, intermediate_type,
6139 || ((icode1 = optab_handler (optab1, prev_mode))
6140 == CODE_FOR_nothing)
6141 || insn_data[icode1].operand[0].mode != intermediate_mode
6142 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6143 == CODE_FOR_nothing))
6146 VEC_quick_push (tree, *interm_types, intermediate_type);
6147 (*multi_step_cvt)++;
6149 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6152 prev_type = intermediate_type;
6153 prev_mode = intermediate_mode;
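/* Illustrative sketch (not from the original sources): a scalar model of
   the narrowing validated above.  In vector form VEC_PACK_TRUNC_EXPR
   takes two input vectors and produces one vector of the narrower type;
   per element it is a truncating conversion, and a two-step
   int -> short -> char sequence (MULTI_STEP_CVT == 1) applies it twice,
   as modeled below with invented names.  */

static void
example_two_step_narrow (const int *in, signed char *out, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    {
      short tmp = (short) in[i];	/* step 1: int   -> short.  */
      out[i] = (signed char) tmp;	/* step 2: short -> char.  */
    }
}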