/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
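
/* Illustrative sketch (names invented): with ELEM_TYPE == V4SF and
   NELEMS == 2, the temporary returned above has a type that prints as

       vector(4) float vect_array[2];

   i.e. an array of two vectors, indexed by the helpers below.  */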
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
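
/* Example of the GIMPLE the two helpers above emit for index 1
   (sketch only; SSA names are invented):

       vect_x_7 = vect_array[1];     <-- read_vector_array
       vect_array[1] = vect_y_8;     <-- write_vector_array  */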
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is not a pattern use.  If LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
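
/* Illustrative example (sketch; names invented):

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;      <-- has a vdef: vect_used_in_scope
           s_3 = s_1 + c[i];     <-- feeds the exit phi: live
         }
       ... = s_3;                <-- use after the loop

   The store is relevant because it alters memory; the def of s_3 is
   live because it is used outside the loop, through the loop-closed
   exit phi.  */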
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
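
/* Example (sketch): for the store "a[i_1] = x_3", the use of x_3 is the
   stored value itself, so this function returns true for it, while i_1
   only feeds the address computation of a[i_1], so the function returns
   false for it and its def-stmt is left unmarked by process_use.  */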
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
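
/* A worked instance of case 3b above (sketch; SSA names invented): if
   d_4 is defined inside the inner loop and used by an outer-loop stmt
   whose relevance is vect_used_by_reduction, the second switch rewrites
   the relevance to vect_used_in_outer_by_reduction before the def-stmt
   of d_4 is pushed on the worklist via vect_mark_relevant.  */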
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example
   (illustrative sketch, reconstructing the elided listing):

   1.  i_1 = PHI <i_0, i_2>         (loop counter)
   2.  x_3 = a[i_1]                 (load)
   3.  ap_4 = &b[i_1]               (address computation)
   4.  *ap_4 = x_3 + c_5            (store)

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");

                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
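
/* Example: in an interleaved group of 4 stores, the call returns 4 for
   the stmt that is GROUP_FIRST_ELEMENT and 1 for the other three, so a
   caller summing the result over the whole group counts the group size
   exactly once.  */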
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
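
/* Worked example (illustrative, assuming vect_get_stmt_cost
   (vector_stmt) == 1): for a permuting store group with group_size == 4
   and ncopies == 1, the permute overhead above is
   1 * exact_log2 (4) * 4 == 8, to which vect_get_store_cost then adds
   the cost of the vector stores themselves.  */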
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
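
/* Example of the initialization this creates (sketch; SSA numbers are
   invented): for a V4SI VECTOR_VAR of {3, 3, 3, 3} and a NULL GSI, the
   loop preheader gains

       cst_.7_9 = { 3, 3, 3, 3 };

   and cst_.7_9 is returned as the vector def.  */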
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
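
/* End-to-end sketch for the NONE case above (target-dependent and
   illustrative only): if the target's builtin_vectorized_function hook
   maps sqrtf to a V4SF builtin, here called VSQRTF, then with VF == 4
   the scalar call "z_5 = sqrtf (x_4);" becomes the single vector call
   "vect_z_8 = VSQRTF (vect_x_7);", and the original stmt is finally
   replaced by the harmless copy "z_5 = 0.0;" so that dce can remove
   it.  */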
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}
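
/* Demotion example (sketch; SSA names invented): with CODE ==
   VEC_PACK_TRUNC_EXPR, each iteration of the loop above combines two
   V8HI operands into one V16QI result:

       vect_t_10 = VEC_PACK_TRUNC_EXPR <vop0_8, vop1_9>;

   A two-step V4SI -> V16QI demotion first packs pairs of V4SI into V8HI
   and then recurses once to pack pairs of V8HI into V16QI.  */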
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  VEC (tree, heap) *vec_tmp = NULL;

  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      VEC_quick_push (tree, vec_tmp, new_tmp1);
      VEC_quick_push (tree, vec_tmp, new_tmp2);
    }

  VEC_free (tree, heap, *vec_oprnds0);
  *vec_oprnds0 = vec_tmp;
}
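
/* Promotion example (sketch; SSA names invented): for a V16QI -> V8HI
   widening with CODE1 == VEC_UNPACK_LO_EXPR and CODE2 ==
   VEC_UNPACK_HI_EXPR, every input vector produces two results:

       vect_lo_11 = [vec_unpack_lo_expr] vop0_9;
       vect_hi_12 = [vec_unpack_hi_expr] vop0_9;

   which is why VEC_TMP is allocated with twice the length of
   VEC_OPRNDS0.  */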
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                 &dt[1]);

      if (!ok)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &decl1, &decl2, &code1, &code2,
					  &multi_step_cvt, &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &decl1, &decl2,
						    &codecvt1, &codecvt2,
						    &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, NULL, NULL, &code1,
					      &code2, &multi_step_cvt,
					      &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  VEC_safe_push (tree, heap, interm_types, cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
  if (!vec_stmt)		/* transformation not required.  */
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
	}
      VEC_free (tree, heap, interm_types);
      return true;
    }
  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
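  /* E.g. (illustrative): a char -> int promotion that needs the two-step
     chain char -> short -> int stores the final int destination at index 0
     of VEC_DSTS and the intermediate short destination after it, so the
     loop below that walks indices MULTI_STEP_CVT down to 0 emits the
     innermost conversion first.  */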
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
	   VEC_iterate (tree, interm_types, i, intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  VEC_quick_push (tree, vec_dsts, vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest, cvt_type);

  if (!slp_node)
    {
      if (modifier == NONE)
	vec_oprnds0 = VEC_alloc (tree, heap, 1);
      else if (modifier == WIDEN)
	{
	  vec_oprnds0 = VEC_alloc (tree, heap,
				   (multi_step_cvt
				    ? vect_pow2 (multi_step_cvt) : 1));
	  if (op_type == binary_op)
	    vec_oprnds1 = VEC_alloc (tree, heap, 1);
	}
      else
	vec_oprnds0 = VEC_alloc (tree, heap,
				 2 * (multi_step_cvt
				      ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      VEC_truncate (tree, vec_oprnds0, 0);
	      VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  VEC_truncate (tree, vec_oprnds1, 0);
		  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = VEC_index (tree, vec_dsts, i);
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      VEC_truncate (tree, vec_oprnds0, 0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1,
							     new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		VEC_replace (tree, vec_oprnds0, i, new_temp);
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }
  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, interm_types);

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
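/* Illustrative example: a scalar copy

        S1: y = x

   simply becomes, with four elements per vector,

        VS1: vy = vx

   once per copy; conversions that only reinterpret the bits are wrapped
   in a VIEW_CONVERT_EXPR.  */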
static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "type conversion to/from bit-precision "
		 "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
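/* A typical use (hypothetical caller shown for illustration only):

        if (!vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
          return NULL;

   i.e. a pattern recognizer can bail out early when neither the
   vector/scalar nor the vector/vector form of the shift exists on the
   target.  */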
bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
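/* Illustrative example: in

        S1: y = x << c          (C loop-invariant)
        S2: y = x << z          (Z defined inside the loop)

   S1 can use the target's vector-by-scalar shift patterns (one shift
   amount for the whole vector), whereas S2 needs the vector-by-vector
   form in which every element carries its own shift amount.  */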
static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
			     &dt[1], &op1_vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "vector/vector shift/rotate found.");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "unusable type for last operand in"
				" vector/vector shift/rotate.");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "vector/scalar shift/rotate found.");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vector/vector shift/rotate found.");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (vect_print_dump_info (REPORT_DETAILS))
			fprintf (vect_dump, "unusable type for last operand in"
					    " vector/vector shift/rotate.");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }
  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (vect_print_dump_info (REPORT_DETAILS))
		    fprintf (vect_dump, "operand 1 using scalar mode.");
		  vec_oprnd1 = op1;
		  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
	{
	  vop1 = VEC_index (tree, vec_oprnds1, i);
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  return true;
}

/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
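/* Illustrative example: the scalar stmt

        S1: z = x + y

   becomes, with four elements per vector,

        VS1: vz = vx + vy

   repeated NCOPIES times; a ternary code such as FMA_EXPR simply takes
   rhs3 as a third vector operand.  */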
static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
		 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "no vectype for scalar type ");
	  print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
			       &dt[1]))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
			       &dt[2]))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }
  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
	vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
	vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2 = VEC_alloc (tree, heap, 1);
	      VEC_quick_push (tree, vec_oprnds2,
			      vect_get_vec_def_for_operand (op2, stmt, NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
	      VEC_quick_push (tree, vec_oprnds2,
			      vect_get_vec_def_for_stmt_copy (dt[2],
							      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
	  new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
						    vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);
  if (vec_oprnds2)
    VEC_free (tree, heap, vec_oprnds2);

  return true;
}

/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
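/* Illustrative example: a unit-stride store

        S1: a[i] = x

   becomes a single vector store per copy

        VS1: MEM_REF[&a + offset] = vx

   while an interleaved group of stores goes through
   vect_permute_store_chain () (or a target store-lanes instruction)
   first; see the comments in the transformation code below.  */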
static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool strided_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  bool inv_p;
  VEC(tree,heap) *vec_oprnds = NULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "negative step for store.");
      return false;
    }

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_store_lanes_supported (vectype, group_size))
	    store_lanes_p = true;
	  else if (!vect_strided_store_supported (vectype, group_size))
	    return false;
	}

      if (first_stmt == stmt)
	{
	  /* STMT is the leader of the group.  Check the operands of all the
	     stmts of the group.  */
	  next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
	  while (next_stmt)
	    {
	      gcc_assert (gimple_assign_single_p (next_stmt));
	      op = gimple_assign_rhs1 (next_stmt);
	      if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
				       &def, &dt))
		{
		  if (vect_print_dump_info (REPORT_DETAILS))
		    fprintf (vect_dump, "use not simple.");
		  return false;
		}
	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	    }
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
      return true;
    }
  /** Transform.  **/

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
	  && !slp)
	{
	  *vec_stmt = NULL;
	  return true;
	}

      if (slp)
	{
	  strided_store = false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
	  op = gimple_assign_rhs1 (first_stmt);
	}
      else
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example)), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
	{
	  if (slp)
	    {
	      /* Get vectorized arguments for SLP_NODE.  */
	      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
				 NULL, slp_node, -1);

	      vec_oprnd = VEC_index (tree, vec_oprnds, 0);
	    }
	  else
	    {
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain(), and OPRNDS as
		 an input to vect_get_vec_def_for_stmt_copy() for the next
		 copy.

		 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN
		 and OPRNDS are of size 1.  */
	      next_stmt = first_stmt;
	      for (i = 0; i < group_size; i++)
		{
		  /* Since gaps are not supported for interleaved stores,
		     GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
		     there is no interleaving, GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
		  gcc_assert (next_stmt
			      && gimple_assign_single_p (next_stmt));
		  op = gimple_assign_rhs1 (next_stmt);

		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
							    NULL);
		  VEC_quick_push(tree, dr_chain, vec_oprnd);
		  VEC_quick_push(tree, oprnds, vec_oprnd);
		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
		}
	    }

	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
						  NULL_TREE, &dummy, gsi,
						  &ptr_incr, false, &inv_p);
	  gcc_assert (bb_vinfo || !inv_p);
	}
      else
	{
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
	     the next copy.
	     If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	    {
	      op = VEC_index (tree, oprnds, i);
	      vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
				  &dt);
	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
	      VEC_replace(tree, dr_chain, i, vec_oprnd);
	      VEC_replace(tree, oprnds, i, vec_oprnd);
	    }
	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					 TYPE_SIZE_UNIT (aggr_type));
	}

      if (store_lanes_p)
	{
	  tree vec_array;

	  /* Combine all the vectors into an array.  */
	  vec_array = create_vector_array (vectype, vec_num);
	  for (i = 0; i < vec_num; i++)
	    {
	      vec_oprnd = VEC_index (tree, dr_chain, i);
	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
	    }

	  /* Emit:
	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
						 vec_array);
	  gimple_call_set_lhs (new_stmt, data_ref);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);
	}
      else
	{
	  new_stmt = NULL;
	  if (strided_store)
	    {
	      result_chain = VEC_alloc (tree, heap, group_size);
	      /* Permute.  */
	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
					&result_chain);
	    }

	  next_stmt = first_stmt;
	  for (i = 0; i < vec_num; i++)
	    {
	      struct ptr_info_def *pi;

	      if (i > 0)
		/* Bump the vector pointer.  */
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      if (slp)
		vec_oprnd = VEC_index (tree, vec_oprnds, i);
	      else if (strided_store)
		/* For strided stores vectorized defs are interleaved in
		   vect_permute_store_chain().  */
		vec_oprnd = VEC_index (tree, result_chain, i);

	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
				 build_int_cst (reference_alias_ptr_type
						(DR_REF (first_dr)), 0));
	      pi = get_ptr_info (dataref_ptr);
	      pi->align = TYPE_ALIGN_UNIT (vectype);
	      if (aligned_access_p (first_dr))
		pi->misalign = 0;
	      else if (DR_MISALIGNMENT (first_dr) == -1)
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  pi->align = TYPE_ALIGN_UNIT (elem_type);
		  pi->misalign = 0;
		}
	      else
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  pi->misalign = DR_MISALIGNMENT (first_dr);
		}

	      /* Arguments are ready.  Create the new vector stmt.  */
	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      mark_symbols_for_renaming (new_stmt);

	      if (slp)
		continue;

	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	      if (!next_stmt)
		break;
	    }
	}
      if (!slp)
	{
	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);
  if (vec_oprnds)
    VEC_free (tree, heap, vec_oprnds);

  return true;
}

/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */
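/* For example (illustrative): with a 4-element VECTYPE, SEL = {3, 2, 1, 0}
   yields the constant mask { 3, 2, 1, 0 }, which a VEC_PERM_EXPR uses to
   reverse the input vector; see perm_mask_for_reverse below.  */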
static tree
gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type
    = lang_hooks.types.type_for_size
    (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_vec = NULL;
  for (i = nunits - 1; i >= 0; i--)
    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
			  mask_vec);
  mask_vec = build_vector (mask_type, mask_vec);

  return mask_vec;
}

/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return gen_perm_mask (vectype, sel);
}

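/* E.g. for nunits == 4 the loop above builds SEL = {3, 2, 1, 0}, so a
   negative-step load of {a, b, c, d} can be turned into {d, c, b, a}
   with a single VEC_PERM_EXPR (see its use in vectorizable_load).  */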
/* Given a vector variable X and Y, that was generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
					     x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
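/* Illustrative example: a unit-stride load

        S1: x = a[i]

   becomes one vector load per copy

        VS1: vx = MEM_REF[&a + offset]

   while interleaved groups go through load-lanes or extract/permute
   stmts, and gather accesses (STMT_VINFO_GATHER_P) call the target's
   builtin gather with an all-ones mask; see below.  */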
static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types with negative step.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_load_lanes_supported (vectype, group_size))
	    load_lanes_p = true;
	  else if (!vect_strided_load_supported (vectype, group_size))
	    return false;
	}
    }

  if (negative)
    {
      gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
	  && alignment_support_scheme != dr_unaligned_supported)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step but alignment required.");
	  return false;
	}
      if (!perm_mask_for_reverse (vectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step and reversing not supported.");
	  return false;
	}
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
				       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &gather_dt,
				 &gather_off_vectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "gather index use not simple.");
	  return false;
	}
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/
4203 if (STMT_VINFO_GATHER_P (stmt_info))
4205 tree vec_oprnd0 = NULL_TREE, op;
4206 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4207 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4208 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4209 edge pe = loop_preheader_edge (loop);
4212 enum { NARROW, NONE, WIDEN } modifier;
4213 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4215 if (nunits == gather_off_nunits)
4217 else if (nunits == gather_off_nunits / 2)
4219 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4222 for (i = 0; i < gather_off_nunits; ++i)
4223 sel[i] = i | nunits;
4225 perm_mask = gen_perm_mask (gather_off_vectype, sel);
4226 gcc_assert (perm_mask != NULL_TREE);
4228 else if (nunits == gather_off_nunits * 2)
4230 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4233 for (i = 0; i < nunits; ++i)
4234 sel[i] = i < gather_off_nunits
4235 ? i : i + nunits - gather_off_nunits;
4237 perm_mask = gen_perm_mask (vectype, sel);
4238 gcc_assert (perm_mask != NULL_TREE);
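  /* Worked example of the masks above (illustrative): with nunits == 4 and
     gather_off_nunits == 8 the WIDEN mask is { 4, 5, 6, 7, 4, 5, 6, 7 },
     selecting the high half of the offset vector for the odd-numbered
     copies.  With nunits == 8 and gather_off_nunits == 4 the NARROW mask
     is { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the low halves of two
     successive gather results (matching the WIDEN and NARROW permutes in
     the transformation loop below).  */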
4244 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4245 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4246 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4247 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4248 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4249 scaletype = TREE_VALUE (arglist);
4250 gcc_checking_assert (types_compatible_p (srctype, rettype)
4251 && types_compatible_p (srctype, masktype));
4253 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4255 ptr = fold_convert (ptrtype, gather_base);
4256 if (!is_gimple_min_invariant (ptr))
4258 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4259 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4260 gcc_assert (!new_bb);
4263 /* Currently we support only unconditional gather loads,
4264 so mask should be all ones. */
4265 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4266 mask = build_int_cst (TREE_TYPE (masktype), -1);
4267 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4271 for (j = 0; j < 6; ++j)
4273 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4274 mask = build_real (TREE_TYPE (masktype), r);
4278 mask = build_vector_from_val (masktype, mask);
4279 mask = vect_init_vector (stmt, mask, masktype, NULL);
4281 scale = build_int_cst (scaletype, gather_scale);
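  /* The transformation loop below emits, per copy, a call of the form
     (sketch):

       vx = GATHER_DECL (mask, ptr, idx, mask, scale);

     where IDX is VIEW_CONVERTed to the builtin's index type when the
     offset vector type differs, the all-ones MASK makes the gather
     unconditional, and the mask value is passed both as the source
     operand and as the mask proper.  */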
4283 prev_stmt_info = NULL;
4284 for (j = 0; j < ncopies; ++j)
4286 if (modifier == WIDEN && (j & 1))
4287 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4288 perm_mask, stmt, gsi);
4291 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4294 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4296 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4298 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4299 == TYPE_VECTOR_SUBPARTS (idxtype));
4300 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4301 add_referenced_var (var);
4302 var = make_ssa_name (var, NULL);
4303 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4305 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4307 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4312 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4314 if (!useless_type_conversion_p (vectype, rettype))
4316 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4317 == TYPE_VECTOR_SUBPARTS (rettype));
4318 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4319 add_referenced_var (var);
4320 op = make_ssa_name (var, new_stmt);
4321 gimple_call_set_lhs (new_stmt, op);
4322 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4323 var = make_ssa_name (vec_dest, NULL);
4324 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4326 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4331 var = make_ssa_name (vec_dest, new_stmt);
4332 gimple_call_set_lhs (new_stmt, var);
4335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4337 if (modifier == NARROW)
4344 var = permute_vec_elements (prev_res, var,
4345 perm_mask, stmt, gsi);
4346 new_stmt = SSA_NAME_DEF_STMT (var);
4349 if (prev_stmt_info == NULL)
4350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4353 prev_stmt_info = vinfo_for_stmt (new_stmt);
4360 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4362 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4363 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4364 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4366 /* Check if the chain of loads is already vectorized. */
4367 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4369 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4372 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4373 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4375 /* VEC_NUM is the number of vect stmts to be created for this group. */
4378 strided_load = false;
4379 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4380 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4384 vec_num = group_size;
4390 group_size = vec_num = 1;
4393 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4394 gcc_assert (alignment_support_scheme);
4395   /* Targets with load-lane instructions must not require explicit realignment. */
4397 gcc_assert (!load_lanes_p
4398 || alignment_support_scheme == dr_aligned
4399 || alignment_support_scheme == dr_unaligned_supported);
4401 /* In case the vectorization factor (VF) is bigger than the number
4402 of elements that we can fit in a vectype (nunits), we have to generate
4403 more than one vector stmt - i.e - we need to "unroll" the
4404 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4405 from one copy of the vector stmt to the next, in the field
4406 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4407 stages to find the correct vector defs to be used when vectorizing
4408 stmts that use the defs of the current stmt. The example below
4409 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4410 need to create 4 vectorized stmts):
4412 before vectorization:
4413 RELATED_STMT VEC_STMT
4417 step 1: vectorize stmt S1:
4418 We first create the vector stmt VS1_0, and, as usual, record a
4419 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4420 Next, we create the vector stmt VS1_1, and record a pointer to
4421 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4422   Similarly, for VS1_2 and VS1_3. This is the resulting chain of stmts and pointers:
4424 RELATED_STMT VEC_STMT
4425 VS1_0: vx0 = memref0 VS1_1 -
4426 VS1_1: vx1 = memref1 VS1_2 -
4427 VS1_2: vx2 = memref2 VS1_3 -
4428 VS1_3: vx3 = memref3 - -
4429 S1: x = load - VS1_0
4432   See the documentation of vect_get_vec_def_for_stmt_copy for how the
4433   information recorded in the RELATED_STMT field is used to vectorize the stmts that use these defs. */
4436 /* In case of interleaving (non-unit strided access):
4443 Vectorized loads are created in the order of memory accesses
4444 starting from the access of the first stmt of the chain:
4447 VS2: vx1 = &base + vec_size*1
4448   VS3: vx2 = &base + vec_size*2
4449   VS4: vx3 = &base + vec_size*3
4451 Then permutation statements are generated:
4453 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4454 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4457 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4458 (the order of the data-refs in the output of vect_permute_load_chain
4459 corresponds to the order of scalar stmts in the interleaving chain - see
4460 the documentation of vect_permute_load_chain()).
4461 The generation of permutation stmts and recording them in
4462 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4464 In case of both multiple types and interleaving, the vector loads and
4465 permutation stmts above are created for every copy. The result vector
4466 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4467 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4469 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4470 on a target that supports unaligned accesses (dr_unaligned_supported)
4471 we generate the following code:
4475 p = p + indx * vectype_size;
4480 Otherwise, the data reference is potentially unaligned on a target that
4481 does not support unaligned accesses (dr_explicit_realign_optimized) -
4482 then generate the following code, in which the data in each iteration is
4483 obtained by two vector loads, one from the previous iteration, and one
4484 from the current iteration:
4486 msq_init = *(floor(p1))
4487 p2 = initial_addr + VS - 1;
4488 realignment_token = call target_builtin;
4491 p2 = p2 + indx * vectype_size
4493 vec_dest = realign_load (msq, lsq, realignment_token)
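     For instance (illustrative): for V4SI and p == &a[1], the two aligned
     loads fetch { a[0], a[1], a[2], a[3] } and { a[4], a[5], a[6], a[7] },
     and realign_load shifts their concatenation to produce
     { a[1], a[2], a[3], a[4] }.  */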
4498 /* If the misalignment remains the same throughout the execution of the
4499 loop, we can create the init_addr and permutation mask at the loop
4500 preheader. Otherwise, it needs to be created inside the loop.
4501 This can only occur when vectorizing memory accesses in the inner-loop
4502 nested within an outer-loop that is being vectorized. */
4504 if (loop && nested_in_vect_loop_p (loop, stmt)
4505 && (TREE_INT_CST_LOW (DR_STEP (dr))
4506 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4508 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4509 compute_in_loop = true;
4512 if ((alignment_support_scheme == dr_explicit_realign_optimized
4513 || alignment_support_scheme == dr_explicit_realign)
4514 && !compute_in_loop)
4516 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4517 alignment_support_scheme, NULL_TREE,
4519 if (alignment_support_scheme == dr_explicit_realign_optimized)
4521 phi = SSA_NAME_DEF_STMT (msq);
4522 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4529 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4532 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4534 aggr_type = vectype;
4536 prev_stmt_info = NULL;
4537 for (j = 0; j < ncopies; j++)
4539 /* 1. Create the vector or array pointer update chain. */
4541 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4542 offset, &dummy, gsi,
4543 &ptr_incr, false, &inv_p);
4545 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4546 TYPE_SIZE_UNIT (aggr_type));
4548 if (strided_load || slp_perm)
4549 dr_chain = VEC_alloc (tree, heap, vec_num);
4555 vec_array = create_vector_array (vectype, vec_num);
4558 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4559 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4560 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4561 gimple_call_set_lhs (new_stmt, vec_array);
4562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 mark_symbols_for_renaming (new_stmt);
4565 /* Extract each vector into an SSA_NAME. */
4566 for (i = 0; i < vec_num; i++)
4568 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4570 VEC_quick_push (tree, dr_chain, new_temp);
4573 /* Record the mapping between SSA_NAMEs and statements. */
4574 vect_record_strided_load_vectors (stmt, dr_chain);
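	  /* Illustrative example: for group_size == 2 and V4SI, a single
	     IFN_LOAD_LANES call de-interleaves the memory contents
	       { a0, b0, a1, b1, a2, b2, a3, b3 }
	     into VEC_ARRAY[0] = { a0, a1, a2, a3 } and
	     VEC_ARRAY[1] = { b0, b1, b2, b3 }.  */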
4578 for (i = 0; i < vec_num; i++)
4581 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4584 /* 2. Create the vector-load in the loop. */
4585 switch (alignment_support_scheme)
4588 case dr_unaligned_supported:
4590 struct ptr_info_def *pi;
4592 = build2 (MEM_REF, vectype, dataref_ptr,
4593 build_int_cst (reference_alias_ptr_type
4594 (DR_REF (first_dr)), 0));
4595 pi = get_ptr_info (dataref_ptr);
4596 pi->align = TYPE_ALIGN_UNIT (vectype);
4597 if (alignment_support_scheme == dr_aligned)
4599 gcc_assert (aligned_access_p (first_dr));
4602 else if (DR_MISALIGNMENT (first_dr) == -1)
4604 TREE_TYPE (data_ref)
4605 = build_aligned_type (TREE_TYPE (data_ref),
4606 TYPE_ALIGN (elem_type));
4607 pi->align = TYPE_ALIGN_UNIT (elem_type);
4612 TREE_TYPE (data_ref)
4613 = build_aligned_type (TREE_TYPE (data_ref),
4614 TYPE_ALIGN (elem_type));
4615 pi->misalign = DR_MISALIGNMENT (first_dr);
4619 case dr_explicit_realign:
4624 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4626 if (compute_in_loop)
4627 msq = vect_setup_realignment (first_stmt, gsi,
4629 dr_explicit_realign,
4632 new_stmt = gimple_build_assign_with_ops
4633 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4635 (TREE_TYPE (dataref_ptr),
4636 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4637 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4638 gimple_assign_set_lhs (new_stmt, ptr);
4639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4641 = build2 (MEM_REF, vectype, ptr,
4642 build_int_cst (reference_alias_ptr_type
4643 (DR_REF (first_dr)), 0));
4644 vec_dest = vect_create_destination_var (scalar_dest,
4646 new_stmt = gimple_build_assign (vec_dest, data_ref);
4647 new_temp = make_ssa_name (vec_dest, new_stmt);
4648 gimple_assign_set_lhs (new_stmt, new_temp);
4649 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4650 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4651 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4654 bump = size_binop (MULT_EXPR, vs_minus_1,
4655 TYPE_SIZE_UNIT (elem_type));
4656 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4657 new_stmt = gimple_build_assign_with_ops
4658 (BIT_AND_EXPR, NULL_TREE, ptr,
4661 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4662 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4663 gimple_assign_set_lhs (new_stmt, ptr);
4664 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4666 = build2 (MEM_REF, vectype, ptr,
4667 build_int_cst (reference_alias_ptr_type
4668 (DR_REF (first_dr)), 0));
4671 case dr_explicit_realign_optimized:
4672 new_stmt = gimple_build_assign_with_ops
4673 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4675 (TREE_TYPE (dataref_ptr),
4676 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4677 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4679 gimple_assign_set_lhs (new_stmt, new_temp);
4680 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4682 = build2 (MEM_REF, vectype, new_temp,
4683 build_int_cst (reference_alias_ptr_type
4684 (DR_REF (first_dr)), 0));
4689 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4690 new_stmt = gimple_build_assign (vec_dest, data_ref);
4691 new_temp = make_ssa_name (vec_dest, new_stmt);
4692 gimple_assign_set_lhs (new_stmt, new_temp);
4693 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4694 mark_symbols_for_renaming (new_stmt);
4696	  /* 3. Handle explicit realignment if necessary/supported.  Create in loop:
4698	       vec_dest = realign_load (msq, lsq, realignment_token) */
4699 if (alignment_support_scheme == dr_explicit_realign_optimized
4700 || alignment_support_scheme == dr_explicit_realign)
4702 lsq = gimple_assign_lhs (new_stmt);
4703 if (!realignment_token)
4704 realignment_token = dataref_ptr;
4705 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4707 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4710 new_temp = make_ssa_name (vec_dest, new_stmt);
4711 gimple_assign_set_lhs (new_stmt, new_temp);
4712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4714 if (alignment_support_scheme == dr_explicit_realign_optimized)
4717 if (i == vec_num - 1 && j == ncopies - 1)
4718 add_phi_arg (phi, lsq,
4719 loop_latch_edge (containing_loop),
4725 /* 4. Handle invariant-load. */
4726 if (inv_p && !bb_vinfo)
4729 gimple_stmt_iterator gsi2 = *gsi;
4730 gcc_assert (!strided_load);
4733 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4736 tem = fold_convert (TREE_TYPE (vectype), tem);
4737 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4741 vec_inv = build_vector_from_val (vectype, tem);
4742 new_temp = vect_init_vector (stmt, vec_inv,
4744 new_stmt = SSA_NAME_DEF_STMT (new_temp);
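	      /* E.g. (illustrative): a load of *p that is invariant in the
		 loop is vectorized by splatting the scalar value x = *p
		 into a vector { x, x, x, x }, built once rather than
		 reloaded on every iteration.  */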
4749 tree perm_mask = perm_mask_for_reverse (vectype);
4750 new_temp = permute_vec_elements (new_temp, new_temp,
4751 perm_mask, stmt, gsi);
4752 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4755 /* Collect vector loads and later create their permutation in
4756 vect_transform_strided_load (). */
4757 if (strided_load || slp_perm)
4758 VEC_quick_push (tree, dr_chain, new_temp);
4760 /* Store vector loads in the corresponding SLP_NODE. */
4761 if (slp && !slp_perm)
4762 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4767 if (slp && !slp_perm)
4772 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4773 slp_node_instance, false))
4775 VEC_free (tree, heap, dr_chain);
4784 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4790 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4792 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4793 prev_stmt_info = vinfo_for_stmt (new_stmt);
4797 VEC_free (tree, heap, dr_chain);
4803 /* Function vect_is_simple_cond.
4806 LOOP - the loop that is being vectorized.
4807 COND - Condition that is checked for simple use.
4810 *COMP_VECTYPE - the vector type for the comparison.
4812 Returns whether a COND can be vectorized. Checks whether
4813   condition operands are supportable using vect_is_simple_use. */
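/* For instance (illustrative), given

     x = (a < b) ? c : d;

   this checks that A and B are constants or have simple vectorizable
   definitions, and records the vector type in which to do the compare.  */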
4816 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4821 enum vect_def_type dt;
4822 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4824 if (!COMPARISON_CLASS_P (cond))
4827 lhs = TREE_OPERAND (cond, 0);
4828 rhs = TREE_OPERAND (cond, 1);
4830 if (TREE_CODE (lhs) == SSA_NAME)
4832 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4833 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4837 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4838 && TREE_CODE (lhs) != FIXED_CST)
4841 if (TREE_CODE (rhs) == SSA_NAME)
4843 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4844 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4848 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4849 && TREE_CODE (rhs) != FIXED_CST)
4852 *comp_vectype = vectype1 ? vectype1 : vectype2;
4856 /* vectorizable_condition.
4858   Check if STMT is a conditional modify expression that can be vectorized.
4859   If VEC_STMT is also passed, vectorize STMT: create a vectorized
4860   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it at GSI.
4863   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4864   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in the
4865   else clause if it is 2).
4867 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
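/* For example (sketch): the scalar statement

     x = a < b ? c : d;

   is replaced by the vector statement

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   which selects from VC and VD one whole vector at a time.  */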
4870 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4871 gimple *vec_stmt, tree reduc_def, int reduc_index,
4874 tree scalar_dest = NULL_TREE;
4875 tree vec_dest = NULL_TREE;
4876 tree cond_expr, then_clause, else_clause;
4877 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4878 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4880 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4881 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4882 tree vec_compare, vec_cond_expr;
4884 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4886 enum vect_def_type dt, dts[4];
4887 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4889 enum tree_code code;
4890 stmt_vec_info prev_stmt_info = NULL;
4892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4893 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4894 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4896 if (slp_node || PURE_SLP_STMT (stmt_info))
4899 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4901 gcc_assert (ncopies >= 1);
4902 if (reduc_index && ncopies > 1)
4903 return false; /* FORNOW */
4905 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4908 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4911 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4912 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4916 /* FORNOW: not yet supported. */
4917 if (STMT_VINFO_LIVE_P (stmt_info))
4919 if (vect_print_dump_info (REPORT_DETAILS))
4920 fprintf (vect_dump, "value used after loop.");
4924 /* Is vectorizable conditional operation? */
4925 if (!is_gimple_assign (stmt))
4928 code = gimple_assign_rhs_code (stmt);
4930 if (code != COND_EXPR)
4933 cond_expr = gimple_assign_rhs1 (stmt);
4934 then_clause = gimple_assign_rhs2 (stmt);
4935 else_clause = gimple_assign_rhs3 (stmt);
4937 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
4941 if (TREE_CODE (then_clause) == SSA_NAME)
4943 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4944 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
4945 &then_def_stmt, &def, &dt))
4948 else if (TREE_CODE (then_clause) != INTEGER_CST
4949 && TREE_CODE (then_clause) != REAL_CST
4950 && TREE_CODE (then_clause) != FIXED_CST)
4953 if (TREE_CODE (else_clause) == SSA_NAME)
4955 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4956 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
4957 &else_def_stmt, &def, &dt))
4960 else if (TREE_CODE (else_clause) != INTEGER_CST
4961 && TREE_CODE (else_clause) != REAL_CST
4962 && TREE_CODE (else_clause) != FIXED_CST)
4967 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4968 return expand_vec_cond_expr_p (vectype, comp_vectype);
4975 vec_oprnds0 = VEC_alloc (tree, heap, 1);
4976 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4977 vec_oprnds2 = VEC_alloc (tree, heap, 1);
4978 vec_oprnds3 = VEC_alloc (tree, heap, 1);
4982 scalar_dest = gimple_assign_lhs (stmt);
4983 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4985 /* Handle cond expr. */
4986 for (j = 0; j < ncopies; j++)
4988 gimple new_stmt = NULL;
4993 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
4994 VEC (slp_void_p, heap) *vec_defs;
4996 vec_defs = VEC_alloc (slp_void_p, heap, 4);
4997 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
4998 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
4999 VEC_safe_push (tree, heap, ops, then_clause);
5000 VEC_safe_push (tree, heap, ops, else_clause);
5001 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5002 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5003 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5004 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5005 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5007 VEC_free (tree, heap, ops);
5008 VEC_free (slp_void_p, heap, vec_defs);
5014 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5016 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5017 NULL, >emp, &def, &dts[0]);
5020 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5022 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5023 NULL, >emp, &def, &dts[1]);
5024 if (reduc_index == 1)
5025 vec_then_clause = reduc_def;
5028 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5030 vect_is_simple_use (then_clause, loop_vinfo,
5031 NULL, >emp, &def, &dts[2]);
5033 if (reduc_index == 2)
5034 vec_else_clause = reduc_def;
5037 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5039 vect_is_simple_use (else_clause, loop_vinfo,
5040 NULL, >emp, &def, &dts[3]);
5046 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5047 VEC_pop (tree, vec_oprnds0));
5048 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5049 VEC_pop (tree, vec_oprnds1));
5050 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5051 VEC_pop (tree, vec_oprnds2));
5052 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5053 VEC_pop (tree, vec_oprnds3));
5058 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5059 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5060 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5061 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5064 /* Arguments are ready. Create the new vector stmt. */
5065 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5067 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5068 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5069 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5071 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5072 vec_cond_lhs, vec_cond_rhs);
5073 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5074 vec_compare, vec_then_clause, vec_else_clause);
5076 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5077 new_temp = make_ssa_name (vec_dest, new_stmt);
5078 gimple_assign_set_lhs (new_stmt, new_temp);
5079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5081 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5088 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5090 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5092 prev_stmt_info = vinfo_for_stmt (new_stmt);
5095 VEC_free (tree, heap, vec_oprnds0);
5096 VEC_free (tree, heap, vec_oprnds1);
5097 VEC_free (tree, heap, vec_oprnds2);
5098 VEC_free (tree, heap, vec_oprnds3);
5104 /* Make sure the statement is vectorizable. */
5107 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5111 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5113 tree scalar_type, vectype;
5114 gimple pattern_stmt, pattern_def_stmt;
5116 if (vect_print_dump_info (REPORT_DETAILS))
5118 fprintf (vect_dump, "==> examining statement: ");
5119 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5122 if (gimple_has_volatile_ops (stmt))
5124 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5125 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5130   /* Skip stmts that do not need to be vectorized. In loops this is expected to include:
5132 - the COND_EXPR which is the loop exit condition
5133 - any LABEL_EXPRs in the loop
5134 - computations that are used only for array indexing or loop control.
5135      In basic blocks we only analyze statements that are a part of some SLP
5136      instance; therefore, all the statements are relevant.
5138      A pattern statement needs to be analyzed instead of the original statement
5139      if the original statement is not relevant. Otherwise, we analyze both statements. */
5142 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5143 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5144 && !STMT_VINFO_LIVE_P (stmt_info))
5146 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5148 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5149 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5151 /* Analyze PATTERN_STMT instead of the original stmt. */
5152 stmt = pattern_stmt;
5153 stmt_info = vinfo_for_stmt (pattern_stmt);
5154 if (vect_print_dump_info (REPORT_DETAILS))
5156 fprintf (vect_dump, "==> examining pattern statement: ");
5157 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5162 if (vect_print_dump_info (REPORT_DETAILS))
5163 fprintf (vect_dump, "irrelevant.");
5168 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5170 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5171 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5173 /* Analyze PATTERN_STMT too. */
5174 if (vect_print_dump_info (REPORT_DETAILS))
5176 fprintf (vect_dump, "==> examining pattern statement: ");
5177 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5180 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5184 if (is_pattern_stmt_p (stmt_info)
5185 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5186 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5187 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5189 /* Analyze def stmt of STMT if it's a pattern stmt. */
5190 if (vect_print_dump_info (REPORT_DETAILS))
5192 fprintf (vect_dump, "==> examining pattern def statement: ");
5193 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5196 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5201 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5203 case vect_internal_def:
5206 case vect_reduction_def:
5207 case vect_nested_cycle:
5208 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5209 || relevance == vect_used_in_outer_by_reduction
5210 || relevance == vect_unused_in_scope));
5213 case vect_induction_def:
5214 case vect_constant_def:
5215 case vect_external_def:
5216 case vect_unknown_def_type:
5223 gcc_assert (PURE_SLP_STMT (stmt_info));
5225 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5226 if (vect_print_dump_info (REPORT_DETAILS))
5228 fprintf (vect_dump, "get vectype for scalar type: ");
5229 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5232 vectype = get_vectype_for_scalar_type (scalar_type);
5235 if (vect_print_dump_info (REPORT_DETAILS))
5237 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5238 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5243 if (vect_print_dump_info (REPORT_DETAILS))
5245 fprintf (vect_dump, "vectype: ");
5246 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5252 if (STMT_VINFO_RELEVANT_P (stmt_info))
5254 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5255 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5256 *need_to_vectorize = true;
5261 && (STMT_VINFO_RELEVANT_P (stmt_info)
5262 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5263 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5264 || vectorizable_shift (stmt, NULL, NULL, NULL)
5265 || vectorizable_operation (stmt, NULL, NULL, NULL)
5266 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5267 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5268 || vectorizable_call (stmt, NULL, NULL)
5269 || vectorizable_store (stmt, NULL, NULL, NULL)
5270 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5271 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5275 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5276 || vectorizable_shift (stmt, NULL, NULL, node)
5277 || vectorizable_operation (stmt, NULL, NULL, node)
5278 || vectorizable_assignment (stmt, NULL, NULL, node)
5279 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5280 || vectorizable_store (stmt, NULL, NULL, node)
5281 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5286 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5288 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5289 fprintf (vect_dump, "supported: ");
5290 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5299   /* Stmts that are (also) "live" (i.e., used outside the loop)
5300 need extra handling, except for vectorizable reductions. */
5301 if (STMT_VINFO_LIVE_P (stmt_info)
5302 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5303 ok = vectorizable_live_operation (stmt, NULL, NULL);
5307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5309 fprintf (vect_dump, "not vectorized: live stmt not ");
5310 fprintf (vect_dump, "supported: ");
5311 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5321 /* Function vect_transform_stmt.
5323 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5326 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5327 bool *strided_store, slp_tree slp_node,
5328 slp_instance slp_node_instance)
5330 bool is_store = false;
5331 gimple vec_stmt = NULL;
5332 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5335 switch (STMT_VINFO_TYPE (stmt_info))
5337 case type_demotion_vec_info_type:
5338 case type_promotion_vec_info_type:
5339 case type_conversion_vec_info_type:
5340 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5344 case induc_vec_info_type:
5345 gcc_assert (!slp_node);
5346 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5350 case shift_vec_info_type:
5351 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5355 case op_vec_info_type:
5356 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5360 case assignment_vec_info_type:
5361 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5365 case load_vec_info_type:
5366 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5371 case store_vec_info_type:
5372 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5374 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5376 /* In case of interleaving, the whole chain is vectorized when the
5377 last store in the chain is reached. Store stmts before the last
5378	     one are skipped, and their vec_stmt_info shouldn't be freed meanwhile. */
5380 *strided_store = true;
5381 if (STMT_VINFO_VEC_STMT (stmt_info))
5388 case condition_vec_info_type:
5389 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5393 case call_vec_info_type:
5394 gcc_assert (!slp_node);
5395 done = vectorizable_call (stmt, gsi, &vec_stmt);
5396 stmt = gsi_stmt (*gsi);
5399 case reduc_vec_info_type:
5400 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5405 if (!STMT_VINFO_LIVE_P (stmt_info))
5407 if (vect_print_dump_info (REPORT_DETAILS))
5408 fprintf (vect_dump, "stmt not supported.");
5413 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5414 is being vectorized, but outside the immediately enclosing loop. */
5416 && STMT_VINFO_LOOP_VINFO (stmt_info)
5417 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5418 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5419 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5420 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5421 || STMT_VINFO_RELEVANT (stmt_info) ==
5422 vect_used_in_outer_by_reduction))
5424 struct loop *innerloop = LOOP_VINFO_LOOP (
5425 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5426 imm_use_iterator imm_iter;
5427 use_operand_p use_p;
5431 if (vect_print_dump_info (REPORT_DETAILS))
5432 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5434      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5435	 (to be used when vectorizing outer-loop stmts that use the DEF of STMT). */
5437 if (gimple_code (stmt) == GIMPLE_PHI)
5438 scalar_dest = PHI_RESULT (stmt);
5440 scalar_dest = gimple_assign_lhs (stmt);
5442 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5444 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5446 exit_phi = USE_STMT (use_p);
5447 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5452 /* Handle stmts whose DEF is used outside the loop-nest that is
5453 being vectorized. */
5454 if (STMT_VINFO_LIVE_P (stmt_info)
5455 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5457 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5462 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5468 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
5472 vect_remove_stores (gimple first_stmt)
5474 gimple next = first_stmt;
5476 gimple_stmt_iterator next_si;
5480 /* Free the attached stmt_vec_info and remove the stmt. */
5481 next_si = gsi_for_stmt (next);
5482 gsi_remove (&next_si, true);
5483 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5484 free_stmt_vec_info (next);
5490 /* Function new_stmt_vec_info.
5492 Create and initialize a new stmt_vec_info struct for STMT. */
5495 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5496 bb_vec_info bb_vinfo)
5499 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5501 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5502 STMT_VINFO_STMT (res) = stmt;
5503 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5504 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5505 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5506 STMT_VINFO_LIVE_P (res) = false;
5507 STMT_VINFO_VECTYPE (res) = NULL;
5508 STMT_VINFO_VEC_STMT (res) = NULL;
5509 STMT_VINFO_VECTORIZABLE (res) = true;
5510 STMT_VINFO_IN_PATTERN_P (res) = false;
5511 STMT_VINFO_RELATED_STMT (res) = NULL;
5512 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5513 STMT_VINFO_DATA_REF (res) = NULL;
5515 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5516 STMT_VINFO_DR_OFFSET (res) = NULL;
5517 STMT_VINFO_DR_INIT (res) = NULL;
5518 STMT_VINFO_DR_STEP (res) = NULL;
5519 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5521 if (gimple_code (stmt) == GIMPLE_PHI
5522 && is_loop_header_bb_p (gimple_bb (stmt)))
5523 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5525 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5527 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5528 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5529 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5530 STMT_SLP_TYPE (res) = loop_vect;
5531 GROUP_FIRST_ELEMENT (res) = NULL;
5532 GROUP_NEXT_ELEMENT (res) = NULL;
5533 GROUP_SIZE (res) = 0;
5534 GROUP_STORE_COUNT (res) = 0;
5535 GROUP_GAP (res) = 0;
5536 GROUP_SAME_DR_STMT (res) = NULL;
5537 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5543 /* Create the vector that holds the stmt_vec_info structs. */
5546 init_stmt_vec_info_vec (void)
5548 gcc_assert (!stmt_vec_info_vec);
5549 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5553 /* Free the vector that holds the stmt_vec_info structs. */
5556 free_stmt_vec_info_vec (void)
5558 gcc_assert (stmt_vec_info_vec);
5559 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5563 /* Free stmt vectorization related info. */
5566 free_stmt_vec_info (gimple stmt)
5568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5573 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5574 set_vinfo_for_stmt (stmt, NULL);
5579 /* Function get_vectype_for_scalar_type_and_size.
5581    Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
5585 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5587 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5588 enum machine_mode simd_mode;
5589 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5596   /* We can't build a vector type of elements with alignment bigger than their size. */
5598 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5601   /* For vector types of elements whose mode precision doesn't
5602      match their type's precision, we use an element type of mode
5603      precision. The vectorization routines will have to make sure
5604      they support the proper result truncation/extension. */
5605 if (INTEGRAL_TYPE_P (scalar_type)
5606 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5607 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5608 TYPE_UNSIGNED (scalar_type));
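  /* E.g. (illustrative): _Bool has QImode but TYPE_PRECISION 1, so an
     8-bit integer element type is substituted here and the vectorizer
     must handle the truncation back to 1 bit itself.  */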
5610 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5611 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5614 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5615 When the component mode passes the above test simply use a type
5616 corresponding to that mode. The theory is that any use that
5617 would cause problems with this will disable vectorization anyway. */
5618 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5619 && !INTEGRAL_TYPE_P (scalar_type)
5620 && !POINTER_TYPE_P (scalar_type))
5621 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5623   /* If no size was supplied, use the mode the target prefers. Otherwise
5624      look up a vector mode of the specified size. */
5626 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5628 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5629 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5633 vectype = build_vector_type (scalar_type, nunits);
5634 if (vect_print_dump_info (REPORT_DETAILS))
5636 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5637 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5643 if (vect_print_dump_info (REPORT_DETAILS))
5645 fprintf (vect_dump, "vectype: ");
5646 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5649 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5650 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5652 if (vect_print_dump_info (REPORT_DETAILS))
5653 fprintf (vect_dump, "mode not supported by target.");
5660 unsigned int current_vector_size;
5662 /* Function get_vectype_for_scalar_type.
5664    Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
5668 get_vectype_for_scalar_type (tree scalar_type)
5671 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5672 current_vector_size);
5674 && current_vector_size == 0)
5675 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5679 /* Function get_same_sized_vectype
5681    Returns a vector type corresponding to SCALAR_TYPE with the same size
5682    as VECTOR_TYPE, if supported by the target. */
5685 get_same_sized_vectype (tree scalar_type, tree vector_type)
5687 return get_vectype_for_scalar_type_and_size
5688 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5691 /* Function vect_is_simple_use.
5694 LOOP_VINFO - the vect info of the loop that is being vectorized.
5695 BB_VINFO - the vect info of the basic block that is being vectorized.
5696 OPERAND - operand of a stmt in the loop or bb.
5697 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5699 Returns whether a stmt with OPERAND can be vectorized.
5700 For loops, supportable operands are constants, loop invariants, and operands
5701 that are defined by the current iteration of the loop. Unsupportable
5702 operands are those that are defined by a previous iteration of the loop (as
5703 is the case in reduction/induction computations).
5704 For basic blocks, supportable operands are constants and bb invariants.
5705 For now, operands defined outside the basic block are not supported. */
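/* For instance (illustrative), in

     for (i = 0; i < n; i++)
       { t = a[i]; b[i] = t + c; }

   T has a vect_internal_def (defined by the current iteration), the loop
   invariant C is a vect_external_def, and a literal such as 5 would be a
   vect_constant_def.  */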
5708 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5709 bb_vec_info bb_vinfo, gimple *def_stmt,
5710 tree *def, enum vect_def_type *dt)
5713 stmt_vec_info stmt_vinfo;
5714 struct loop *loop = NULL;
5717 loop = LOOP_VINFO_LOOP (loop_vinfo);
5722 if (vect_print_dump_info (REPORT_DETAILS))
5724 fprintf (vect_dump, "vect_is_simple_use: operand ");
5725 print_generic_expr (vect_dump, operand, TDF_SLIM);
5728 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5730 *dt = vect_constant_def;
5734 if (is_gimple_min_invariant (operand))
5737 *dt = vect_external_def;
5741 if (TREE_CODE (operand) == PAREN_EXPR)
5743 if (vect_print_dump_info (REPORT_DETAILS))
5744 fprintf (vect_dump, "non-associatable copy.");
5745 operand = TREE_OPERAND (operand, 0);
5748 if (TREE_CODE (operand) != SSA_NAME)
5750 if (vect_print_dump_info (REPORT_DETAILS))
5751 fprintf (vect_dump, "not ssa-name.");
5755 *def_stmt = SSA_NAME_DEF_STMT (operand);
5756 if (*def_stmt == NULL)
5758 if (vect_print_dump_info (REPORT_DETAILS))
5759 fprintf (vect_dump, "no def_stmt.");
5763 if (vect_print_dump_info (REPORT_DETAILS))
5765 fprintf (vect_dump, "def_stmt: ");
5766 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5769 /* Empty stmt is expected only in case of a function argument.
5770      (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
5771 if (gimple_nop_p (*def_stmt))
5774 *dt = vect_external_def;
5778 bb = gimple_bb (*def_stmt);
5780 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5781 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5782 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5783 *dt = vect_external_def;
5786 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5787 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5790 if (*dt == vect_unknown_def_type)
5792 if (vect_print_dump_info (REPORT_DETAILS))
5793 fprintf (vect_dump, "Unsupported pattern.");
5797 if (vect_print_dump_info (REPORT_DETAILS))
5798     fprintf (vect_dump, "type of def: %d.", *dt);
5800 switch (gimple_code (*def_stmt))
5803 *def = gimple_phi_result (*def_stmt);
5807 *def = gimple_assign_lhs (*def_stmt);
5811 *def = gimple_call_lhs (*def_stmt);
5816 if (vect_print_dump_info (REPORT_DETAILS))
5817 fprintf (vect_dump, "unsupported defining stmt: ");
5824 /* Function vect_is_simple_use_1.
5826    Same as vect_is_simple_use but also determines the vector operand
5827 type of OPERAND and stores it to *VECTYPE. If the definition of
5828 OPERAND is vect_uninitialized_def, vect_constant_def or
5829    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5830    is responsible for computing the best suited vector type for the use. */
5834 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5835 bb_vec_info bb_vinfo, gimple *def_stmt,
5836 tree *def, enum vect_def_type *dt, tree *vectype)
5838 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5841 /* Now get a vector type if the def is internal, otherwise supply
5842 NULL_TREE and leave it up to the caller to figure out a proper
5843 type for the use stmt. */
5844 if (*dt == vect_internal_def
5845 || *dt == vect_induction_def
5846 || *dt == vect_reduction_def
5847 || *dt == vect_double_reduction_def
5848 || *dt == vect_nested_cycle)
5850 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5852 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5853 && !STMT_VINFO_RELEVANT (stmt_info)
5854 && !STMT_VINFO_LIVE_P (stmt_info))
5855 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5857 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5858 gcc_assert (*vectype != NULL_TREE);
5860 else if (*dt == vect_uninitialized_def
5861 || *dt == vect_constant_def
5862 || *dt == vect_external_def)
5863 *vectype = NULL_TREE;
5871 /* Function supportable_widening_operation
5873 Check whether an operation represented by the code CODE is a
5874 widening operation that is supported by the target platform in
5875 vector form (i.e., when operating on arguments of type VECTYPE_IN
5876 producing a result of type VECTYPE_OUT).
5878 Widening operations we currently support are NOP (CONVERT), FLOAT
5879 and WIDEN_MULT. This function checks if these operations are supported
5880 by the target platform either directly (via vector tree-codes), or via
5884 - CODE1 and CODE2 are codes of vector operations to be used when
5885 vectorizing the operation, if available.
5886 - DECL1 and DECL2 are decls of target builtin functions to be used
5887 when vectorizing the operation, if available. In this case,
5888 CODE1 and CODE2 are CALL_EXPR.
5889 - MULTI_STEP_CVT determines the number of required intermediate steps in
5890 case of multi-step conversion (like char->short->int - in that case
5891 MULTI_STEP_CVT will be 1).
5892 - INTERM_TYPES contains the intermediate type required to perform the
5893 widening operation (short in the above example). */
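/* For example (sketch): a char->int conversion of V16QI data is done in
   two steps, V16QI -> two V8HI -> four V4SI, using VEC_UNPACK_{LO,HI}_EXPR
   at each step; MULTI_STEP_CVT is then 1 and INTERM_TYPES holds the short
   vector type.  */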
5896 supportable_widening_operation (enum tree_code code, gimple stmt,
5897 tree vectype_out, tree vectype_in,
5898 tree *decl1, tree *decl2,
5899 enum tree_code *code1, enum tree_code *code2,
5900 int *multi_step_cvt,
5901 VEC (tree, heap) **interm_types)
5903 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5904 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5905 struct loop *vect_loop = NULL;
5907 enum machine_mode vec_mode;
5908 enum insn_code icode1, icode2;
5909 optab optab1, optab2;
5910 tree vectype = vectype_in;
5911 tree wide_vectype = vectype_out;
5912 enum tree_code c1, c2;
5914 tree prev_type, intermediate_type;
5915 enum machine_mode intermediate_mode, prev_mode;
5916 optab optab3, optab4;
5918 *multi_step_cvt = 0;
5920 vect_loop = LOOP_VINFO_LOOP (loop_info);
5922 /* The result of a vectorized widening operation usually requires two vectors
5923 (because the widened results do not fit into one vector). The generated
5924 vector results would normally be expected to be generated in the same
5925 order as in the original scalar computation, i.e. if 8 results are
5926 generated in each vector iteration, they are to be organized as follows:
5927 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5929 However, in the special case that the result of the widening operation is
5930 used in a reduction computation only, the order doesn't matter (because
5931 when vectorizing a reduction we change the order of the computation).
5932 Some targets can take advantage of this and generate more efficient code.
5933 For example, targets like Altivec, that support widen_mult using a sequence
5934 of {mult_even,mult_odd} generate the following vectors:
5935 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5937 When vectorizing outer-loops, we execute the inner-loop sequentially
5938 (each vectorized inner-loop iteration contributes to VF outer-loop
5939    iterations in parallel). We therefore don't allow changing the order
5940 of the computation in the inner-loop during outer-loop vectorization. */
5943 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5944 && !nested_in_vect_loop_p (vect_loop, stmt))
5950 && code == WIDEN_MULT_EXPR
5951 && targetm.vectorize.builtin_mul_widen_even
5952 && targetm.vectorize.builtin_mul_widen_even (vectype)
5953 && targetm.vectorize.builtin_mul_widen_odd
5954 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5956 if (vect_print_dump_info (REPORT_DETAILS))
5957 fprintf (vect_dump, "Unordered widening operation detected.");
5959 *code1 = *code2 = CALL_EXPR;
5960 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5961 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5967 case WIDEN_MULT_EXPR:
5968 c1 = VEC_WIDEN_MULT_LO_EXPR;
5969 c2 = VEC_WIDEN_MULT_HI_EXPR;
5972 case WIDEN_LSHIFT_EXPR:
5973 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
5974 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5978 c1 = VEC_UNPACK_LO_EXPR;
5979 c2 = VEC_UNPACK_HI_EXPR;
5983 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5984 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5987 case FIX_TRUNC_EXPR:
5988 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5989 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5990 computing the operation. */
5997 if (BYTES_BIG_ENDIAN)
5999 enum tree_code ctmp = c1;
6004 if (code == FIX_TRUNC_EXPR)
6006      /* The signedness is determined from the output operand. */
6007 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6008 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6012 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6013 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6016 if (!optab1 || !optab2)
6019 vec_mode = TYPE_MODE (vectype);
6020 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6021 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6027 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6028 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6031   /* Check if it's a multi-step conversion that can be done using intermediate types. */
6034 prev_type = vectype;
6035 prev_mode = vec_mode;
6037 if (!CONVERT_EXPR_CODE_P (code))
6040 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6041      intermediate steps in the promotion sequence. We try
6042      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not. */
6044 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6045 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6047 intermediate_mode = insn_data[icode1].operand[0].mode;
6049 = lang_hooks.types.type_for_mode (intermediate_mode,
6050 TYPE_UNSIGNED (prev_type));
6051 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6052 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6054 if (!optab3 || !optab4
6055 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6056 || insn_data[icode1].operand[0].mode != intermediate_mode
6057 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6058 || insn_data[icode2].operand[0].mode != intermediate_mode
6059 || ((icode1 = optab_handler (optab3, intermediate_mode))
6060 == CODE_FOR_nothing)
6061 || ((icode2 = optab_handler (optab4, intermediate_mode))
6062 == CODE_FOR_nothing))
6065 VEC_quick_push (tree, *interm_types, intermediate_type);
6066 (*multi_step_cvt)++;
6068 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6069 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6072 prev_type = intermediate_type;
6073 prev_mode = intermediate_mode;
6076 VEC_free (tree, heap, *interm_types);
6081 /* Function supportable_narrowing_operation
6083 Check whether an operation represented by the code CODE is a
6084 narrowing operation that is supported by the target platform in
6085 vector form (i.e., when operating on arguments of type VECTYPE_IN
6086 and producing a result of type VECTYPE_OUT).
6088 Narrowing operations we currently support are NOP (CONVERT) and
6089 FIX_TRUNC. This function checks if these operations are supported by
6090 the target platform directly via vector tree-codes.
6093 - CODE1 is the code of a vector operation to be used when
6094 vectorizing the operation, if available.
6095 - MULTI_STEP_CVT determines the number of required intermediate steps in
6096 case of multi-step conversion (like int->short->char - in that case
6097 MULTI_STEP_CVT will be 1).
6098 - INTERM_TYPES contains the intermediate type required to perform the
6099 narrowing operation (short in the above example). */
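/* For example (sketch): an int->char conversion packs four V4SI vectors
   pairwise into two V8HI vectors, and those into one V16QI vector, using
   VEC_PACK_TRUNC_EXPR at each step; MULTI_STEP_CVT is then 1 and
   INTERM_TYPES holds the short vector type.  */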
6102 supportable_narrowing_operation (enum tree_code code,
6103 tree vectype_out, tree vectype_in,
6104 enum tree_code *code1, int *multi_step_cvt,
6105 VEC (tree, heap) **interm_types)
6107 enum machine_mode vec_mode;
6108 enum insn_code icode1;
6109 optab optab1, interm_optab;
6110 tree vectype = vectype_in;
6111 tree narrow_vectype = vectype_out;
6113 tree intermediate_type;
6114 enum machine_mode intermediate_mode, prev_mode;
6118 *multi_step_cvt = 0;
6122 c1 = VEC_PACK_TRUNC_EXPR;
6125 case FIX_TRUNC_EXPR:
6126 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6130 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6131 tree code and optabs used for computing the operation. */
6138 if (code == FIX_TRUNC_EXPR)
6139     /* The signedness is determined from the output operand. */
6140 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6142 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6147 vec_mode = TYPE_MODE (vectype);
6148 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6153 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6156   /* Check if it's a multi-step conversion that can be done using intermediate types. */
6158 prev_mode = vec_mode;
6159 if (code == FIX_TRUNC_EXPR)
6160 uns = TYPE_UNSIGNED (vectype_out);
6162 uns = TYPE_UNSIGNED (vectype);
6164 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6165 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6166 costly than signed. */
6167 if (code == FIX_TRUNC_EXPR && uns)
6169 enum insn_code icode2;
6172 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6174 = optab_for_tree_code (c1, intermediate_type, optab_default);
6175 if (interm_optab != NULL
6176 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6177 && insn_data[icode1].operand[0].mode
6178 == insn_data[icode2].operand[0].mode)
6181 optab1 = interm_optab;
6186 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6187      intermediate steps in the narrowing sequence. We try
6188 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6189 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6190 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6192 intermediate_mode = insn_data[icode1].operand[0].mode;
6194 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6196 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6199 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6200 || insn_data[icode1].operand[0].mode != intermediate_mode
6201 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6202 == CODE_FOR_nothing))
6205 VEC_quick_push (tree, *interm_types, intermediate_type);
6206 (*multi_step_cvt)++;
6208 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6211 prev_mode = intermediate_mode;
6212 optab1 = interm_optab;
6215 VEC_free (tree, heap, *interm_types);