/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
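
/* Illustrative note (added, hypothetical code): in

       for (i = 0; i < n; i++)
         s = a[i];
       ... = s;          <-- s used after the loop

   the assignment to s is marked live because its final value escapes
   through the loop-closed exit phi, even though no stmt inside the loop
   uses it.  */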
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
             live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
             relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
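
      /* Illustrative example (added note, hypothetical code): in
           s1: t = a[i] * b[i]
           s2: sum = sum + t
         t is used only by the reduction in s2, so the vectorized copies
         of s1 may produce their partial results in any element order.  */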
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
              case vect_unused_in_scope:
                relevant = vect_used_by_reduction;
                break;

              case vect_used_by_reduction:
                if (gimple_code (stmt) == GIMPLE_PHI)
                  break;
                /* fall through */

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of reduction.");
                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");
                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");
                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Get cost by calling cost target builtin.  */

static inline int
vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
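
/* Illustrative note (added, hypothetical numbers): for an interleaved group
   of 4 stores a[4i], a[4i+1], a[4i+2], a[4i+3], the call on the group's
   first stmt returns 4 and the calls on the remaining three return 1, so
   the group-wide overhead is charged exactly once.  */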
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
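
/* Worked example for the permute cost above (added note, hypothetical
   numbers): with ncopies = 1, group_size = 4 and a vector_stmt cost of 1,
   the interleaving needs exact_log2 (4) * 4 = 8 high/low interleave
   operations, so inside_cost starts at 8 before the stores themselves
   are added.  */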
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                 vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
          || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
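
/* Worked example for the extract cost above (added note, hypothetical
   numbers): a group of 2 interleaved loads with ncopies = 2 and a
   vector_stmt cost of 1 needs exact_log2 (2) * 2 = 2 even/odd extracts
   per copy, i.e. inside_cost starts at 4 before the loads themselves
   are added.  */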
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                           vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
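
/* Illustrative note (added, hypothetical code): for a loop-invariant
   constant 5 and a V4SI vector type, this emits

     cst_1 = {5, 5, 5, 5};

   in the loop preheader and returns the SSA name cst_1.  */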
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
                                         fold_convert (TREE_TYPE (vector_type),
                                                       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);

  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
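
  /* Worked example (added note, hypothetical numbers): with a vectorization
     factor of 8 and nunits_in = nunits_out = 4 (modifier == NONE), ncopies
     is 8 / 4 = 2, i.e. two vectorized calls are emitted per scalar call.  */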
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}
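
/* Illustrative note (added, hypothetical numbers): demoting int to char
   with V4SI/V16QI vectors takes two steps; four V4SI operands are first
   packed pairwise into two V8HI intermediate vectors, which are then
   packed into one V16QI vector, halving the number of operands at each
   recursion level.  */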
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  VEC (tree, heap) *vec_tmp = NULL;

  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      VEC_quick_push (tree, vec_tmp, new_tmp1);
      VEC_quick_push (tree, vec_tmp, new_tmp2);
    }

  VEC_free (tree, heap, *vec_oprnds0);
  *vec_oprnds0 = vec_tmp;
}
2005 /* Check if STMT performs a conversion operation, that can be vectorized.
2006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2007 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2011 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2012 gimple *vec_stmt, slp_tree slp_node)
2016 tree op0, op1 = NULL_TREE;
2017 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2020 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2021 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2022 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2026 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2027 gimple new_stmt = NULL;
2028 stmt_vec_info prev_stmt_info;
2031 tree vectype_out, vectype_in;
2033 tree lhs_type, rhs_type;
2034 enum { NARROW, NONE, WIDEN } modifier;
2035 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2038 int multi_step_cvt = 0;
2039 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2040 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2042 enum machine_mode rhs_mode;
2043 unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                 &dt[1]);

      if (!ok)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
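  /* For instance, with a vectorization factor of 16 and a widening
     conversion from V8HI inputs to V4SI outputs, each copy consumes one
     V8HI vector, so ncopies = 16 / 8 = 2; every copy then produces its
     result as two V4SI halves.  */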

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &decl1, &decl2, &code1, &code2,
                                          &multi_step_cvt, &interm_types))
        {
          /* Binary widening operations can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;
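      /* A widening FLOAT_EXPR may still be doable in two steps: e.g., with
         no direct HImode -> DFmode conversion, first widen HImode to an
         intermediate integer mode and then convert that to DFmode.  Try
         successively wider intermediate integer types below.  */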
      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &decl1, &decl2,
                                                    &codecvt1, &codecvt2,
                                                    &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, NULL, NULL, &code1,
                                              &code2, &multi_step_cvt,
                                              &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          VEC_safe_push (tree, heap, interm_types, cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;
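      /* A narrowing FIX_TRUNC_EXPR may still be doable in two steps: e.g.,
         DFmode -> SImode can become DFmode -> DImode (fix_trunc to an
         integer type of the same size as the float) followed by a
         DImode -> SImode integer narrowing, when both steps are
         supported.  */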
      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
        }
      VEC_free (tree, heap, interm_types);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }
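  /* Folding a constant operand to the other operand's type keeps both
     operands of the widening operation in the same (narrower) scalar type,
     so they can share a single input vectype.  */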

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest, cvt_type);

  if (!slp_node)
    {
      if (modifier == NONE)
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
      else if (modifier == WIDEN)
        {
          vec_oprnds0 = VEC_alloc (tree, heap,
                                   (multi_step_cvt
                                    ? vect_pow2 (multi_step_cvt) : 1));
          if (op_type == binary_op)
            vec_oprnds1 = VEC_alloc (tree, heap, 1);
        }
      else
        vec_oprnds0 = VEC_alloc (tree, heap,
                                 2 * (multi_step_cvt
                                      ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              VEC_truncate (tree, vec_oprnds0, 0);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  VEC_truncate (tree, vec_oprnds1, 0);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = VEC_index (tree, vec_dsts, i);
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              VEC_truncate (tree, vec_oprnds0, 0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                VEC_replace (tree, vec_oprnds0, i, new_temp);
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, interm_types);

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type conversion to/from bit-precision "
                 "unsupported.");
      return false;
    }
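  /* The exception above covers e.g. widening an unsigned bit-precision
     value: zero extension leaves the low-order bit-pattern unchanged, so
     no extra masking is required.  */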

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
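
/* This predicate is non-static so that callers outside this file (e.g.
   pattern recognition) can test whether a shift they are about to
   introduce can be vectorized at all before committing to the rewrite.  */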

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                             &dt[1], &op1_vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same in all
         the stmts of the node; in loops, a constant or invariant count is
         always a scalar shift.  */
      if (slp_node)
        {
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vector/vector shift/rotate found.");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unusable type for last operand in"
                     " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        fprintf (vect_dump, "unusable type for last operand in"
                                 " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = VEC_index (tree, vec_oprnds1, i);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  return true;
}


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
                 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are the bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[2]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2 = VEC_alloc (tree, heap, 1);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_operand (op2, stmt, NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_stmt_copy (dt[2],
                                                              vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
                                                    vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);
  if (vec_oprnds2)
    VEC_free (tree, heap, vec_oprnds2);

  return true;
}

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool strided_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  bool inv_p;
  VEC(tree,heap) *vec_oprnds = NULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "negative step for store.");
      return false;
    }

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_strided_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                       &def, &dt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "use not simple.");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          strided_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
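  /* For store-lanes the vector operands are combined into one aggregate:
     e.g., two V4SI operand vectors are written through a single int[8]
     array reference handed to IFN_STORE_LANES (on ARM/NEON this typically
     maps to a vst2 instruction).  */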

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_stmt_copy ().  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  VEC_quick_push (tree, dr_chain, vec_oprnd);
                  VEC_quick_push (tree, oprnds, vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = VEC_index (tree, oprnds, i);
              vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
                                  &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              VEC_replace (tree, dr_chain, i, vec_oprnd);
              VEC_replace (tree, oprnds, i, vec_oprnd);
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = VEC_index (tree, dr_chain, i);
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);
        }
      else
        {
          if (strided_store)
            {
              result_chain = VEC_alloc (tree, heap, group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              struct ptr_info_def *pi;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = VEC_index (tree, vec_oprnds, i);
              else if (strided_store)
                /* For strided stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = VEC_index (tree, result_chain, i);

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0));
              pi = get_ptr_info (dataref_ptr);
              pi->align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                pi->misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  pi->align = TYPE_ALIGN_UNIT (elem_type);
                  pi->misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  pi->misalign = DR_MISALIGNMENT (first_dr);
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              mark_symbols_for_renaming (new_stmt);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);
  if (vec_oprnds)
    VEC_free (tree, heap, vec_oprnds);

  return true;
}

/* Given a vector type VECTYPE, return the mask that implements reversal
   of the vector elements.  If that is impossible to do, return NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  tree mask_elt_type, mask_type, mask_vec;
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;
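  /* E.g., for V4SI this builds the element selector {3, 2, 1, 0}.  */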

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;

  mask_elt_type
    = lang_hooks.types.type_for_size
        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_vec = NULL;
  for (i = 0; i < nunits; i++)
    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
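  /* tree_cons prepends, so after the loop the list reads
     {nunits-1, ..., 1, 0} - exactly the element-reversal selector
     verified with can_vec_perm_p above.  */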
  mask_vec = build_vector (mask_type, mask_vec);

  return mask_vec;
}

/* Given a vector variable X that was generated for the scalar LHS of
   STMT, generate instructions to reverse the vector elements of X,
   insert them at *GSI and return the permuted vector variable.  */

static tree
reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree mask_vec, perm_dest, data_ref;
  gimple perm_stmt;

  mask_vec = perm_mask_for_reverse (vectype);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                             x, x, mask_vec);
  data_ref = make_ssa_name (perm_dest, perm_stmt);
  gimple_set_lhs (perm_stmt, data_ref);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types with negative step.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop);

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_strided_load_supported (vectype, group_size))
            return false;
        }
    }

  if (negative)
    {
      gcc_assert (!strided_load);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "negative step but alignment required.");
          return false;
        }
      if (!perm_mask_for_reverse (vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "negative step and reversing not supported.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/

  if (strided_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
          && first_stmt != VEC_index (gimple,
                                      SLP_TREE_SCALAR_STMTS (slp_node), 0))
        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          strided_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
            slp_perm = true;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit strided access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
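  /* As a (hypothetical) illustration of realign_load with V4SI and p
     misaligned by one element: the two aligned loads yield
         msq = [a0 a1 a2 a3]  and  lsq = [a4 a5 a6 a7],
     and realign_load, steered by realignment_token, combines them into the
     desired unaligned value [a1 a2 a3 a4].  The exact element selection is
     target-defined; see the REALIGN_LOAD_EXPR documentation.  */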
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (loop && nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                                offset, &dummy, gsi,
                                                &ptr_incr, false, &inv_p);
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (strided_load || slp_perm)
        dr_chain = VEC_alloc (tree, heap, vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              VEC_quick_push (tree, dr_chain, new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_strided_load_vectors (stmt, dr_chain);
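          /* For example, with vec_num == 2 the sequence emitted above is
             (GIMPLE-like pseudocode):

                 vec_array = LOAD_LANES (MEM_REF[(elem[2*nunits] *) ptr]);
                 vx0 = vec_array[0];
                 vx1 = vec_array[1];

             where vx0 and vx1 become the vectorized defs of the first and
             second scalar load of the chain, respectively.  */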
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    struct ptr_info_def *pi;
                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    pi = get_ptr_info (dataref_ptr);
                    pi->align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        pi->misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        pi->align = TYPE_ALIGN_UNIT (elem_type);
                        pi->misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        pi->misalign = DR_MISALIGNMENT (first_dr);
                      }
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
                                            new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              mark_symbols_for_renaming (new_stmt);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt
                    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
                                                     vec_dest, msq, lsq,
                                                     realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }
              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  tree tem;
                  gimple_stmt_iterator gsi2 = *gsi;
                  gcc_assert (!strided_load);
                  gsi_next (&gsi2);
                  tem = scalar_dest;
                  if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                  TREE_TYPE (tem)))
                    {
                      tem = fold_convert (TREE_TYPE (vectype), tem);
                      tem = force_gimple_operand_gsi (&gsi2, tem, true,
                                                      NULL_TREE, true,
                                                      GSI_SAME_STMT);
                    }
                  vec_inv = build_vector_from_val (vectype, tem);
                  new_temp = vect_init_vector (stmt, vec_inv,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              if (negative)
                {
                  new_temp = reverse_vec_elements (new_temp, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_strided_load ().  */
              if (strided_load || slp_perm)
                VEC_quick_push (tree, dr_chain, new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              VEC_free (tree, heap, dr_chain);
              return false;
            }
        }
      else
        {
          if (strided_load)
            {
              if (!load_lanes_p)
                vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (dr_chain)
        VEC_free (tree, heap, dr_chain);
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
                                 &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
                                 &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
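/* For example, in

       x_5 = a_1 < b_2 ? c_3 : d_4;

   the condition a_1 < b_2 is "simple" if each of a_1 and b_2 is a constant
   or an SSA name with a vectorizable def; *COMP_VECTYPE is then taken from
   whichever operand provides a vector type.  */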
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
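/* For illustration, the scalar statement

       x = a < b ? c : d;

   becomes (one copy per ncopies, GIMPLE-like pseudocode):

       vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs of the operands.  */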
static bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int j;
  gimple new_stmt = NULL;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }
  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }
  /* Transform.  */

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        {
          gimple gtemp;
          vec_cond_lhs =
            vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                          stmt, NULL);
          vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
                              NULL, &gtemp, &def, &dts[0]);
          vec_cond_rhs =
            vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                          stmt, NULL);
          vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
                              NULL, &gtemp, &def, &dts[1]);
          if (reduc_index == 1)
            vec_then_clause = reduc_def;
          else
            {
              vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                              stmt, NULL);
              vect_is_simple_use (then_clause, loop_vinfo,
                                  NULL, &gtemp, &def, &dts[2]);
            }
          if (reduc_index == 2)
            vec_else_clause = reduc_def;
          else
            {
              vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                              stmt, NULL);
              vect_is_simple_use (else_clause, loop_vinfo,
                                  NULL, &gtemp, &def, &dts[3]);
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_then_clause);
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                            vec_cond_lhs, vec_cond_rhs);
      vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                              vec_compare, vec_then_clause, vec_else_clause);

      new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt, pattern_def_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");
      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining pattern statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "irrelevant.");
          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
      && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
          || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
    {
      /* Analyze the def stmt of STMT if it's a pattern stmt.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern def statement: ");
          print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
        return false;
    }
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                  || relevance == vect_used_in_outer_by_reduction
                  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "get vectype for scalar type:  ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not SLPed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vectype:  ");
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_store (stmt, NULL, NULL, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }
      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: live stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }
      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *strided_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *strided_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, gsi, &vec_stmt);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }
    }
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                          vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free the hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their types precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
      && !INTEGRAL_TYPE_P (scalar_type)
      && !POINTER_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
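/* For example (target permitting): for SCALAR_TYPE 'int' (4 bytes) and
   SIZE 16 this yields a V4SI-like four-element vector type; with SIZE 0
   the width is taken from targetm.vectorize.preferred_simd_mode instead.  */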
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
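/* E.g. for a 16-byte VECTOR_TYPE of floats and SCALAR_TYPE 'short int',
   this requests a 16-byte vector of shorts (V8HI on targets that support
   it), which keeps mixed-width operations working on vectors of one
   common byte size.  */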
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* An empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
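/* Classification examples (illustrative): in 'x_1 = a_2 + 3' the operand 3
   is vect_constant_def; a function argument or a name defined before the
   loop is vect_external_def; a name defined by a stmt inside the loop
   being vectorized gets the def type recorded in its stmt_vec_info
   (vect_internal_def, or a reduction/induction variant).  */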
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   use stmt.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
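/* For instance (target permitting), widening a V16QI input to V4SI
   results is a two-step conversion: VEC_UNPACK_LO/HI_EXPR take V16QI to
   two V8HI vectors, and a second unpack takes each V8HI to two V4SI
   vectors.  INTERM_TYPES then holds the V8HI vector type and
   MULTI_STEP_CVT is 1.  */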
bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit into one vector). The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
     vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec, that support widen_mult using a sequence
     of {mult_even,mult_odd} generate the following vectors:
     vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow to change the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        return false;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
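/* For instance (target permitting), narrowing two V4SI vectors to one
   V8HI vector is a single VEC_PACK_TRUNC_EXPR; narrowing all the way from
   V4SI to V16QI packs twice, so INTERM_TYPES holds the V8HI vector type
   and MULTI_STEP_CVT is 1.  */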
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != NULL
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        return false;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}