/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  free (instance);
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def[2];
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def[i] = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def[i] = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def[i])
            *first_stmt_def0_type = TREE_TYPE (def[i]);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def[i])
                *first_stmt_def1_type = TREE_TYPE (def[i]);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  Allow different definition
                 types for reduction chains: the first stmt must be a
                 vect_reduction_def (a phi node), and the rest
                 vect_internal_def.  */
              if ((i == 0
                   && ((*first_stmt_dt0 != dt[i]
                        && !(*first_stmt_dt0 == vect_reduction_def
                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def[0]
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def[0])))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                           && !(*first_stmt_dt1 == vect_reduction_def
                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def[1]
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def[1])))))
                  || (!def[i]
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (i != number_of_oprnds - 1)
                    different_types = true;
                  else
                    {
                      if (is_gimple_assign (stmt)
                          && (rhs_code = gimple_assign_rhs_code (stmt))
                          && TREE_CODE_CLASS (rhs_code) == tcc_binary
                          && commutative_tree_code (rhs_code)
                          && *first_stmt_dt0 == dt[1]
                          && *first_stmt_dt1 == dt[0]
                          && def[0] && def[1]
                          && !(*first_stmt_def0_type
                               && !types_compatible_p (*first_stmt_def0_type,
                                                       TREE_TYPE (def[1])))
                          && !(*first_stmt_def1_type
                               && !types_compatible_p (*first_stmt_def1_type,
                                                       TREE_TYPE (def[0]))))
                        {
                          if (vect_print_dump_info (REPORT_SLP))
                            {
                              fprintf (vect_dump, "Swapping operands of ");
                              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                            }

                          swap_tree_operands (stmt,
                                              gimple_assign_rhs1_ptr (stmt),
                                              gimple_assign_rhs2_ptr (stmt));
                        }
                      else
                        {
                          if (vect_print_dump_info (REPORT_SLP))
                            fprintf (vect_dump,
                                     "Build SLP failed: different types ");

                          return false;
                        }
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if ((i == 0 && !different_types) || (i == 1 && different_types))
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def[i], TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
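      /* Worked example (added for illustration; not in the original sources):
         with a vectorization factor of 8 and a vector type holding 4 subparts
         (e.g. V4SI), NCOPIES = 8 / 4 = 2, i.e. two vector copies of this
         statement are needed per vectorized iteration.  */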

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check complex-number chains here, so we set
             LOADS_PERMUTED for further checking in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor, loads_permuted))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor, loads_permuted))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
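  /* Illustration (added for clarity; not in the original comment): with
     GROUP_SIZE == 2 and LOAD_PERMUTATION == {1, 1, 0, 0}, the first load
     node accesses element 1 of the chain and the second accesses element 0,
     so TMP_LOADS[1] gets node 0 and TMP_LOADS[0] gets node 1, i.e. the two
     nodes are swapped into chain order.  */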
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }

                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
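  /* For instance (added illustration, not in the original comment): with
     GROUP_SIZE == 2 the only permutation accepted below is {0, 0, 1, 1},
     i.e. the first load node reads element 0 of the interleaving chain for
     both scalar stmts and the second node reads element 1, each index
     repeating GROUP_SIZE times.  */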
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
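  /* Worked example (added for illustration; not in the original sources):
     with GROUP_SIZE == 3 stores and NUNITS == 4,
     least_common_multiple (4, 3) == 12, so UNROLLING_FACTOR == 12 / 3 == 4:
     four copies of the group fill three vectors exactly.  */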

  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                            " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
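  /* Worked example (added for illustration; not in the original sources):
     continuing the case above (GROUP_SIZE == 3, NUNITS == 4,
     UNROLLING_FACTOR == 4), NCOPIES_FOR_COST = 4 * 3 / 4 = 3, matching the
     three vector stmts that the unrolled group produces.  */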

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                                " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                                      "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances.  Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE in case there is a statement marked as
   not vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow statements that were not analyzed, since they may
         contain data dependencies.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
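
/* Worked example (added for illustration; not in the original sources): if
   the loop's vectorization factor ends up being VF == 8 while an instance
   was built with unrolling factor 4, its inside-of-loop cost is scaled by
   8 / 4 == 2, since twice as many vector stmts will be generated for each
   SLP node.  */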

/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple def_stmt;
  struct loop *loop;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst0);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 0);

             break;

          case MULT_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst1);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 1);

             break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);

              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}

/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node); this is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
                   VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* The number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance (); fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }
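
  /* E.g., if the LHS elements are ints and the RHS elements are shorts,
     twice as many RHS elements fit in one vector, so the adjustment above
     halves NUMBER_OF_VECTS.  */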
  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
                               -1);
}
/* Create NCOPIES permutation statements using the mask MASK (a vector of
   type MASK_TYPE) and two input vectors placed in DR_CHAIN at
   FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy, shifting by STRIDE
   elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE,
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                 first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
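
/* A note on VEC_PERM_EXPR semantics as used above: with four-element inputs
   {a0,a1,a2,a3} and {b0,b1,b2,b3}, the mask {0,4,1,5} selects from the
   concatenation of the two input vectors and yields {a0,b0,a1,b1}; mask
   values 0..3 refer to the first vector and 4..7 to the second.  */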
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
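
/* For illustration: with MASK_NUNITS == 4, a requested element 9 exceeds
   2 * MASK_NUNITS - 1 == 7, so, provided the first vector is no longer
   needed, the code above rebases the current and the already-recorded mask
   elements by 4 (turning, e.g., {6,9,9,9} into {2,5,5,5}) and sets
   NEED_NEXT_VECTOR so that the caller advances to the next pair of input
   vectors.  */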
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;

  if (!can_vec_perm_expr_p (vectype, NULL_TREE))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type
    = lang_hooks.types.type_for_size
        (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = (int *) xmalloc (sizeof (int) * nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
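
  /* E.g., a final VF of 8 with an SLP unrolling factor of 2 gives
     NCOPIES == 4: each permutation is emitted four times, and
     vect_create_mask_and_perm () steps through DR_CHAIN by STRIDE vectors
     per copy.  */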
  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = i + j * group_size;
              if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                          nunits, only_one_vec, index,
                                          mask, &current_mask_element,
                                          &need_next_vector,
                                          &number_of_mask_fixes, &mask_fixed,
                                          &needs_first_vector))
                return false;
              mask[index++] = current_mask_element;

              if (index == nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!can_vec_perm_expr_p (vectype, mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
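
  /* E.g., with VECTORIZATION_FACTOR == 4, GROUP_SIZE == 2 and NUNITS == 4,
     (4 * 2) / 4 == 2 vector stmts cover the whole group.  */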
  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}