1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if it succeeds and UNKNOWN_LOC if not.
   NOTE(review): this listing is elided -- the return-type line, braces,
   the STMT declaration and the fall-through return are not visible.  */
45 find_bb_location (basic_block bb)
48 gimple_stmt_iterator si;
/* Walk the stmts of BB and report the first one that carries a known
   source location.  */
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 vect_free_slp_tree (slp_tree node)
/* Post-order traversal: release both children before this node's own
   vectors.  */
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* The scalar-stmts vector always exists; the vectorized-stmts vector is
   only present once vector stmts were created, hence the guard.  */
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
87 /* Free the memory allocated for the SLP instance INSTANCE: the whole SLP
   tree it roots, plus the instance's load-permutation and loads vectors. */
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
/* NOTE(review): elided listing -- several lines (return type, braces,
   early "return false" paths after the dump messages) are not visible.
   Only the structure shown below is documented here.  */
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
116 unsigned int i, number_of_oprnds;
/* At most two RHS operands are tracked (dt[2], FIRST_STMT_DT0/1).  */
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
/* LOOP stays NULL for basic-block SLP; presumably this assignment is
   guarded by a loop_vinfo check on an elided line -- confirm upstream.  */
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT in turn.  */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* Either every stmt of the node is inside a pattern or none is;
   a mix is rejected below.  */
156 if (!*first_stmt_dt0)
160 if (i == 1 && !*first_stmt_dt1)
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM)
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
/* Re-derive DEF from the pattern's defining stmt.  */
185 switch (gimple_code (def_stmt))
188 def = gimple_phi_result (def_stmt);
192 def = gimple_assign_lhs (def_stmt);
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
/* First stmt of the group: record op0's def type/def type tree, and
   analyze costs once for the whole group.  */
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
207 *first_stmt_def0_type = TREE_TYPE (def);
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not a memory operation (this function is not called for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
227 *first_stmt_def1_type = TREE_TYPE (def);
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
239 *first_stmt_const_oprnd = oprnd;
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt (same def type and compatible
   types of the defs).  */
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
268 /* Check the types of the definitions.  Constant/external defs need no
   recursion; internal/reduction defs are queued for the children.  */
271 case vect_constant_def:
272 case vect_external_def:
275 case vect_internal_def:
276 case vect_reduction_def:
278 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
280 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
284 /* FORNOW: Not supported. */
285 if (vect_print_dump_info (REPORT_SLP))
287 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
288 print_generic_expr (vect_dump, def, TDF_SLIM);
299 /* Recursively build an SLP tree starting from NODE.
300 Fail (and return FALSE) if def-stmts are not isomorphic, require data
301 permutation or are of unsupported types of operation. Otherwise, return
   TRUE (the continuation of this comment is elided in this listing).
   NOTE(review): many interior lines (braces, "return false" paths,
   some conditions) are missing from this excerpt.  */
305 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
306 slp_tree *node, unsigned int group_size,
307 int *inside_cost, int *outside_cost,
308 int ncopies_for_cost, unsigned int *max_nunits,
309 VEC (int, heap) **load_permutation,
310 VEC (slp_tree, heap) **loads,
311 unsigned int vectorization_factor)
/* Collected defs for operand 0 / operand 1 of every stmt in the node;
   these become the scalar stmts of the left/right child nodes.  */
313 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
314 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
316 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
317 gimple stmt = VEC_index (gimple, stmts, 0);
318 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
319 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
320 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
321 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
323 bool stop_recursion = false, need_same_oprnds = false;
324 tree vectype, scalar_type, first_op1 = NULL_TREE;
325 unsigned int ncopies;
328 enum machine_mode optab_op2_mode;
329 enum machine_mode vec_mode;
330 tree first_stmt_const_oprnd = NULL_TREE;
331 struct data_reference *first_dr;
332 bool pattern0 = false, pattern1 = false;
334 bool permutation = false;
335 unsigned int load_place;
336 gimple first_load, prev_first_load = NULL;
338 /* For every stmt in NODE find its def stmt/s and verify the stmts are
   isomorphic to the first stmt of the group.  */
339 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
341 if (vect_print_dump_info (REPORT_SLP))
343 fprintf (vect_dump, "Build SLP for ");
344 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Stmts with no lhs (neither assign nor call) cannot be SLPed.  */
347 lhs = gimple_get_lhs (stmt);
348 if (lhs == NULL_TREE)
350 if (vect_print_dump_info (REPORT_SLP))
353 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
354 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
360 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
361 vectype = get_vectype_for_scalar_type (scalar_type);
364 if (vect_print_dump_info (REPORT_SLP))
366 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
367 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
372 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
375 if (vect_print_dump_info (REPORT_SLP))
376 fprintf (vect_dump, "SLP with multiple types ");
378 /* FORNOW: multiple types are unsupported in BB SLP. */
383 /* In case of multiple types we need to detect the smallest type. */
384 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
385 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
387 if (is_gimple_call (stmt))
388 rhs_code = CALL_EXPR;
390 rhs_code = gimple_assign_rhs_code (stmt);
392 /* Check the operation.  The first stmt fixes FIRST_STMT_CODE; every
   later stmt must match it (complex REALPART/IMAGPART excepted).  */
395 first_stmt_code = rhs_code;
397 /* Shift arguments should be equal in all the packed stmts for a
398 vector shift with scalar shift operand. */
399 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
400 || rhs_code == LROTATE_EXPR
401 || rhs_code == RROTATE_EXPR)
403 vec_mode = TYPE_MODE (vectype);
405 /* First see if we have a vector/vector shift. */
406 optab = optab_for_tree_code (rhs_code, vectype,
410 || (optab->handlers[(int) vec_mode].insn_code
411 == CODE_FOR_nothing))
413 /* No vector/vector shift, try for a vector/scalar shift. */
414 optab = optab_for_tree_code (rhs_code, vectype,
419 if (vect_print_dump_info (REPORT_SLP))
420 fprintf (vect_dump, "Build SLP failed: no optab.");
423 icode = (int) optab->handlers[(int) vec_mode].insn_code;
424 if (icode == CODE_FOR_nothing)
426 if (vect_print_dump_info (REPORT_SLP))
427 fprintf (vect_dump, "Build SLP failed: "
428 "op not supported by target.");
/* A scalar (non-vector) second operand means all stmts of the group
   must share the same shift amount.  */
431 optab_op2_mode = insn_data[icode].operand[2].mode;
432 if (!VECTOR_MODE_P (optab_op2_mode))
434 need_same_oprnds = true;
435 first_op1 = gimple_assign_rhs2 (stmt);
442 if (first_stmt_code != rhs_code
443 && (first_stmt_code != IMAGPART_EXPR
444 || rhs_code != REALPART_EXPR)
445 && (first_stmt_code != REALPART_EXPR
446 || rhs_code != IMAGPART_EXPR))
448 if (vect_print_dump_info (REPORT_SLP))
451 "Build SLP failed: different operation in stmt ");
452 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
459 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
461 if (vect_print_dump_info (REPORT_SLP))
464 "Build SLP failed: different shift arguments in ");
465 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
472 /* Strided store or load. */
473 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
475 if (REFERENCE_CLASS_P (lhs))
/* Strided store: recurse into its defs like any other stmt.  */
478 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
479 stmt, &def_stmts0, &def_stmts1,
482 &first_stmt_def0_type,
483 &first_stmt_def1_type,
484 &first_stmt_const_oprnd,
486 &pattern0, &pattern1))
492 /* FORNOW: Check that there is no gap between the loads. */
493 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
494 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
495 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
496 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
498 if (vect_print_dump_info (REPORT_SLP))
500 fprintf (vect_dump, "Build SLP failed: strided "
502 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
508 /* Check that the size of interleaved loads group is not
509 greater than the SLP group size. */
510 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: the number of "
515 "interleaved loads is greater than"
516 " the SLP group size ");
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
526 /* Check that there are no loads from different interleaving
527 chains in the same node. The only exception is complex
   numbers (REALPART/IMAGPART pairs).  */
529 if (prev_first_load != first_load
530 && rhs_code != REALPART_EXPR
531 && rhs_code != IMAGPART_EXPR)
533 if (vect_print_dump_info (REPORT_SLP))
535 fprintf (vect_dump, "Build SLP failed: different "
536 "interleaving chains in one node ");
537 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
544 prev_first_load = first_load;
/* Alignment check and load-cost analysis only for the first load
   of the interleaving chain.  */
546 if (first_load == stmt)
548 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
549 if (vect_supportable_dr_alignment (first_dr)
550 == dr_unaligned_unsupported)
552 if (vect_print_dump_info (REPORT_SLP))
554 fprintf (vect_dump, "Build SLP failed: unsupported "
556 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
562 /* Analyze costs (for the first stmt in the group). */
563 vect_model_load_cost (vinfo_for_stmt (stmt),
564 ncopies_for_cost, *node);
567 /* Store the place of this load in the interleaving chain. In
568 case that permutation is needed we later decide if a specific
569 permutation is supported. */
570 load_place = vect_get_place_in_interleaving_chain (stmt,
575 VEC_safe_push (int, heap, *load_permutation, load_place);
577 /* We stop the tree when we reach a group of loads. */
578 stop_recursion = true;
581 } /* Strided access. */
/* From here on: non-strided accesses.  */
584 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
586 /* Not strided load. */
587 if (vect_print_dump_info (REPORT_SLP))
589 fprintf (vect_dump, "Build SLP failed: not strided load ");
590 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
593 /* FORNOW: Not strided loads are not supported. */
597 /* Not memory operation. */
598 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
599 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
601 if (vect_print_dump_info (REPORT_SLP))
603 fprintf (vect_dump, "Build SLP failed: operation");
604 fprintf (vect_dump, " unsupported ");
605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
611 /* Find the def-stmts. */
612 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
613 &def_stmts0, &def_stmts1,
614 &first_stmt_dt0, &first_stmt_dt1,
615 &first_stmt_def0_type,
616 &first_stmt_def1_type,
617 &first_stmt_const_oprnd,
619 &pattern0, &pattern1))
624 /* Add the costs of the node to the overall instance costs. */
625 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
626 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
628 /* Strided loads were reached - stop the recursion.  Record the node in
   LOADS and account for the (possible) permutation cost.  */
633 VEC_safe_push (slp_tree, heap, *loads, *node);
634 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
640 /* Create SLP_TREE nodes for the definition node/s.  Only internal defs
   need child nodes; constants/externals stop the recursion.  */
641 if (first_stmt_dt0 == vect_internal_def)
643 slp_tree left_node = XNEW (struct _slp_tree);
644 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
645 SLP_TREE_VEC_STMTS (left_node) = NULL;
646 SLP_TREE_LEFT (left_node) = NULL;
647 SLP_TREE_RIGHT (left_node) = NULL;
648 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
649 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
650 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
651 inside_cost, outside_cost, ncopies_for_cost,
652 max_nunits, load_permutation, loads,
653 vectorization_factor))
656 SLP_TREE_LEFT (*node) = left_node;
659 if (first_stmt_dt1 == vect_internal_def)
661 slp_tree right_node = XNEW (struct _slp_tree);
662 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
663 SLP_TREE_VEC_STMTS (right_node) = NULL;
664 SLP_TREE_LEFT (right_node) = NULL;
665 SLP_TREE_RIGHT (right_node) = NULL;
666 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
667 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
668 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
669 inside_cost, outside_cost, ncopies_for_cost,
670 max_nunits, load_permutation, loads,
671 vectorization_factor))
674 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: the node's scalar stmts,
   then (recursively) both children.  NOTE(review): the NULL-node guard and
   braces are elided in this listing.  */
682 vect_print_slp_tree (slp_tree node)
690 fprintf (vect_dump, "node ");
691 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
693 fprintf (vect_dump, "\n\tstmt %d ", i);
694 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
696 fprintf (vect_dump, "\n");
698 vect_print_slp_tree (SLP_TREE_LEFT (node));
699 vect_print_slp_tree (SLP_TREE_RIGHT (node));
703 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
704 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
705 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
706 stmts in NODE are to be marked. */
709 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* NOTE(review): the "j < 0 || i == j" selection test is on an elided line;
   the excerpt only shows the unconditional assignment.  */
717 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
719 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
/* Recurse into both children with the same MARK/J.  */
721 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
722 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
726 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
729 vect_mark_slp_stmts_relevant (slp_tree node)
733 stmt_vec_info stmt_info;
738 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
740 stmt_info = vinfo_for_stmt (stmt);
/* A stmt must be either not yet marked or already vect_used_in_scope;
   any other relevance state is a bug.  */
741 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
742 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
743 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
/* Recurse into both children.  */
746 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
747 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
751 /* Check if the permutation required by the SLP INSTANCE is supported.
752 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
755 vect_supported_slp_permutation_p (slp_instance instance)
757 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
758 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
759 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
760 VEC (slp_tree, heap) *sorted_loads = NULL;
762 slp_tree *tmp_loads = NULL;
763 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
766 /* FORNOW: The only supported loads permutation is loads from the same
767 location in all the loads in the node, when the data-refs in
768 nodes of LOADS constitute an interleaving chain.
769 Sort the nodes according to the order of accesses in the chain. */
770 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation in strides of GROUP_SIZE: entry I of the
   permutation gives the chain position of load-node J.  */
772 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
773 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
774 i += group_size, j++)
776 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
777 /* Check that the loads are all in the same interleaving chain. */
778 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
780 if (vect_print_dump_info (REPORT_DETAILS))
782 fprintf (vect_dump, "Build SLP failed: unsupported data "
784 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* NOTE(review): the free (tmp_loads)/return-false path here is on
   elided lines.  */
791 tmp_loads[index] = load;
/* Rebuild SLP_INSTANCE_LOADS in interleaving-chain order.  */
794 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
795 for (i = 0; i < group_size; i++)
796 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
798 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
799 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Finally verify the target can actually perform this permutation.  */
802 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
803 SLP_INSTANCE_UNROLLING_FACTOR (instance),
811 /* Rearrange the statements of NODE according to PERMUTATION (recursively,
   children first).  */
814 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
815 VEC (int, heap) *permutation)
818 VEC (gimple, heap) *tmp_stmts;
819 unsigned int index, i;
824 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
825 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
827 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
/* Pre-fill TMP_STMTS with NULLs so VEC_replace can place each stmt at
   its permuted index out of order.  */
828 tmp_stmts = VEC_alloc (gimple, heap, group_size);
830 for (i = 0; i < group_size; i++)
831 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
833 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
835 index = VEC_index (int, permutation, i);
836 VEC_replace (gimple, tmp_stmts, index, stmt);
/* Replace the node's scalar stmts with the permuted vector.  */
839 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
840 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
844 /* Check if the required load permutation is supported.
845 LOAD_PERMUTATION contains a list of indices of the loads.
846 In SLP this permutation is relative to the order of strided stores that are
847 the base of the SLP instance. */
850 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
851 VEC (int, heap) *load_permutation)
853 int i = 0, j, prev = -1, next, k, number_of_groups;
854 bool supported, bad_permutation = false;
859 /* FORNOW: permutations are only supported in SLP. */
863 if (vect_print_dump_info (REPORT_SLP))
865 fprintf (vect_dump, "Load permutation ");
866 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
867 fprintf (vect_dump, "%d ", next);
870 /* In case of reduction every load permutation is allowed, since the order
871 of the reduction statements is not important (as opposed to the case of
872 strided stores). The only condition we need to check is that all the
873 load nodes are of the same size and have the same permutation (and then
874 rearrange all the nodes of the SLP instance according to this
   permutation -- continuation elided in this listing).  */
877 /* Check that all the load nodes are of the same size. */
879 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
881 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
882 != (unsigned) group_size)
885 node = SLP_INSTANCE_TREE (slp_instn);
886 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
887 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
888 instance, not all the loads belong to the same node or interleaving
889 group. Hence, we need to divide them into groups according to
891 number_of_groups = VEC_length (int, load_permutation) / group_size;
893 /* Reduction (there are no data-refs in the root). */
894 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
896 int first_group_load_index;
898 /* Compare all the permutation sequences to the first one. */
899 for (i = 1; i < number_of_groups; i++)
902 for (j = i * group_size; j < i * group_size + group_size; j++)
904 next = VEC_index (int, load_permutation, j);
905 first_group_load_index = VEC_index (int, load_permutation, k);
907 if (next != first_group_load_index)
909 bad_permutation = true;
920 if (!bad_permutation)
922 /* This permutation is valid for reduction. Since the order of the
923 statements in the nodes is not important unless they are memory
924 accesses, we can rearrange the statements in all the nodes
925 according to the order of the loads. */
926 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
928 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
933 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
934 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
935 well (unless it's reduction). */
936 if (VEC_length (int, load_permutation)
937 != (unsigned int) (group_size * group_size))
/* Use a bitmap to verify each load index appears exactly once, each in
   a run of GROUP_SIZE identical entries.  */
941 load_index = sbitmap_alloc (group_size);
942 sbitmap_zero (load_index);
943 for (j = 0; j < group_size; j++)
945 for (i = j * group_size, k = 0;
946 VEC_iterate (int, load_permutation, i, next) && k < group_size;
949 if (i != j * group_size && next != prev)
/* A repeated index means the run 0..01..1.. shape is violated.  */
958 if (TEST_BIT (load_index, prev))
964 SET_BIT (load_index, prev);
967 for (j = 0; j < group_size; j++)
968 if (!TEST_BIT (load_index, j))
971 sbitmap_free (load_index);
973 if (supported && i == group_size * group_size
974 && vect_supported_slp_permutation_p (slp_instn))
981 /* Find the first load in the loop that belongs to INSTANCE.
982 When loads are in several SLP nodes, there can be a case in which the first
983 load does not appear in the first SLP node to be transformed, causing
984 incorrect order of statements. Since we generate all the loads together,
985 they must be inserted before the first load of the SLP instance and not
986 before the first load of the first node of the instance. */
988 vect_find_first_load_in_slp_instance (slp_instance instance)
992 gimple first_load = NULL, load;
/* Scan every load of every load-node, keeping the earliest stmt seen.  */
995 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
998 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1000 first_load = get_earlier_stmt (load, first_load);
1006 /* Analyze an SLP instance starting from a group of strided stores. Call
1007 vect_build_slp_tree to build a tree of packed stmts if possible.
1008 Return FALSE if it's impossible to SLP any stmt in the loop.
   NOTE(review): elided listing -- the stmt parameter line, several
   conditions and closing braces are not visible here.  */
1011 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1014 slp_instance new_instance;
1015 slp_tree node = XNEW (struct _slp_tree);
1016 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1017 unsigned int unrolling_factor = 1, nunits;
1018 tree vectype, scalar_type = NULL_TREE;
1020 unsigned int vectorization_factor = 0;
1021 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1022 unsigned int max_nunits = 0;
1023 VEC (int, heap) *load_permutation;
1024 VEC (slp_tree, heap) *loads;
1025 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* Strided-store root: take the scalar type from the data-ref.  */
1029 scalar_type = TREE_TYPE (DR_REF (dr));
1030 vectype = get_vectype_for_scalar_type (scalar_type);
1031 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
/* Reduction root: only valid inside a loop; group size is the number of
   reduction stmts collected for the loop.  */
1035 gcc_assert (loop_vinfo);
1036 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1037 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1042 if (vect_print_dump_info (REPORT_SLP))
1044 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1045 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1051 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1053 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1055 /* No multitypes in BB SLP. */
1056 vectorization_factor = nunits;
1058 /* Calculate the unrolling factor. */
1059 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1060 if (unrolling_factor != 1 && !loop_vinfo)
1062 if (vect_print_dump_info (REPORT_SLP))
1063 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1069 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1070 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1074 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS, following
   the DR_GROUP_NEXT_DR chain.  */
1077 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1078 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1083 /* Collect reduction statements. */
1084 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1088 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1089 if (vect_print_dump_info (REPORT_DETAILS))
1091 fprintf (vect_dump, "pushing reduction into node: ");
1092 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
/* Initialize the root node's remaining fields.  */
1097 SLP_TREE_VEC_STMTS (node) = NULL;
1098 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1099 SLP_TREE_LEFT (node) = NULL;
1100 SLP_TREE_RIGHT (node) = NULL;
1101 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1102 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1104 /* Calculate the number of vector stmts to create based on the unrolling
1105 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1106 GROUP_SIZE / NUNITS otherwise. */
1107 ncopies_for_cost = unrolling_factor * group_size / nunits;
1109 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1110 loads = VEC_alloc (slp_tree, heap, group_size);
1112 /* Build the tree for the SLP instance. */
1113 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1114 &inside_cost, &outside_cost, ncopies_for_cost,
1115 &max_nunits, &load_permutation, &loads,
1116 vectorization_factor))
1118 /* Create a new SLP instance. */
1119 new_instance = XNEW (struct _slp_instance);
1120 SLP_INSTANCE_TREE (new_instance) = node;
1121 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1122 /* Calculate the unrolling factor based on the smallest type in the
1124 if (max_nunits > nunits)
1125 unrolling_factor = least_common_multiple (max_nunits, group_size)
1128 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1129 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1130 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1131 SLP_INSTANCE_LOADS (new_instance) = loads;
1132 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1133 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If the tree contains load nodes, verify the implied permutation is
   one the target can realize; otherwise drop the instance.  */
1134 if (VEC_length (slp_tree, loads))
1136 if (!vect_supported_load_permutation_p (new_instance, group_size,
1139 if (vect_print_dump_info (REPORT_SLP))
1141 fprintf (vect_dump, "Build SLP failed: unsupported load "
1143 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1146 vect_free_slp_instance (new_instance);
1150 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1151 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the load permutation is unused -- release it.  */
1154 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the new instance on the enclosing loop or basic block.  */
1157 VEC_safe_push (slp_instance, heap,
1158 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1161 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1164 if (vect_print_dump_info (REPORT_SLP))
1165 vect_print_slp_tree (node);
1170 /* Failed to SLP. */
1171 /* Free the allocated memory. */
1172 vect_free_slp_tree (node);
1173 VEC_free (int, heap, load_permutation);
1174 VEC_free (slp_tree, heap, loads);
1180 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1181 trees of packed scalar stmts if SLP is possible. */
1184 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1187 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1191 if (vect_print_dump_info (REPORT_SLP))
1192 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pick the candidate stmt groups from the loop or the basic block.  */
1196 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1197 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1200 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1202 /* Find SLP sequences starting from groups of strided stores. */
1203 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1204 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
/* For BB SLP, failing every store group means the block cannot be
   vectorized at all.  */
1207 if (bb_vinfo && !ok)
1209 if (vect_print_dump_info (REPORT_SLP))
1210 fprintf (vect_dump, "Failed to SLP the basic block.");
1215 /* Find SLP sequences starting from groups of reductions (needs at least
   two reduction stmts).  */
1216 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1217 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1218 VEC_index (gimple, reductions, 0)))
1225 /* For each possible SLP instance decide whether to SLP it and calculate overall
1226 unrolling factor needed to SLP the loop. */
1229 vect_make_slp_decision (loop_vec_info loop_vinfo)
1231 unsigned int i, unrolling_factor = 1;
1232 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1233 slp_instance instance;
1234 int decided_to_slp = 0;
1236 if (vect_print_dump_info (REPORT_SLP))
1237 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1239 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1241 /* FORNOW: SLP if you can.  The loop's unrolling factor is the max over
   all accepted instances.  */
1242 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1243 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1245 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1246 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1247 loop-based vectorization. Such stmts will be marked as HYBRID. */
1248 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1252 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1254 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1255 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1256 decided_to_slp, unrolling_factor);
1260 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1261 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1264 vect_detect_hybrid_slp_stmts (slp_tree node)
1268 imm_use_iterator imm_iter;
1270 stmt_vec_info stmt_vinfo;
/* For every scalar stmt in NODE that is pure SLP and defines an SSA name,
   scan its immediate uses.  A use by a stmt that is not SLPed but is
   relevant (or part of a vectorizable cycle) forces hybrid SLP -- unless
   that use is the reduction PHI itself.  */
1275 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1276 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1277 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1278 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1279 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1280 && !STMT_SLP_TYPE (stmt_vinfo)
1281 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1282 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1283 && !(gimple_code (use_stmt) == GIMPLE_PHI
1284 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1285 == vect_reduction_def))
1286 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both operand subtrees.  */
1288 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1289 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1293 /* Find stmts that must be both vectorized and SLPed. */
1296 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1299 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1300 slp_instance instance;
1302 if (vect_print_dump_info (REPORT_SLP))
1303 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Walk every SLP instance of the loop and mark hybrid stmts in its tree.  */
1305 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1306 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1310 /* Create and initialize a new bb_vec_info struct for BB, as well as
1311 stmt_vec_info structs for all the stmts in it. */
1314 new_bb_vec_info (basic_block bb)
1316 bb_vec_info res = NULL;
1317 gimple_stmt_iterator gsi;
/* Zero-initialized so all fields start NULL/0.  */
1319 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1320 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info (owned by RES) to every stmt in BB.  */
1322 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1324 gimple stmt = gsi_stmt (gsi);
1325 gimple_set_uid (stmt, 0);
1326 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Pre-allocate the vectors that the analysis phases will populate.  */
1329 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1330 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1337 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1338 stmts in the basic block. */
1341 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1344 gimple_stmt_iterator si;
1349 bb = BB_VINFO_BB (bb_vinfo);
/* Release the per-stmt vec-info of every stmt in the block.  */
1351 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1353 gimple stmt = gsi_stmt (si);
1354 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1357 /* Free stmt_vec_info. */
1358 free_stmt_vec_info (stmt);
/* Free the containers owned by BB_VINFO itself.  */
1361 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1362 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1368 /* Analyze statements contained in SLP tree node after recursively analyzing
1369 the subtree. Return TRUE if the operations are supported. */
1372 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Postorder: both children must be analyzable before this node.  */
1381 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1382 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
/* Every scalar stmt grouped in this node must itself be vectorizable.  */
1385 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1388 gcc_assert (stmt_info);
1389 gcc_assert (PURE_SLP_STMT (stmt_info));
1391 if (!vect_analyze_stmt (stmt, &dummy, node))
1399 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1400 operations are supported. */
1403 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1405 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1406 slp_instance instance;
/* No increment in the for-header: on failure the instance is removed and
   the same index is revisited; the i++ of the success path is in lines
   elided from this extract -- confirm against the full file.  */
1409 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1411 if (!vect_slp_analyze_node_operations (bb_vinfo,
1412 SLP_INSTANCE_TREE (instance)))
/* Unsupported instance: drop it and keep the remaining ones in order.  */
1414 vect_free_slp_instance (instance);
1415 VEC_ordered_remove (slp_instance, slp_instances, i);
/* Vectorization of the block is worthwhile only if any instance survived.  */
1421 if (!VEC_length (slp_instance, slp_instances))
1428 /* Check if the basic block can be vectorized. */
1431 vect_slp_analyze_bb (basic_block bb)
1433 bb_vec_info bb_vinfo;
1434 VEC (ddr_p, heap) *ddrs;
1435 VEC (slp_instance, heap) *slp_instances;
1436 slp_instance instance;
1438 gimple_stmt_iterator gsi;
1440 int max_vf = MAX_VECTORIZATION_FACTOR;
1442 if (vect_print_dump_info (REPORT_DETAILS))
1443 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count the "real" stmts (the increment of the counter is in lines elided
   from this extract); debug stmts, nops and labels are not counted.  */
1445 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1447 gimple stmt = gsi_stmt (gsi);
1448 if (!is_gimple_debug (stmt)
1449 && !gimple_nop_p (stmt)
1450 && gimple_code (stmt) != GIMPLE_LABEL)
/* Bail out early on oversized blocks to bound analysis time.  */
1454 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1456 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1457 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1463 bb_vinfo = new_bb_vec_info (bb);
/* Phase 1: find and analyze all data references in the block.  */
1467 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1469 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1470 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1473 destroy_bb_vec_info (bb_vinfo);
/* Without any data-dependence relations there is nothing to SLP.  */
1477 ddrs = BB_VINFO_DDRS (bb_vinfo);
1478 if (!VEC_length (ddr_p, ddrs))
1480 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1481 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1484 destroy_bb_vec_info (bb_vinfo);
/* Phase 2: dependences between the data-refs must be handled.  */
1488 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1491 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1492 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1493 "in basic block.\n");
1495 destroy_bb_vec_info (bb_vinfo);
/* Phase 3: alignment analysis of the data references.  */
1499 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1501 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1502 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1505 destroy_bb_vec_info (bb_vinfo);
/* Phase 4: access patterns (e.g. strided groups) must be supported.  */
1509 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1511 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1512 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1515 destroy_bb_vec_info (bb_vinfo);
/* Phase 5: verify the target supports the required (mis)alignments.  */
1519 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1521 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1522 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1525 destroy_bb_vec_info (bb_vinfo);
1529 /* Check the SLP opportunities in the basic block, analyze and build SLP
1531 if (!vect_analyze_slp (NULL, bb_vinfo))
1533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1534 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1535 "in basic block.\n");
1537 destroy_bb_vec_info (bb_vinfo);
1541 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1543 /* Mark all the statements that we want to vectorize as pure SLP and
1545 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1547 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1548 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
/* Final phase: each grouped operation must be supported by the target.  */
1551 if (!vect_slp_analyze_operations (bb_vinfo))
1553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1554 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1556 destroy_bb_vec_info (bb_vinfo);
1560 if (vect_print_dump_info (REPORT_DETAILS))
1561 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1567 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1568 the number of created vector stmts depends on the unrolling factor). However,
1569 the actual number of vector stmts for every SLP node depends on VF which is
1570 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1571 In this function we assume that the inside costs calculated in
1572 vect_model_xxx_cost are linear in ncopies. */
1575 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1577 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1578 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1579 slp_instance instance;
1581 if (vect_print_dump_info (REPORT_SLP))
1582 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside-loop cost by VF / instance-unrolling-factor
   (the number of times the SLP instance is replicated per loop iteration).  */
1584 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1585 /* We assume that costs are linear in ncopies. */
1586 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1587 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1591 /* For constant and loop invariant defs of SLP_NODE this function returns
1592 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1593 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1594 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1595 REDUC_INDEX is the index of the reduction operand in the statements, unless
1599 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1600 unsigned int op_num, unsigned int number_of_vectors,
1603 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1604 gimple stmt = VEC_index (gimple, stmts, 0);
1605 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1609 int j, number_of_places_left_in_vector;
1612 int group_size = VEC_length (gimple, stmts);
1613 unsigned int vec_num, i;
1614 int number_of_copies = 1;
1615 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1616 bool constant_p, is_store;
1617 tree neutral_op = NULL;
/* For reductions, pick a neutral element (0 for sum-like codes, 1 for
   multiplication) matching the reduction code, used to pad extra copies.  */
1619 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1621 enum tree_code code = gimple_assign_rhs_code (stmt);
/* REDUC_INDEX == -1: caller does not want reduction handling here.  */
1622 if (reduc_index == -1)
1624 VEC_free (tree, heap, *vec_oprnds);
1628 op_num = reduc_index - 1;
1629 op = gimple_op (stmt, op_num + 1);
1630 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1631 we need either neutral operands or the original operands. See
1632 get_initial_def_for_reduction() for details. */
1635 case WIDEN_SUM_EXPR:
1641 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1642 neutral_op = build_real (TREE_TYPE (op), dconst0)
1644 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1650 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1651 neutral_op = build_real (TREE_TYPE (op), dconst1);
1653 neutral_op = build_int_cst (TREE_TYPE (op), 1);
/* Stores take their operand from the RHS; other stmts from OP_NUM.  */
1662 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1665 op = gimple_assign_rhs1 (stmt);
1670 op = gimple_op (stmt, op_num + 1);
1673 if (CONSTANT_CLASS_P (op))
1678 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1679 gcc_assert (vector_type);
1681 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1683 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1684 created vectors. It is greater than 1 if unrolling is performed.
1686 For example, we have two scalar operands, s1 and s2 (e.g., group of
1687 strided accesses of size two), while NUNITS is four (i.e., four scalars
1688 of this type can be packed in a vector). The output vector will contain
1689 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1692 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1693 containing the operands.
1695 For example, NUNITS is four as before, and the group size is 8
1696 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1697 {s5, s6, s7, s8}. */
1699 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
/* Fill vectors element by element; stmts are walked in reverse so that the
   tree_cons chain ends up in source order inside each built vector.  */
1701 number_of_places_left_in_vector = nunits;
1702 for (j = 0; j < number_of_copies; j++)
1704 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1707 op = gimple_assign_rhs1 (stmt);
1709 op = gimple_op (stmt, op_num + 1);
1711 if (reduc_index != -1)
1713 struct loop *loop = (gimple_bb (stmt))->loop_father;
1714 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1717 /* Get the def before the loop. */
1718 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1719 loop_preheader_edge (loop));
/* All but the last copy of a reduction operand use the neutral value.  */
1720 if (j != (number_of_copies - 1) && neutral_op)
1724 /* Create 'vect_ = {op0,op1,...,opn}'. */
1725 t = tree_cons (NULL_TREE, op, t);
1727 number_of_places_left_in_vector--;
/* A vector is full: materialize it (VECTOR_CST if all elements are
   constants, otherwise a CONSTRUCTOR) and stash the initializer stmt.  */
1729 if (number_of_places_left_in_vector == 0)
1731 number_of_places_left_in_vector = nunits;
1734 vec_cst = build_vector (vector_type, t);
1736 vec_cst = build_constructor_from_list (vector_type, t);
1737 VEC_quick_push (tree, voprnds,
1738 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1744 /* Since the vectors are created in the reverse order, we should invert
1746 vec_num = VEC_length (tree, voprnds);
1747 for (j = vec_num - 1; j >= 0; j--)
1749 vop = VEC_index (tree, voprnds, j);
1750 VEC_quick_push (tree, *vec_oprnds, vop);
1753 VEC_free (tree, heap, voprnds);
1755 /* In case that VF is greater than the unrolling factor needed for the SLP
1756 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1757 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1758 to replicate the vectors. */
1759 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1761 tree neutral_vec = NULL;
/* For reductions, pad with a vector of the neutral element (built once).  */
1768 for (i = 0; i < (unsigned) nunits; i++)
1769 t = tree_cons (NULL_TREE, neutral_op, t);
1770 neutral_vec = build_vector (vector_type, t);
1773 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
/* Otherwise replicate the already-built vectors.  */
1777 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1778 VEC_quick_push (tree, *vec_oprnds, vop);
1784 /* Get vectorized definitions from SLP_NODE that contains corresponding
1785 vectorized def-stmts. */
1788 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1791 gimple vec_def_stmt;
1794 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
/* Collect the LHS of every vectorized stmt already created for SLP_NODE.  */
1797 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1800 gcc_assert (vec_def_stmt);
1801 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1802 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1807 /* Get vectorized definitions for SLP_NODE.
1808 If the scalar definitions are loop invariants or constants, collect them and
1809 call vect_get_constant_vectors() to create vector stmts.
1810 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1811 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1812 vect_get_slp_vect_defs() to retrieve them.
1813 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1814 the right node. This is used when the second operand must remain scalar. */
1817 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1818 VEC (tree,heap) **vec_oprnds1, int reduc_index)
1821 enum tree_code code;
1822 int number_of_vects;
1823 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1825 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1826 /* The number of vector defs is determined by the number of vector statements
1827 in the node from which we get those statements. */
1828 if (SLP_TREE_LEFT (slp_node))
1829 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1832 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1833 /* Number of vector stmts was calculated according to LHS in
1834 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1835 necessary. See vect_get_smallest_scalar_type() for details. */
1836 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* When RHS elements are narrower/wider than LHS, rescale the count.  */
1838 if (rhs_size_unit != lhs_size_unit)
1840 number_of_vects *= rhs_size_unit;
1841 number_of_vects /= lhs_size_unit;
1845 /* Allocate memory for vectorized defs. */
1846 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1848 /* SLP_NODE corresponds either to a group of stores or to a group of
1849 unary/binary operations. We don't call this function for loads.
1850 For reduction defs we call vect_get_constant_vectors(), since we are
1851 looking for initial loop invariant values. */
1852 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
1853 /* The defs are already vectorized. */
1854 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1856 /* Build vectors from scalar defs. */
1857 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
/* Stores and reductions do not need a second operand vector -- stop here.  */
1860 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1861 /* Since we don't call this function with loads, this is a group of
1865 /* For reductions, we only need initial values. */
1866 if (reduc_index != -1)
/* Only binary operations (with a caller-supplied VEC_OPRNDS1) continue.  */
1869 code = gimple_assign_rhs_code (first_stmt);
1870 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1873 /* The number of vector defs is determined by the number of vector statements
1874 in the node from which we get those statements. */
1875 if (SLP_TREE_RIGHT (slp_node))
1876 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1878 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1880 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1882 if (SLP_TREE_RIGHT (slp_node))
1883 /* The defs are already vectorized. */
1884 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1886 /* Build vectors from scalar defs. */
1887 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
1891 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1892 building a vector of type MASK_TYPE from it) and two input vectors placed in
1893 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1894 shifting by STRIDE elements of DR_CHAIN for every copy.
1895 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1897 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1898 the created stmts must be inserted. */
1901 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1902 tree mask, int first_vec_indx, int second_vec_indx,
1903 gimple_stmt_iterator *gsi, slp_tree node,
1904 tree builtin_decl, tree vectype,
1905 VEC(tree,heap) *dr_chain,
1906 int ncopies, int vect_stmts_counter)
1909 gimple perm_stmt = NULL;
1910 stmt_vec_info next_stmt_info;
1912 tree first_vec, second_vec, data_ref;
1913 VEC (tree, heap) *params = NULL;
1915 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1917 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vec-stmts array with NULLs so VEC_replace below has valid slots.  */
1919 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1920 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1921 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1923 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1924 for (i = 0; i < ncopies; i++)
1926 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1927 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1929 /* Build argument list for the vectorized call. */
1930 VEC_free (tree, heap, params);
1931 params = VEC_alloc (tree, heap, 3);
1932 VEC_quick_push (tree, params, first_vec);
1933 VEC_quick_push (tree, params, second_vec);
1934 VEC_quick_push (tree, params, mask);
1936 /* Generate the permute statement. */
1937 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1938 data_ref = make_ssa_name (perm_dest, perm_stmt);
1939 gimple_call_set_lhs (perm_stmt, data_ref);
1940 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1942 /* Store the vector statement in NODE. */
1943 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1944 stride * i + vect_stmts_counter, perm_stmt);
/* Advance to the input vectors of the next copy.  */
1946 first_vec_indx += stride;
1947 second_vec_indx += stride;
1950 /* Mark the scalar stmt as vectorized. */
1951 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1952 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1956 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1957 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1958 representation. Check that the mask is valid and return FALSE if not.
1959 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1960 the next vector, i.e., the current first vector is not needed. */
1963 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1964 int mask_nunits, bool only_one_vec, int index,
1965 int *mask, int *current_mask_element,
1966 bool *need_next_vector)
/* NOTE(review): this state is function-static, so it persists across calls
   while one permutation sequence is generated and is reset when the last
   mask element is processed -- the function is not reentrant.  */
1969 static int number_of_mask_fixes = 1;
1970 static bool mask_fixed = false;
1971 static bool needs_first_vector = false;
1973 /* Convert to target specific representation. */
1974 *current_mask_element = first_mask_element + m;
1975 /* Adjust the value in case it's a mask for second and third vectors. */
1976 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1978 if (*current_mask_element < mask_nunits)
1979 needs_first_vector = true;
1981 /* We have only one input vector to permute but the mask accesses values in
1982 the next vector as well. */
1983 if (only_one_vec && *current_mask_element >= mask_nunits)
1985 if (vect_print_dump_info (REPORT_DETAILS))
1987 fprintf (vect_dump, "permutation requires at least two vectors ");
1988 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1994 /* The mask requires the next vector. */
1995 if (*current_mask_element >= mask_nunits * 2)
1997 if (needs_first_vector || mask_fixed)
1999 /* We either need the first vector too or have already moved to the
2000 next vector. In both cases, this permutation needs three
2002 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "permutation requires at "
2005 "least three vectors ");
2006 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2012 /* We move to the next vector, dropping the first one and working with
2013 the second and the third - we need to adjust the values of the mask
2015 *current_mask_element -= mask_nunits * number_of_mask_fixes;
/* Retroactively rebase the elements already emitted for this mask.  */
2017 for (i = 0; i < index; i++)
2018 mask[i] -= mask_nunits * number_of_mask_fixes;
2020 (number_of_mask_fixes)++;
2024 *need_next_vector = mask_fixed;
2026 /* This was the last element of this mask. Start a new one. */
2027 if (index == mask_nunits - 1)
2029 number_of_mask_fixes = 1;
2031 needs_first_vector = false;
2038 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2039 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2040 permute statements for SLP_NODE_INSTANCE. */
2042 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2043 gimple_stmt_iterator *gsi, int vf,
2044 slp_instance slp_node_instance, bool analyze_only)
2046 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2047 tree mask_element_type = NULL_TREE, mask_type;
2048 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2050 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2051 gimple next_scalar_stmt;
2052 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2053 int first_mask_element;
2054 int index, unroll_factor, *mask, current_mask_element, ncopies;
2055 bool only_one_vec = false, need_next_vector = false;
2056 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
/* Permutation is done via a target builtin; without one we must give up.  */
2058 if (!targetm.vectorize.builtin_vec_perm)
2060 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "no builtin for vect permute for ");
2063 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2069 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2070 &mask_element_type);
2071 if (!builtin_decl || !mask_element_type)
2073 if (vect_print_dump_info (REPORT_DETAILS))
2075 fprintf (vect_dump, "no builtin for vect permute for ");
2076 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* SCALE maps element-wise mask positions to the (possibly narrower)
   target-specific mask element type (e.g. bytes on Altivec).  */
2082 mask_type = get_vectype_for_scalar_type (mask_element_type);
2083 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2084 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2085 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2086 scale = mask_nunits / nunits;
2087 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2089 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2090 unrolling factor. */
2091 orig_vec_stmts_num = group_size *
2092 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2093 if (orig_vec_stmts_num == 1)
2094 only_one_vec = true;
2096 /* Number of copies is determined by the final vectorization factor
2097 relatively to SLP_NODE_INSTANCE unrolling factor. */
2098 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2100 /* Generate permutation masks for every NODE. Number of masks for each NODE
2101 is equal to GROUP_SIZE.
2102 E.g., we have a group of three nodes with three loads from the same
2103 location in each node, and the vector size is 4. I.e., we have a
2104 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2105 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2106 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2109 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2110 specific type, e.g., in bytes for Altivec.
2111 The last mask is illegal since we assume two operands for permute
2112 operation, and the mask element values can't be outside that range. Hence,
2113 the last mask must be converted into {2,5,5,5}.
2114 For the first two permutations we need the first and the second input
2115 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2116 we need the second and the third vectors: {b1,c1,a2,b2} and
2120 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2126 vect_stmts_counter = 0;
2128 first_vec_index = vec_index++;
2130 second_vec_index = first_vec_index;
2132 second_vec_index = vec_index++;
2134 for (j = 0; j < unroll_factor; j++)
2136 for (k = 0; k < group_size; k++)
2138 first_mask_element = (i + j * group_size) * scale;
2139 for (m = 0; m < scale; m++)
2141 if (!vect_get_mask_element (stmt, first_mask_element, m,
2142 mask_nunits, only_one_vec, index, mask,
2143 &current_mask_element, &need_next_vector))
2146 mask[index++] = current_mask_element;
/* A full mask has been accumulated: build the mask vector constant.  */
2149 if (index == mask_nunits)
2151 tree mask_vec = NULL;
2153 while (--index >= 0)
2155 tree t = build_int_cst (mask_element_type, mask[index]);
2156 mask_vec = tree_cons (NULL, t, mask_vec);
2158 mask_vec = build_vector (mask_type, mask_vec);
/* Even with a builtin available, the target may reject this
   particular mask.  */
2161 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2164 if (vect_print_dump_info (REPORT_DETAILS))
2166 fprintf (vect_dump, "unsupported vect permute ");
2167 print_generic_expr (vect_dump, mask_vec, 0);
2175 if (need_next_vector)
2177 first_vec_index = second_vec_index;
2178 second_vec_index = vec_index;
2181 next_scalar_stmt = VEC_index (gimple,
2182 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2184 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2185 mask_vec, first_vec_index, second_vec_index,
2186 gsi, node, builtin_decl, vectype, dr_chain,
2187 ncopies, vect_stmts_counter++);
2200 /* Vectorize SLP instance tree in postorder. */
2203 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2204 unsigned int vectorization_factor)
2207 bool strided_store, is_store;
2208 gimple_stmt_iterator si;
2209 stmt_vec_info stmt_info;
2210 unsigned int vec_stmts_size, nunits, group_size;
2213 slp_tree loads_node;
/* Postorder: vectorize both children before this node's own stmts.  */
2218 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2219 vectorization_factor);
2220 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2221 vectorization_factor);
2223 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2224 stmt_info = vinfo_for_stmt (stmt);
2226 /* VECTYPE is the type of the destination. */
2227 vectype = STMT_VINFO_VECTYPE (stmt_info);
2228 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2229 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2231 /* For each SLP instance calculate number of vector stmts to be created
2232 for the scalar stmts in each node of the SLP tree. Number of vector
2233 elements in one vector iteration is the number of scalar elements in
2234 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2236 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2238 /* In case of load permutation we have to allocate vectorized statements for
2239 all the nodes that participate in that permutation. */
2240 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2243 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2246 if (!SLP_TREE_VEC_STMTS (loads_node))
2248 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2250 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate this node's own vec-stmts array on first visit.  */
2255 if (!SLP_TREE_VEC_STMTS (node))
2257 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2258 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2261 if (vect_print_dump_info (REPORT_DETAILS))
2263 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2264 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2267 /* Loads should be inserted before the first load. */
2268 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2269 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2270 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2271 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2273 si = gsi_for_stmt (stmt);
2275 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
/* Generate vector code for all SLP instances of either LOOP_VINFO or
   BB_VINFO (exactly one of the two is used as the source of instances).  */
2281 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2283 VEC (slp_instance, heap) *slp_instances;
2284 slp_instance instance;
2286 bool is_store = false;
2290 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2291 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2295 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2299 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2301 /* Schedule the tree of INSTANCE. */
2302 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2304 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2305 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2306 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* After vectorization, delete the now-dead scalar stores of each group
   (stmts without a data-ref are skipped).  */
2309 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2311 slp_tree root = SLP_INSTANCE_TREE (instance);
2314 gimple_stmt_iterator gsi;
2316 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2317 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2319 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2322 /* Free the attached stmt_vec_info and remove the stmt. */
2323 gsi = gsi_for_stmt (store);
2324 gsi_remove (&gsi, true);
2325 free_stmt_vec_info (store);
2333 /* Vectorize the basic block. */
2336 vect_slp_transform_bb (basic_block bb)
2338 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2339 gimple_stmt_iterator si;
2341 gcc_assert (bb_vinfo);
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "SLPing BB\n");
2346 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2348 gimple stmt = gsi_stmt (si);
2349 stmt_vec_info stmt_info;
2351 if (vect_print_dump_info (REPORT_DETAILS))
2353 fprintf (vect_dump, "------>SLPing statement: ");
2354 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2357 stmt_info = vinfo_for_stmt (stmt);
2358 gcc_assert (stmt_info);
2360 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2361 if (STMT_SLP_TYPE (stmt_info))
2363 vect_schedule_slp (NULL, bb_vinfo);
2368 mark_sym_for_renaming (gimple_vop (cfun));
2369 /* The memory tags and pointers in vectorized statements need to
2370 have their SSA forms updated. FIXME, why can't this be delayed
2371 until all the loops have been transformed? */
2372 update_ssa (TODO_update_ssa);
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2377 destroy_bb_vec_info (bb_vinfo);