1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
/* Walk BB's statements in order and return the source location of the
   first statement that carries one.
   NOTE(review): this extract is missing lines (return type, braces, the
   STMT declaration/assignment inside the loop, and the fallback return
   for a location-less block) -- documented as-is, do not assume the
   visible lines are complete.  */
46 find_bb_location (basic_block bb)
49 gimple_stmt_iterator si;
/* Scan forward; the first known location wins.  */
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* Post-order free of the SLP tree rooted at NODE: recurse into both
   children first, then release NODE's own statement vectors.
   NOTE(review): the extract omits lines here (return type, braces, and
   presumably the final free of NODE itself) -- confirm against the full
   source before relying on the exact cleanup sequence.  */
68 vect_free_slp_tree (slp_tree node)
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* The scalar-stmt vector always exists; the vectorized-stmt vector is
   only allocated once vector statements were created, hence the guard.  */
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
88 /* Free the memory allocated for the SLP instance. */
/* Release everything owned by INSTANCE: its SLP tree, its load
   permutation vector, and its vector of load nodes.
   NOTE(review): the freeing of the INSTANCE struct itself is not visible
   in this extract -- presumably on an omitted line.  */
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
/* For each RHS operand of STMT, find its defining statement, classify its
   def-type, and check it against the corresponding operand of the first
   statement of the SLP group (the FIRST_STMT_* in/out parameters record
   that reference info when STMT *is* the first statement).  Internal defs
   are collected into DEF_STMTS0/DEF_STMTS1 for recursive SLP-tree
   building.  Costs for the first statement are modeled here as well.
   NOTE(review): many lines are missing from this extract (return type,
   braces, several returns and else-branches) -- the control flow below is
   therefore only partially visible.  */
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
117 unsigned int i, number_of_oprnds;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
/* LOOP stays NULL for basic-block SLP (no loop_vinfo).  */
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
/* Operand must have a vectorizer-recognized def; a missing def stmt is
   only acceptable for constants.  */
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* First stmt of the group: remember that operand I came from a pattern
   (pattern0/pattern1).  Later stmts must agree or the group is mixed.  */
157 if (!*first_stmt_dt0)
161 if (i == 1 && !*first_stmt_dt1)
163 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
165 if (vect_print_dump_info (REPORT_DETAILS))
167 fprintf (vect_dump, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Replace DEF_STMT by the pattern statement and re-read its def type.  */
176 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
177 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
179 if (*dt == vect_unknown_def_type)
181 if (vect_print_dump_info (REPORT_DETAILS))
182 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the pattern def stmt.  */
186 switch (gimple_code (def_stmt))
189 def = gimple_phi_result (def_stmt);
193 def = gimple_assign_lhs (def_stmt);
197 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "unsupported defining stmt: ");
203 if (!*first_stmt_dt0)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0 = dt[i];
208 *first_stmt_def0_type = TREE_TYPE (def);
210 *first_stmt_const_oprnd = oprnd;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class != GIMPLE_SINGLE_RHS)
214 /* Not memory operation (we don't call this function for loads). */
215 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
218 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
223 if (!*first_stmt_dt1 && i == 1)
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1 = dt[i];
228 *first_stmt_def1_type = TREE_TYPE (def);
231 /* We assume that the stmt contains only one constant
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd)
235 if (vect_print_dump_info (REPORT_SLP))
236 fprintf (vect_dump, "Build SLP failed: two constant "
240 *first_stmt_const_oprnd = oprnd;
245 /* Not first stmt of the group, check that the def-stmt/s match
246 the def-stmt/s of the first stmt. */
248 && (*first_stmt_dt0 != dt[i]
249 || (*first_stmt_def0_type && def
250 && !types_compatible_p (*first_stmt_def0_type,
253 && (*first_stmt_dt1 != dt[i]
254 || (*first_stmt_def1_type && def
255 && !types_compatible_p (*first_stmt_def1_type,
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
261 if (vect_print_dump_info (REPORT_SLP))
262 fprintf (vect_dump, "Build SLP failed: different types ");
269 /* Check the types of the definitions. */
272 case vect_constant_def:
273 case vect_external_def:
/* Internal/reduction defs are queued for recursive tree building;
   operand 0 goes to DEF_STMTS0, operand 1 to DEF_STMTS1.  */
276 case vect_internal_def:
277 case vect_reduction_def:
279 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
281 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
285 /* FORNOW: Not supported. */
286 if (vect_print_dump_info (REPORT_SLP))
288 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump, def, TDF_SLIM);
300 /* Recursively build an SLP tree starting from NODE.
301 Fail (and return FALSE) if def-stmts are not isomorphic, require data
302 permutation or are of unsupported types of operation. Otherwise, return
/* Recursively build the SLP tree rooted at *NODE.  Verifies that all the
   scalar statements in the node are isomorphic (same operation, matching
   operand types, compatible data references), accumulates cost into
   INSIDE_COST/OUTSIDE_COST, records load permutations, and recurses into
   the operand-0 / operand-1 def statements as left/right children.
   Returns false on any mismatch.
   NOTE(review): this extract is missing many lines (return type, braces,
   several returns and else-branches) -- the statements below show the
   checks performed but not every control-flow edge between them.  */
306 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
307 slp_tree *node, unsigned int group_size,
308 int *inside_cost, int *outside_cost,
309 int ncopies_for_cost, unsigned int *max_nunits,
310 VEC (int, heap) **load_permutation,
311 VEC (slp_tree, heap) **loads,
312 unsigned int vectorization_factor)
314 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
317 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
318 gimple stmt = VEC_index (gimple, stmts, 0);
319 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
320 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
321 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
322 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
324 bool stop_recursion = false, need_same_oprnds = false;
325 tree vectype, scalar_type, first_op1 = NULL_TREE;
326 unsigned int ncopies;
329 enum machine_mode optab_op2_mode;
330 enum machine_mode vec_mode;
331 tree first_stmt_const_oprnd = NULL_TREE;
332 struct data_reference *first_dr;
333 bool pattern0 = false, pattern1 = false;
335 bool permutation = false;
336 unsigned int load_place;
337 gimple first_load, prev_first_load = NULL;
339 /* For every stmt in NODE find its def stmt/s. */
340 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
342 if (vect_print_dump_info (REPORT_SLP))
344 fprintf (vect_dump, "Build SLP for ");
345 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
351 if (vect_print_dump_info (REPORT_SLP))
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Only assignments and calls (stmts with an LHS) can participate.  */
361 lhs = gimple_get_lhs (stmt);
362 if (lhs == NULL_TREE)
364 if (vect_print_dump_info (REPORT_SLP))
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Determine the vector type for the statement's smallest scalar type.  */
374 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
375 vectype = get_vectype_for_scalar_type (scalar_type);
378 if (vect_print_dump_info (REPORT_SLP))
380 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
386 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
389 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "SLP with multiple types ");
392 /* FORNOW: multiple types are unsupported in BB SLP. */
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
399 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
401 if (is_gimple_call (stmt))
402 rhs_code = CALL_EXPR;
404 rhs_code = gimple_assign_rhs_code (stmt);
406 /* Check the operation. */
/* First stmt of the node fixes the reference operation code.  */
409 first_stmt_code = rhs_code;
411 /* Shift arguments should be equal in all the packed stmts for a
412 vector shift with scalar shift operand. */
413 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
414 || rhs_code == LROTATE_EXPR
415 || rhs_code == RROTATE_EXPR)
417 vec_mode = TYPE_MODE (vectype);
419 /* First see if we have a vector/vector shift. */
420 optab = optab_for_tree_code (rhs_code, vectype,
424 || (optab->handlers[(int) vec_mode].insn_code
425 == CODE_FOR_nothing))
427 /* No vector/vector shift, try for a vector/scalar shift. */
428 optab = optab_for_tree_code (rhs_code, vectype,
433 if (vect_print_dump_info (REPORT_SLP))
434 fprintf (vect_dump, "Build SLP failed: no optab.");
437 icode = (int) optab->handlers[(int) vec_mode].insn_code;
438 if (icode == CODE_FOR_nothing)
440 if (vect_print_dump_info (REPORT_SLP))
441 fprintf (vect_dump, "Build SLP failed: "
442 "op not supported by target.");
/* Vector/scalar shift: all stmts must share the same shift amount.  */
445 optab_op2_mode = insn_data[icode].operand[2].mode;
446 if (!VECTOR_MODE_P (optab_op2_mode))
448 need_same_oprnds = true;
449 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: operation must match the first stmt's, except that
   REALPART/IMAGPART may be mixed (complex number accesses).  */
456 if (first_stmt_code != rhs_code
457 && (first_stmt_code != IMAGPART_EXPR
458 || rhs_code != REALPART_EXPR)
459 && (first_stmt_code != REALPART_EXPR
460 || rhs_code != IMAGPART_EXPR))
462 if (vect_print_dump_info (REPORT_SLP))
465 "Build SLP failed: different operation in stmt ");
466 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
473 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
475 if (vect_print_dump_info (REPORT_SLP))
478 "Build SLP failed: different shift arguments in ");
479 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
486 /* Strided store or load. */
487 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* An LHS that is a memory reference means a strided store.  */
489 if (REFERENCE_CLASS_P (lhs))
492 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
493 stmt, &def_stmts0, &def_stmts1,
496 &first_stmt_def0_type,
497 &first_stmt_def1_type,
498 &first_stmt_const_oprnd,
500 &pattern0, &pattern1))
506 /* FORNOW: Check that there is no gap between the loads. */
507 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
508 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
509 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
510 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: strided "
516 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
522 /* Check that the size of interleaved loads group is not
523 greater than the SLP group size. */
524 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
526 if (vect_print_dump_info (REPORT_SLP))
528 fprintf (vect_dump, "Build SLP failed: the number of "
529 "interleaved loads is greater than"
530 " the SLP group size ");
531 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
537 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
540 /* Check that there are no loads from different interleaving
541 chains in the same node. The only exception is complex
543 if (prev_first_load != first_load
544 && rhs_code != REALPART_EXPR
545 && rhs_code != IMAGPART_EXPR)
547 if (vect_print_dump_info (REPORT_SLP))
549 fprintf (vect_dump, "Build SLP failed: different "
550 "interleaving chains in one node ");
551 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
558 prev_first_load = first_load;
/* Alignment only needs checking once, on the chain's first load.  */
560 if (first_load == stmt)
562 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
563 if (vect_supportable_dr_alignment (first_dr)
564 == dr_unaligned_unsupported)
566 if (vect_print_dump_info (REPORT_SLP))
568 fprintf (vect_dump, "Build SLP failed: unsupported "
570 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
576 /* Analyze costs (for the first stmt in the group). */
577 vect_model_load_cost (vinfo_for_stmt (stmt),
578 ncopies_for_cost, *node);
581 /* Store the place of this load in the interleaving chain. In
582 case that permutation is needed we later decide if a specific
583 permutation is supported. */
584 load_place = vect_get_place_in_interleaving_chain (stmt,
589 VEC_safe_push (int, heap, *load_permutation, load_place);
591 /* We stop the tree when we reach a group of loads. */
592 stop_recursion = true;
595 } /* Strided access. */
/* Non-strided accesses and non-memory operations.  */
598 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
600 /* Not strided load. */
601 if (vect_print_dump_info (REPORT_SLP))
603 fprintf (vect_dump, "Build SLP failed: not strided load ");
604 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
607 /* FORNOW: Not strided loads are not supported. */
611 /* Not memory operation. */
612 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
613 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
615 if (vect_print_dump_info (REPORT_SLP))
617 fprintf (vect_dump, "Build SLP failed: operation");
618 fprintf (vect_dump, " unsupported ");
619 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
625 /* Find the def-stmts. */
626 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
627 &def_stmts0, &def_stmts1,
628 &first_stmt_dt0, &first_stmt_dt1,
629 &first_stmt_def0_type,
630 &first_stmt_def1_type,
631 &first_stmt_const_oprnd,
633 &pattern0, &pattern1))
638 /* Add the costs of the node to the overall instance costs. */
639 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
640 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
642 /* Strided loads were reached - stop the recursion. */
/* Record the load node; a potential permutation adds a per-element cost.  */
647 VEC_safe_push (slp_tree, heap, *loads, *node);
648 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
654 /* Create SLP_TREE nodes for the definition node/s. */
655 if (first_stmt_dt0 == vect_internal_def)
657 slp_tree left_node = XNEW (struct _slp_tree);
658 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
659 SLP_TREE_VEC_STMTS (left_node) = NULL;
660 SLP_TREE_LEFT (left_node) = NULL;
661 SLP_TREE_RIGHT (left_node) = NULL;
662 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
663 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
664 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
665 inside_cost, outside_cost, ncopies_for_cost,
666 max_nunits, load_permutation, loads,
667 vectorization_factor))
670 SLP_TREE_LEFT (*node) = left_node;
673 if (first_stmt_dt1 == vect_internal_def)
675 slp_tree right_node = XNEW (struct _slp_tree);
676 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
677 SLP_TREE_VEC_STMTS (right_node) = NULL;
678 SLP_TREE_LEFT (right_node) = NULL;
679 SLP_TREE_RIGHT (right_node) = NULL;
680 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
681 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
682 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
683 inside_cost, outside_cost, ncopies_for_cost,
684 max_nunits, load_permutation, loads,
685 vectorization_factor))
688 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: print each scalar stmt
   of the node, then recurse into the left and right children.
   NOTE(review): return type, braces and the NULL-node early return are
   not visible in this extract.  */
696 vect_print_slp_tree (slp_tree node)
704 fprintf (vect_dump, "node ");
705 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
707 fprintf (vect_dump, "\n\tstmt %d ", i);
708 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
710 fprintf (vect_dump, "\n");
712 vect_print_slp_tree (SLP_TREE_LEFT (node));
713 vect_print_slp_tree (SLP_TREE_RIGHT (node));
717 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
718 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
719 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
720 stmts in NODE are to be marked. */
/* Set STMT_SLP_TYPE to MARK on statements of the tree rooted at NODE,
   then recurse into both children.  Per the comment above, J selects a
   single stmt for HYBRID marks (-1 marks all for PURE_SLP).
   NOTE(review): the visible loop body marks every stmt unconditionally;
   the J-based filtering presumably lives on the omitted lines.  */
723 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
731 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
733 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
735 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
736 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
740 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* Mark every statement of the tree rooted at NODE as relevant to
   vectorization (vect_used_in_scope), asserting that no stmt already has
   a conflicting relevance; recurse into both children.  */
743 vect_mark_slp_stmts_relevant (slp_tree node)
747 stmt_vec_info stmt_info;
752 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
754 stmt_info = vinfo_for_stmt (stmt);
/* Relevance may only go from "not relevant" to "used in scope".  */
755 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
756 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
757 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
760 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
761 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
765 /* Check if the permutation required by the SLP INSTANCE is supported.
766 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* Check whether INSTANCE's load permutation is supported.  All load
   nodes must belong to one interleaving chain; the nodes are then sorted
   into chain-access order and the permutation itself is validated with
   vect_transform_slp_perm_load (analysis-only call: NULL vec args).
   NOTE(review): return type, braces, several returns and the freeing of
   TMP_LOADS are on lines omitted from this extract.  */
769 vect_supported_slp_permutation_p (slp_instance instance)
771 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
772 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
773 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
774 VEC (slp_tree, heap) *sorted_loads = NULL;
776 slp_tree *tmp_loads = NULL;
777 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
780 /* FORNOW: The only supported loads permutation is loads from the same
781 location in all the loads in the node, when the data-refs in
782 nodes of LOADS constitute an interleaving chain.
783 Sort the nodes according to the order of accesses in the chain. */
784 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* INDEX strides through the permutation by GROUP_SIZE, pairing each
   permutation entry with its load node.  */
786 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
787 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
788 i += group_size, j++)
790 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
791 /* Check that the loads are all in the same interleaving chain. */
792 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
794 if (vect_print_dump_info (REPORT_DETAILS))
796 fprintf (vect_dump, "Build SLP failed: unsupported data "
798 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place each load node at its position within the chain.  */
805 tmp_loads[index] = load;
808 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
809 for (i = 0; i < group_size; i++)
810 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
/* Replace the instance's load vector with the sorted one.  */
812 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
813 SLP_INSTANCE_LOADS (instance) = sorted_loads;
816 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
817 SLP_INSTANCE_UNROLLING_FACTOR (instance),
825 /* Rearrange the statements of NODE according to PERMUTATION. */
/* Permute the scalar statements of NODE (and, recursively, of its
   children) so that stmt I moves to position PERMUTATION[I].  Builds a
   GROUP_SIZE-long scratch vector, scatters the stmts into it, then swaps
   it in for the node's old scalar-stmt vector.  */
828 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
829 VEC (int, heap) *permutation)
832 VEC (gimple, heap) *tmp_stmts;
833 unsigned int index, i;
/* Children first, so the whole tree ends up consistently reordered.  */
838 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
839 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
841 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
842 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace can write arbitrary positions.  */
844 for (i = 0; i < group_size; i++)
845 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
847 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
849 index = VEC_index (int, permutation, i);
850 VEC_replace (gimple, tmp_stmts, index, stmt);
853 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
854 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
858 /* Check if the required load permutation is supported.
859 LOAD_PERMUTATION contains a list of indices of the loads.
860 In SLP this permutation is relative to the order of strided stores that are
861 the base of the SLP instance. */
/* Decide whether SLP_INSTN's LOAD_PERMUTATION is supported.  Two cases:
   a reduction root (no data-ref) accepts any permutation as long as all
   per-group sequences are identical, after which the nodes are rearranged
   to match; otherwise only the "0..01..1.." pattern is accepted, checked
   with the LOAD_INDEX bitmap, then confirmed by
   vect_supported_slp_permutation_p.
   NOTE(review): return type, braces and several returns/else-branches are
   on lines omitted from this extract.  */
864 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
865 VEC (int, heap) *load_permutation)
867 int i = 0, j, prev = -1, next, k, number_of_groups;
868 bool supported, bad_permutation = false;
873 /* FORNOW: permutations are only supported in SLP. */
877 if (vect_print_dump_info (REPORT_SLP))
879 fprintf (vect_dump, "Load permutation ");
880 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
881 fprintf (vect_dump, "%d ", next);
884 /* In case of reduction every load permutation is allowed, since the order
885 of the reduction statements is not important (as opposed to the case of
886 strided stores). The only condition we need to check is that all the
887 load nodes are of the same size and have the same permutation (and then
888 rearrange all the nodes of the SLP instance according to this
891 /* Check that all the load nodes are of the same size. */
893 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
895 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
896 != (unsigned) group_size)
899 node = SLP_INSTANCE_TREE (slp_instn);
900 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
901 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
902 instance, not all the loads belong to the same node or interleaving
903 group. Hence, we need to divide them into groups according to
905 number_of_groups = VEC_length (int, load_permutation) / group_size;
907 /* Reduction (there are no data-refs in the root). */
908 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
910 int first_group_load_index;
912 /* Compare all the permutation sequences to the first one. */
913 for (i = 1; i < number_of_groups; i++)
916 for (j = i * group_size; j < i * group_size + group_size; j++)
918 next = VEC_index (int, load_permutation, j);
919 first_group_load_index = VEC_index (int, load_permutation, k);
921 if (next != first_group_load_index)
923 bad_permutation = true;
934 if (!bad_permutation)
936 /* This permutation is valid for reduction. Since the order of the
937 statements in the nodes is not important unless they are memory
938 accesses, we can rearrange the statements in all the nodes
939 according to the order of the loads. */
940 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
942 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
947 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
948 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
949 well (unless it's reduction). */
950 if (VEC_length (int, load_permutation)
951 != (unsigned int) (group_size * group_size))
/* LOAD_INDEX tracks which chain positions have been used: each run of
   GROUP_SIZE entries must repeat a single, previously unseen index.  */
955 load_index = sbitmap_alloc (group_size);
956 sbitmap_zero (load_index);
957 for (j = 0; j < group_size; j++)
959 for (i = j * group_size, k = 0;
960 VEC_iterate (int, load_permutation, i, next) && k < group_size;
963 if (i != j * group_size && next != prev)
972 if (TEST_BIT (load_index, prev))
978 SET_BIT (load_index, prev);
981 for (j = 0; j < group_size; j++)
982 if (!TEST_BIT (load_index, j))
985 sbitmap_free (load_index);
987 if (supported && i == group_size * group_size
988 && vect_supported_slp_permutation_p (slp_instn))
995 /* Find the first load in the loop that belongs to INSTANCE.
996 When loads are in several SLP nodes, there can be a case in which the first
997 load does not appear in the first SLP node to be transformed, causing
998 incorrect order of statements. Since we generate all the loads together,
999 they must be inserted before the first load of the SLP instance and not
1000 before the first load of the first node of the instance. */
/* Return the earliest (in statement order) load among all the load nodes
   of INSTANCE, folding with get_earlier_stmt over every scalar load.
   NOTE(review): return type, braces and loop headers for the two nested
   iterations are on lines omitted from this extract.  */
1002 vect_find_first_load_in_slp_instance (slp_instance instance)
1006 gimple first_load = NULL, load;
1009 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
1012 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1014 first_load = get_earlier_stmt (load, first_load);
1020 /* Analyze an SLP instance starting from a group of strided stores. Call
1021 vect_build_slp_tree to build a tree of packed stmts if possible.
1022 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* Try to build an SLP instance rooted at STMT (the first statement of a
   strided-store group, or a reduction representative).  Gathers the group
   members into a root node, calls vect_build_slp_tree, validates any load
   permutation, and registers the new instance on the loop/bb info.
   Returns false (and frees all allocations) on failure.
   NOTE(review): return type, braces and several control-flow lines are
   omitted from this extract.  */
1025 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1028 slp_instance new_instance;
1029 slp_tree node = XNEW (struct _slp_tree);
1030 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1031 unsigned int unrolling_factor = 1, nunits;
1032 tree vectype, scalar_type = NULL_TREE;
1034 unsigned int vectorization_factor = 0;
1035 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1036 unsigned int max_nunits = 0;
1037 VEC (int, heap) *load_permutation;
1038 VEC (slp_tree, heap) *loads;
1039 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* Store case: derive the vector type from the data reference.  */
1043 scalar_type = TREE_TYPE (DR_REF (dr));
1044 vectype = get_vectype_for_scalar_type (scalar_type);
1045 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
/* Reduction case: no data-ref, so the group is the loop's reductions.  */
1049 gcc_assert (loop_vinfo);
1050 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1051 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1056 if (vect_print_dump_info (REPORT_SLP))
1058 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1059 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1065 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1067 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1069 /* No multitypes in BB SLP. */
1070 vectorization_factor = nunits;
1072 /* Calculate the unrolling factor. */
1073 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP cannot unroll -- there is no loop to unroll.  */
1074 if (unrolling_factor != 1 && !loop_vinfo)
1076 if (vect_print_dump_info (REPORT_SLP))
1077 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1083 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1084 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1088 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1091 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1092 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1097 /* Collect reduction statements. */
1098 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1102 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1103 if (vect_print_dump_info (REPORT_DETAILS))
1105 fprintf (vect_dump, "pushing reduction into node: ");
1106 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
/* Initialize the root node's remaining fields.  */
1111 SLP_TREE_VEC_STMTS (node) = NULL;
1112 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1113 SLP_TREE_LEFT (node) = NULL;
1114 SLP_TREE_RIGHT (node) = NULL;
1115 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1116 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1118 /* Calculate the number of vector stmts to create based on the unrolling
1119 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1120 GROUP_SIZE / NUNITS otherwise. */
1121 ncopies_for_cost = unrolling_factor * group_size / nunits;
1123 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1124 loads = VEC_alloc (slp_tree, heap, group_size);
1126 /* Build the tree for the SLP instance. */
1127 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1128 &inside_cost, &outside_cost, ncopies_for_cost,
1129 &max_nunits, &load_permutation, &loads,
1130 vectorization_factor))
1132 /* Create a new SLP instance. */
1133 new_instance = XNEW (struct _slp_instance);
1134 SLP_INSTANCE_TREE (new_instance) = node;
1135 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1136 /* Calculate the unrolling factor based on the smallest type in the
1138 if (max_nunits > nunits)
1139 unrolling_factor = least_common_multiple (max_nunits, group_size)
1142 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1143 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1144 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1145 SLP_INSTANCE_LOADS (new_instance) = loads;
1146 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1147 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If the tree contains load nodes, the permutation must be validated.  */
1148 if (VEC_length (slp_tree, loads))
1150 if (!vect_supported_load_permutation_p (new_instance, group_size,
1153 if (vect_print_dump_info (REPORT_SLP))
1155 fprintf (vect_dump, "Build SLP failed: unsupported load "
1157 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1160 vect_free_slp_instance (new_instance);
1164 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1165 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unused; drop it.  */
1168 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Register the instance with the enclosing loop or basic block.  */
1171 VEC_safe_push (slp_instance, heap,
1172 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1175 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1178 if (vect_print_dump_info (REPORT_SLP))
1179 vect_print_slp_tree (node);
1184 /* Failed to SLP. */
1185 /* Free the allocated memory. */
1186 vect_free_slp_tree (node);
1187 VEC_free (int, heap, load_permutation);
1188 VEC_free (slp_tree, heap, loads);
1194 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1195 trees of packed scalar stmts if SLP is possible. */
/* Entry point of SLP analysis: try to build an SLP instance from every
   strided-store group of LOOP_VINFO/BB_VINFO, and (loop case only) from
   the reduction group.  For BB vectorization, a failed store group fails
   the whole basic block.
   NOTE(review): return type, braces and some statements are on lines
   omitted from this extract.  */
1198 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1201 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1205 if (vect_print_dump_info (REPORT_SLP))
1206 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pick the store (and reduction) worklists from whichever context we
   were called in -- loop or basic block.  */
1210 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1211 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1214 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1216 /* Find SLP sequences starting from groups of strided stores. */
1217 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1218 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1221 if (bb_vinfo && !ok)
1223 if (vect_print_dump_info (REPORT_SLP))
1224 fprintf (vect_dump, "Failed to SLP the basic block.");
1229 /* Find SLP sequences starting from groups of reductions. */
/* A reduction group needs at least two statements to be worth SLPing.  */
1230 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1231 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1232 VEC_index (gimple, reductions, 0)))
1239 /* For each possible SLP instance decide whether to SLP it and calculate overall
1240 unrolling factor needed to SLP the loop. */
/* Decide which SLP instances of LOOP_VINFO to vectorize (FORNOW: all of
   them), compute the overall SLP unrolling factor as the maximum over the
   instances, and mark every stmt of each chosen instance as PURE_SLP.
   NOTE(review): return type, braces and the DECIDED_TO_SLP increment are
   on lines omitted from this extract.  */
1243 vect_make_slp_decision (loop_vec_info loop_vinfo)
1245 unsigned int i, unrolling_factor = 1;
1246 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1247 slp_instance instance;
1248 int decided_to_slp = 0;
1250 if (vect_print_dump_info (REPORT_SLP))
1251 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1253 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1255 /* FORNOW: SLP if you can. */
1256 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1257 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1259 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1260 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1261 loop-based vectorization. Such stmts will be marked as HYBRID. */
1262 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1266 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1268 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1269 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1270 decided_to_slp, unrolling_factor);
1274 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1275    can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1278 vect_detect_hybrid_slp_stmts (slp_tree node)
1282 imm_use_iterator imm_iter;
1284 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA definition is used by a relevant non-SLP stmt
   (or feeds a vectorizable cycle) must also be vectorized by the loop
   vectorizer, i.e. it becomes HYBRID.  Uses by a reduction PHI are excluded:
   reductions handle their initial values separately.  */
1289 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1290 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1291 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1292 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1293 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1294 && !STMT_SLP_TYPE (stmt_vinfo)
1295 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1296 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1297 && !(gimple_code (use_stmt) == GIMPLE_PHI
1298 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1299 == vect_reduction_def))
1300 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both children of the SLP tree.  */
1302 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1303 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1307 /* Find stmts that must be both vectorized and SLPed.  Walks every SLP
     instance of the loop and marks such stmts HYBRID (see
     vect_detect_hybrid_slp_stmts).  */
1310 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1313 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1314 slp_instance instance;
1316 if (vect_print_dump_info (REPORT_SLP))
1317 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1319 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1320 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1324 /* Create and initialize a new bb_vec_info struct for BB, as well as
1325    stmt_vec_info structs for all the stmts in it.  The returned struct is
     heap-allocated; the caller releases it with destroy_bb_vec_info.  */
1328 new_bb_vec_info (basic_block bb)
1330 bb_vec_info res = NULL;
1331 gimple_stmt_iterator gsi;
1333 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1334 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info to every stmt in the block and reset its
   uid (presumably so the vectorizer starts from a clean slate -- the uid's
   exact consumer is outside this view).  */
1336 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1338 gimple stmt = gsi_stmt (gsi);
1339 gimple_set_uid (stmt, 0);
1340 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Pre-allocate the result vectors with small initial capacities.  */
1343 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1344 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1351 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1352    stmts in the basic block.  Inverse of new_bb_vec_info.  */
1355 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1358 gimple_stmt_iterator si;
1363 bb = BB_VINFO_BB (bb_vinfo);
1365 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1367 gimple stmt = gsi_stmt (si);
1368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1371 /* Free stmt_vec_info. */
1372 free_stmt_vec_info (stmt);
/* Release the containers allocated in new_bb_vec_info.  */
1375 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1376 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1382 /* Analyze statements contained in SLP tree node after recursively analyzing
1383    the subtree. Return TRUE if the operations are supported.  */
1386 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Postorder: both children must be analyzable before this node.  */
1395 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1396 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1399 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1401 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1402 gcc_assert (stmt_info);
/* Only pure-SLP stmts can appear in a bb SLP tree.  */
1403 gcc_assert (PURE_SLP_STMT (stmt_info));
1405 if (!vect_analyze_stmt (stmt, &dummy, node))
1413 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1414    operations are supported.  Instances that fail are freed and removed, so
     only vectorizable instances remain afterwards.  */
1417 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1419 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1420 slp_instance instance;
/* No i++ in the for header: on removal the next element shifts into slot I,
   and I is only advanced (in elided lines) when the instance is kept.  */
1423 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1425 if (!vect_slp_analyze_node_operations (bb_vinfo,
1426 SLP_INSTANCE_TREE (instance)))
1428 vect_free_slp_instance (instance);
1429 VEC_ordered_remove (slp_instance, slp_instances, i);
/* Fail if nothing survived.  */
1435 if (!VEC_length (slp_instance, slp_instances))
1442 /* Check if the basic block can be vectorized.  Runs the full analysis
     pipeline (data-refs, dependences, alignment, accesses, SLP discovery,
     operation support) and returns the bb_vec_info on success, cleaning up
     and bailing out on the first failing step.  */
1445 vect_slp_analyze_bb (basic_block bb)
1447 bb_vec_info bb_vinfo;
1448 VEC (ddr_p, heap) *ddrs;
1449 VEC (slp_instance, heap) *slp_instances;
1450 slp_instance instance;
1452 gimple_stmt_iterator gsi;
1454 int max_vf = MAX_VECTORIZATION_FACTOR;
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count the "real" stmts (skipping debug stmts, nops and labels) to bound
   the cost of analyzing this block.  */
1459 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1461 gimple stmt = gsi_stmt (gsi);
1462 if (!is_gimple_debug (stmt)
1463 && !gimple_nop_p (stmt)
1464 && gimple_code (stmt) != GIMPLE_LABEL)
1468 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1470 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1471 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1477 bb_vinfo = new_bb_vec_info (bb);
1481 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1484 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1487 destroy_bb_vec_info (bb_vinfo);
1491 ddrs = BB_VINFO_DDRS (bb_vinfo);
1492 if (!VEC_length (ddr_p, ddrs))
1494 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1495 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1498 destroy_bb_vec_info (bb_vinfo);
1502 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1505 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1506 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1507 "in basic block.\n");
1509 destroy_bb_vec_info (bb_vinfo);
1513 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1515 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1516 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1519 destroy_bb_vec_info (bb_vinfo);
1523 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1525 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1526 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1529 destroy_bb_vec_info (bb_vinfo);
1533 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1535 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1536 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1539 destroy_bb_vec_info (bb_vinfo);
1543 /* Check the SLP opportunities in the basic block, analyze and build SLP
1545 if (!vect_analyze_slp (NULL, bb_vinfo))
1547 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1548 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1549 "in basic block.\n");
1551 destroy_bb_vec_info (bb_vinfo);
1555 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1557 /* Mark all the statements that we want to vectorize as pure SLP and
1559 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1561 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1562 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1565 if (!vect_slp_analyze_operations (bb_vinfo))
1567 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1568 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1570 destroy_bb_vec_info (bb_vinfo);
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1581 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1582    the number of created vector stmts depends on the unrolling factor). However,
1583    the actual number of vector stmts for every SLP node depends on VF which is
1584    set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1585    In this function we assume that the inside costs calculated in
1586    vect_model_xxx_cost are linear in ncopies. */
1589 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1591 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1592 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1593 slp_instance instance;
1595 if (vect_print_dump_info (REPORT_SLP))
1596 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside-of-loop cost from its own unrolling factor
   up to the final loop vectorization factor VF.  */
1598 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1599 /* We assume that costs are linear in ncopies. */
1600 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1601 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1605 /* For constant and loop invariant defs of SLP_NODE this function returns
1606    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1607    OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1608    stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1609    REDUC_INDEX is the index of the reduction operand in the statements, unless
1613 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1614 unsigned int op_num, unsigned int number_of_vectors,
1617 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1618 gimple stmt = VEC_index (gimple, stmts, 0);
1619 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1623 int j, number_of_places_left_in_vector;
1626 int group_size = VEC_length (gimple, stmts);
1627 unsigned int vec_num, i;
1628 int number_of_copies = 1;
1629 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1630 bool constant_p, is_store;
1631 tree neutral_op = NULL;
/* For a reduction, pick the reduction operand and compute the neutral
   element of the reduction operation (0 for sums, 1 for products), used to
   pad additional unrolled copies.  */
1633 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1635 enum tree_code code = gimple_assign_rhs_code (stmt);
1636 if (reduc_index == -1)
1638 VEC_free (tree, heap, *vec_oprnds);
1642 op_num = reduc_index - 1;
1643 op = gimple_op (stmt, op_num + 1);
1644 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1645    we need either neutral operands or the original operands. See
1646    get_initial_def_for_reduction() for details. */
1649 case WIDEN_SUM_EXPR:
1655 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1656 neutral_op = build_real (TREE_TYPE (op), dconst0);
1658 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1664 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1665 neutral_op = build_real (TREE_TYPE (op), dconst1);
1667 neutral_op = build_int_cst (TREE_TYPE (op), 1);
/* For a store the operand of interest is the RHS; otherwise it is the
   requested operand OP_NUM.  */
1676 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1679 op = gimple_assign_rhs1 (stmt);
1684 op = gimple_op (stmt, op_num + 1);
1687 if (CONSTANT_CLASS_P (op))
1692 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1693 gcc_assert (vector_type);
1695 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1697 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1698    created vectors. It is greater than 1 if unrolling is performed.
1700    For example, we have two scalar operands, s1 and s2 (e.g., group of
1701    strided accesses of size two), while NUNITS is four (i.e., four scalars
1702    of this type can be packed in a vector). The output vector will contain
1703    two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1706    If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1707    containing the operands.
1709    For example, NUNITS is four as before, and the group size is 8
1710    (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1711    {s5, s6, s7, s8}. */
1713 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1715 number_of_places_left_in_vector = nunits;
/* Iterate the scalar stmts in reverse so that, with the tree_cons prepend
   below, each finished vector ends up in source order.  */
1716 for (j = 0; j < number_of_copies; j++)
1718 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1721 op = gimple_assign_rhs1 (stmt);
1723 op = gimple_op (stmt, op_num + 1);
1725 if (reduc_index != -1)
1727 struct loop *loop = (gimple_bb (stmt))->loop_father;
1728 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1731 /* Get the def before the loop. */
1732 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1733 loop_preheader_edge (loop));
/* All but the last copy of a reduction operand use the neutral element
   so the extra copies do not change the reduction result.  */
1734 if (j != (number_of_copies - 1) && neutral_op)
1738 /* Create 'vect_ = {op0,op1,...,opn}'. */
1739 t = tree_cons (NULL_TREE, op, t);
1741 number_of_places_left_in_vector--;
1743 if (number_of_places_left_in_vector == 0)
1745 number_of_places_left_in_vector = nunits;
/* All-constant elements form a VECTOR_CST; otherwise a CONSTRUCTOR.  */
1748 vec_cst = build_vector (vector_type, t);
1750 vec_cst = build_constructor_from_list (vector_type, t);
1751 VEC_quick_push (tree, voprnds,
1752 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1758 /* Since the vectors are created in the reverse order, we should invert
1760 vec_num = VEC_length (tree, voprnds);
1761 for (j = vec_num - 1; j >= 0; j--)
1763 vop = VEC_index (tree, voprnds, j);
1764 VEC_quick_push (tree, *vec_oprnds, vop);
1767 VEC_free (tree, heap, voprnds);
1769 /* In case that VF is greater than the unrolling factor needed for the SLP
1770    group of stmts, NUMBER_OF_VECTORS to be created is greater than
1771    NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1772    to replicate the vectors. */
1773 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1775 tree neutral_vec = NULL;
/* For reductions, pad with a vector of the neutral element; otherwise
   (elided branch) replicate the existing vectors below.  */
1782 for (i = 0; i < (unsigned) nunits; i++)
1783 t = tree_cons (NULL_TREE, neutral_op, t);
1784 neutral_vec = build_vector (vector_type, t);
1787 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
1791 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1792 VEC_quick_push (tree, *vec_oprnds, vop);
1798 /* Get vectorized definitions from SLP_NODE that contains corresponding
1799    vectorized def-stmts.  Pushes the LHS of every vectorized stmt of the
     node onto *VEC_OPRNDS.  */
1802 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1805 gimple vec_def_stmt;
1808 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1811 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1814 gcc_assert (vec_def_stmt);
1815 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1816 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1821 /* Get vectorized definitions for SLP_NODE.
1822    If the scalar definitions are loop invariants or constants, collect them and
1823    call vect_get_constant_vectors() to create vector stmts.
1824    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1825    must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1826    vect_get_slp_vect_defs() to retrieve them.
1827    If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1828    the right node. This is used when the second operand must remain scalar. */
1831 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1832 VEC (tree,heap) **vec_oprnds1, int reduc_index)
1835 enum tree_code code;
1836 int number_of_vects;
1837 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1839 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1840 /* The number of vector defs is determined by the number of vector statements
1841    in the node from which we get those statements. */
1842 if (SLP_TREE_LEFT (slp_node))
1843 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1846 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1847 /* Number of vector stmts was calculated according to LHS in
1848    vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1849    necessary. See vect_get_smallest_scalar_type() for details. */
1850 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* When the RHS elements are narrower/wider than the LHS, the vector count
   scales by the size ratio.  */
1852 if (rhs_size_unit != lhs_size_unit)
1854 number_of_vects *= rhs_size_unit;
1855 number_of_vects /= lhs_size_unit;
1859 /* Allocate memory for vectorized defs. */
1860 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1862 /* SLP_NODE corresponds either to a group of stores or to a group of
1863    unary/binary operations. We don't call this function for loads.
1864    For reduction defs we call vect_get_constant_vectors(), since we are
1865    looking for initial loop invariant values. */
1866 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
1867 /* The defs are already vectorized. */
1868 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1870 /* Build vectors from scalar defs. */
1871 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
1874 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1875 /* Since we don't call this function with loads, this is a group of
1879 /* For reductions, we only need initial values. */
1880 if (reduc_index != -1)
/* Second operand: only fetched for binary RHS stmts and when the caller
   asked for it (VEC_OPRNDS1 non-NULL).  */
1883 code = gimple_assign_rhs_code (first_stmt);
1884 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1887 /* The number of vector defs is determined by the number of vector statements
1888    in the node from which we get those statements. */
1889 if (SLP_TREE_RIGHT (slp_node))
1890 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1892 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1894 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1896 if (SLP_TREE_RIGHT (slp_node))
1897 /* The defs are already vectorized. */
1898 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1900 /* Build vectors from scalar defs. */
1901 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
1905 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1906    building a vector of type MASK_TYPE from it) and two input vectors placed in
1907    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1908    shifting by STRIDE elements of DR_CHAIN for every copy.
1909    (STRIDE is the number of vectorized stmts for NODE divided by the number of
1911    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1912    the created stmts must be inserted. */
1915 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1916 tree mask, int first_vec_indx, int second_vec_indx,
1917 gimple_stmt_iterator *gsi, slp_tree node,
1918 tree builtin_decl, tree vectype,
1919 VEC(tree,heap) *dr_chain,
1920 int ncopies, int vect_stmts_counter)
1923 gimple perm_stmt = NULL;
1924 stmt_vec_info next_stmt_info;
1926 tree first_vec, second_vec, data_ref;
1927 VEC (tree, heap) *params = NULL;
1929 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1931 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vector-stmts VEC with NULLs up to its declared size so that
   VEC_replace below can write at arbitrary positions.  */
1933 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1934 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1935 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1937 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1938 for (i = 0; i < ncopies; i++)
1940 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1941 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1943 /* Build argument list for the vectorized call. */
1944 VEC_free (tree, heap, params);
1945 params = VEC_alloc (tree, heap, 3);
1946 VEC_quick_push (tree, params, first_vec);
1947 VEC_quick_push (tree, params, second_vec);
1948 VEC_quick_push (tree, params, mask);
1950 /* Generate the permute statement. */
1951 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1952 data_ref = make_ssa_name (perm_dest, perm_stmt);
1953 gimple_call_set_lhs (perm_stmt, data_ref);
1954 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1956 /* Store the vector statement in NODE. */
1957 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1958 stride * i + vect_stmts_counter, perm_stmt);
/* Each additional copy reads the vectors STRIDE positions further on.  */
1960 first_vec_indx += stride;
1961 second_vec_indx += stride;
1964 /* Mark the scalar stmt as vectorized. */
1965 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1966 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1970 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1971    return in CURRENT_MASK_ELEMENT its equivalent in target specific
1972    representation. Check that the mask is valid and return FALSE if not.
1973    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1974    the next vector, i.e., the current first vector is not needed. */
1977 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1978 int mask_nunits, bool only_one_vec, int index,
1979 int *mask, int *current_mask_element,
1980 bool *need_next_vector)
/* NOTE(review): function-local static state carries over between calls
   within one mask (reset when INDEX reaches the last element below).  This
   makes the function non-reentrant -- fine for the single-threaded
   vectorizer, but any caller must complete one mask at a time.  */
1983 static int number_of_mask_fixes = 1;
1984 static bool mask_fixed = false;
1985 static bool needs_first_vector = false;
1987 /* Convert to target specific representation. */
1988 *current_mask_element = first_mask_element + m;
1989 /* Adjust the value in case it's a mask for second and third vectors. */
1990 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1992 if (*current_mask_element < mask_nunits)
1993 needs_first_vector = true;
1995 /* We have only one input vector to permute but the mask accesses values in
1996    the next vector as well. */
1997 if (only_one_vec && *current_mask_element >= mask_nunits)
1999 if (vect_print_dump_info (REPORT_DETAILS))
2001 fprintf (vect_dump, "permutation requires at least two vectors ");
2002 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2008 /* The mask requires the next vector. */
2009 if (*current_mask_element >= mask_nunits * 2)
2011 if (needs_first_vector || mask_fixed)
2013 /* We either need the first vector too or have already moved to the
2014    next vector. In both cases, this permutation needs three
2016 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "permutation requires at "
2019 "least three vectors ");
2020 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2026 /* We move to the next vector, dropping the first one and working with
2027    the second and the third - we need to adjust the values of the mask
/* Rebase this element and all previously emitted elements of the
   current mask to the new vector pair.  */
2029 *current_mask_element -= mask_nunits * number_of_mask_fixes;
2031 for (i = 0; i < index; i++)
2032 mask[i] -= mask_nunits * number_of_mask_fixes;
2034 (number_of_mask_fixes)++;
2038 *need_next_vector = mask_fixed;
2040 /* This was the last element of this mask. Start a new one. */
2041 if (index == mask_nunits - 1)
2043 number_of_mask_fixes = 1;
2045 needs_first_vector = false;
2052 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2053    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2054    permute statements for SLP_NODE_INSTANCE.  Returns false (via the elided
     exit paths) when the target lacks a permute builtin or a required mask
     is unsupported.  */
2056 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2057 gimple_stmt_iterator *gsi, int vf,
2058 slp_instance slp_node_instance, bool analyze_only)
2060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2061 tree mask_element_type = NULL_TREE, mask_type;
2062 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2064 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2065 gimple next_scalar_stmt;
2066 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2067 int first_mask_element;
2068 int index, unroll_factor, *mask, current_mask_element, ncopies;
2069 bool only_one_vec = false, need_next_vector = false;
2070 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
/* Bail out if the target provides no vector-permute builtin at all.  */
2072 if (!targetm.vectorize.builtin_vec_perm)
2074 if (vect_print_dump_info (REPORT_DETAILS))
2076 fprintf (vect_dump, "no builtin for vect permute for ");
2077 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2083 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2084 &mask_element_type);
2085 if (!builtin_decl || !mask_element_type)
2087 if (vect_print_dump_info (REPORT_DETAILS))
2089 fprintf (vect_dump, "no builtin for vect permute for ");
2090 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2096 mask_type = get_vectype_for_scalar_type (mask_element_type);
2097 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2098 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2099 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* SCALE converts element indices to mask-element units (e.g. elements to
   bytes when the target mask is byte-granular).  */
2100 scale = mask_nunits / nunits;
2101 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2103 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2104    unrolling factor. */
2105 orig_vec_stmts_num = group_size *
2106 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2107 if (orig_vec_stmts_num == 1)
2108 only_one_vec = true;
2110 /* Number of copies is determined by the final vectorization factor
2111    relatively to SLP_NODE_INSTANCE unrolling factor. */
2112 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2114 /* Generate permutation masks for every NODE. Number of masks for each NODE
2115    is equal to GROUP_SIZE.
2116    E.g., we have a group of three nodes with three loads from the same
2117    location in each node, and the vector size is 4. I.e., we have a
2118    a0b0c0a1b1c1... sequence and we need to create the following vectors:
2119    for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2120    for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2123    The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2124    specific type, e.g., in bytes for Altivec).
2125    The last mask is illegal since we assume two operands for permute
2126    operation, and the mask element values can't be outside that range. Hence,
2127    the last mask must be converted into {2,5,5,5}.
2128    For the first two permutations we need the first and the second input
2129    vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2130    we need the second and the third vectors: {b1,c1,a2,b2} and
2134 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2140 vect_stmts_counter = 0;
2142 first_vec_index = vec_index++;
2144 second_vec_index = first_vec_index;
2146 second_vec_index = vec_index++;
2148 for (j = 0; j < unroll_factor; j++)
2150 for (k = 0; k < group_size; k++)
2152 first_mask_element = (i + j * group_size) * scale;
2153 for (m = 0; m < scale; m++)
2155 if (!vect_get_mask_element (stmt, first_mask_element, m,
2156 mask_nunits, only_one_vec, index, mask,
2157 &current_mask_element, &need_next_vector))
2160 mask[index++] = current_mask_element;
/* A full mask has been accumulated: build the mask vector, validate it
   against the target, and (unless ANALYZE_ONLY) emit the permute.  */
2163 if (index == mask_nunits)
2165 tree mask_vec = NULL;
2167 while (--index >= 0)
2169 tree t = build_int_cst (mask_element_type, mask[index]);
2170 mask_vec = tree_cons (NULL, t, mask_vec);
2172 mask_vec = build_vector (mask_type, mask_vec);
2175 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2178 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "unsupported vect permute ");
2181 print_generic_expr (vect_dump, mask_vec, 0);
/* The mask dropped the first vector: advance the input window.  */
2189 if (need_next_vector)
2191 first_vec_index = second_vec_index;
2192 second_vec_index = vec_index;
2195 next_scalar_stmt = VEC_index (gimple,
2196 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2198 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2199 mask_vec, first_vec_index, second_vec_index,
2200 gsi, node, builtin_decl, vectype, dr_chain,
2201 ncopies, vect_stmts_counter++);
2214 /* Vectorize SLP instance tree in postorder.  Children are scheduled first
     so their vectorized defs exist when this node's stmts are transformed.  */
2217 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2218 unsigned int vectorization_factor)
2221 bool strided_store, is_store;
2222 gimple_stmt_iterator si;
2223 stmt_vec_info stmt_info;
2224 unsigned int vec_stmts_size, nunits, group_size;
2227 slp_tree loads_node;
2232 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2233 vectorization_factor);
2234 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2235 vectorization_factor);
2237 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2238 stmt_info = vinfo_for_stmt (stmt);
2240 /* VECTYPE is the type of the destination. */
2241 vectype = STMT_VINFO_VECTYPE (stmt_info);
2242 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2243 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2245 /* For each SLP instance calculate number of vector stmts to be created
2246    for the scalar stmts in each node of the SLP tree. Number of vector
2247    elements in one vector iteration is the number of scalar elements in
2248    one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2250 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2252 /* In case of load permutation we have to allocate vectorized statements for
2253    all the nodes that participate in that permutation. */
2254 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2257 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2260 if (!SLP_TREE_VEC_STMTS (loads_node))
2262 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2264 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate this node's vector-stmt slots once (children may already have
   allocated theirs via the recursion above).  */
2269 if (!SLP_TREE_VEC_STMTS (node))
2271 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2272 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2275 if (vect_print_dump_info (REPORT_DETAILS))
2277 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2278 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2281 /* Loads should be inserted before the first load. */
2282 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2283 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2284 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2285 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2287 si = gsi_for_stmt (stmt);
2289 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
/* Generate vectorized code for all SLP instances of the loop (LOOP_VINFO)
   or the basic block (BB_VINFO), then remove the now-dead scalar stores.  */
2295 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2297 VEC (slp_instance, heap) *slp_instances;
2298 slp_instance instance;
2300 bool is_store = false;
2304 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2305 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2309 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2313 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2315 /* Schedule the tree of INSTANCE. */
2316 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2318 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2319 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2320 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: the scalar stores at the root of each instance have been
   replaced by vector stores, so delete them from the IL.  */
2323 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2325 slp_tree root = SLP_INSTANCE_TREE (instance);
2328 gimple_stmt_iterator gsi;
2330 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2331 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2333 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2336 /* Free the attached stmt_vec_info and remove the stmt. */
2337 gsi = gsi_for_stmt (store);
2338 gsi_remove (&gsi, true);
2339 free_stmt_vec_info (store);
2347 /* Vectorize the basic block.  BB must have been analyzed successfully by
     vect_slp_analyze_bb, which attached the bb_vec_info retrieved here.  */
2350 vect_slp_transform_bb (basic_block bb)
2352 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2353 gimple_stmt_iterator si;
2355 gcc_assert (bb_vinfo);
2357 if (vect_print_dump_info (REPORT_DETAILS))
2358 fprintf (vect_dump, "SLPing BB\n");
2360 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2362 gimple stmt = gsi_stmt (si);
2363 stmt_vec_info stmt_info;
2365 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "------>SLPing statement: ");
2368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2371 stmt_info = vinfo_for_stmt (stmt);
2372 gcc_assert (stmt_info);
2374 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2375 if (STMT_SLP_TYPE (stmt_info))
2377 vect_schedule_slp (NULL, bb_vinfo);
/* Vectorized stmts touch memory through the virtual operand; flag it for
   SSA renaming before updating the SSA form.  */
2382 mark_sym_for_renaming (gimple_vop (cfun));
2383 /* The memory tags and pointers in vectorized statements need to
2384    have their SSA forms updated. FIXME, why can't this be delayed
2385    until all the loops have been transformed? */
2386 update_ssa (TODO_update_ssa);
2388 if (vect_print_dump_info (REPORT_DETAILS))
2389 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2391 destroy_bb_vec_info (bb_vinfo);