1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "cfglayout.h"
41 #include "tree-vectorizer.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
/* NOTE(review): this extraction is missing lines (the function's return
   type, braces, the statement fetch from SI, and the fallthrough return) —
   verify against upstream tree-vect-slp.c before editing.  */
47 find_bb_location (basic_block bb)
50 gimple_stmt_iterator si;
/* Walk the statements of BB; the first one with a known location
   provides the block's location.  */
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): lines are missing from this extraction (return type, braces,
   the NULL-node early return, and the free of NODE itself) — confirm against
   upstream tree-vect-slp.c.  */
69 vect_free_slp_tree (slp_tree node)
/* Post-order: release both children before this node's own vectors.  */
74 if (SLP_TREE_LEFT (node))
75 vect_free_slp_tree (SLP_TREE_LEFT (node));
77 if (SLP_TREE_RIGHT (node))
78 vect_free_slp_tree (SLP_TREE_RIGHT (node));
80 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
/* The vectorized-statement vector is only allocated during transformation,
   so it may be NULL here.  */
82 if (SLP_TREE_VEC_STMTS (node))
83 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
89 /* Free the memory allocated for the SLP instance.  Frees the whole SLP
   tree, the load-permutation vector and the loads vector; the INSTANCE
   struct itself is presumably freed by a missing line — TODO confirm
   against upstream tree-vect-slp.c (extraction is truncated here).  */
92 vect_free_slp_instance (slp_instance instance)
94 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
95 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
96 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
100 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
101 they are of a legal type and that they match the defs of the first stmt of
102 the SLP group (stored in FIRST_STMT_...). */
/* NOTE(review): many lines are missing from this extraction (return type,
   braces, 'return false/true' statements, several conditions) — the control
   flow below cannot be trusted as-is; verify against upstream
   tree-vect-slp.c before modifying.  */
105 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
106 slp_tree slp_node, gimple stmt,
107 VEC (gimple, heap) **def_stmts0,
108 VEC (gimple, heap) **def_stmts1,
109 enum vect_def_type *first_stmt_dt0,
110 enum vect_def_type *first_stmt_dt1,
111 tree *first_stmt_def0_type,
112 tree *first_stmt_def1_type,
113 tree *first_stmt_const_oprnd,
114 int ncopies_for_cost,
115 bool *pattern0, bool *pattern1)
118 unsigned int i, number_of_oprnds;
121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
122 stmt_vec_info stmt_info =
123 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
124 enum gimple_rhs_class rhs_class;
125 struct loop *loop = NULL;
/* LOOP stays NULL for basic-block SLP; it is only set when analyzing a
   loop (presumably guarded by a missing 'if (loop_vinfo)' line — TODO
   confirm).  */
128 loop = LOOP_VINFO_LOOP (loop_vinfo);
130 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
131 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT in turn.  */
133 for (i = 0; i < number_of_oprnds; i++)
135 oprnd = gimple_op (stmt, i + 1);
/* A usable def must exist unless the operand is a constant.  */
137 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
139 || (!def_stmt && dt[i] != vect_constant_def))
141 if (vect_print_dump_info (REPORT_SLP))
143 fprintf (vect_dump, "Build SLP failed: can't find def for ");
144 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
150 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
151 from the pattern. Check that all the stmts of the node are in the
153 if (loop && def_stmt && gimple_bb (def_stmt)
154 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
155 && vinfo_for_stmt (def_stmt)
156 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* First stmt of the group: record that this operand came from a
   pattern (the recording itself is in lines missing here).  */
158 if (!*first_stmt_dt0)
162 if (i == 1 && !*first_stmt_dt1)
/* Later stmts must agree with the first: all in a pattern or none.  */
164 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
166 if (vect_print_dump_info (REPORT_DETAILS))
168 fprintf (vect_dump, "Build SLP failed: some of the stmts"
169 " are in a pattern, and others are not ");
170 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Follow the pattern: use the related pattern stmt as the def.  */
177 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
178 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
180 if (*dt == vect_unknown_def_type)
182 if (vect_print_dump_info (REPORT_DETAILS))
183 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the pattern def stmt.  */
187 switch (gimple_code (def_stmt))
190 def = gimple_phi_result (def_stmt);
194 def = gimple_assign_lhs (def_stmt);
198 if (vect_print_dump_info (REPORT_DETAILS))
199 fprintf (vect_dump, "unsupported defining stmt: ");
204 if (!*first_stmt_dt0)
206 /* op0 of the first stmt of the group - store its info. */
207 *first_stmt_dt0 = dt[i];
209 *first_stmt_def0_type = TREE_TYPE (def);
211 *first_stmt_const_oprnd = oprnd;
213 /* Analyze costs (for the first stmt of the group only). */
214 if (rhs_class != GIMPLE_SINGLE_RHS)
215 /* Not memory operation (we don't call this functions for loads). */
216 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
219 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
224 if (!*first_stmt_dt1 && i == 1)
226 /* op1 of the first stmt of the group - store its info. */
227 *first_stmt_dt1 = dt[i];
229 *first_stmt_def1_type = TREE_TYPE (def);
232 /* We assume that the stmt contains only one constant
233 operand. We fail otherwise, to be on the safe side. */
234 if (*first_stmt_const_oprnd)
236 if (vect_print_dump_info (REPORT_SLP))
237 fprintf (vect_dump, "Build SLP failed: two constant "
241 *first_stmt_const_oprnd = oprnd;
246 /* Not first stmt of the group, check that the def-stmt/s match
247 the def-stmt/s of the first stmt. */
249 && (*first_stmt_dt0 != dt[i]
250 || (*first_stmt_def0_type && def
251 && !types_compatible_p (*first_stmt_def0_type,
254 && (*first_stmt_dt1 != dt[i]
255 || (*first_stmt_def1_type && def
256 && !types_compatible_p (*first_stmt_def1_type,
259 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
262 if (vect_print_dump_info (REPORT_SLP))
263 fprintf (vect_dump, "Build SLP failed: different types ");
270 /* Check the types of the definitions. */
/* Constants and externals need no child node; internal/reduction defs
   are queued so the caller can recurse on them.  */
273 case vect_constant_def:
274 case vect_external_def:
277 case vect_internal_def:
278 case vect_reduction_def:
280 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
282 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
286 /* FORNOW: Not supported. */
287 if (vect_print_dump_info (REPORT_SLP))
289 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
290 print_generic_expr (vect_dump, def, TDF_SLIM);
301 /* Recursively build an SLP tree starting from NODE.
302 Fail (and return FALSE) if def-stmts are not isomorphic, require data
303 permutation or are of unsupported types of operation. Otherwise, return
/* NOTE(review): this extraction is heavily truncated (return type, braces,
   many 'return false' statements and several conditions are missing) —
   treat the control flow below as indicative only and verify any change
   against upstream tree-vect-slp.c.  */
307 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
308 slp_tree *node, unsigned int group_size,
309 int *inside_cost, int *outside_cost,
310 int ncopies_for_cost, unsigned int *max_nunits,
311 VEC (int, heap) **load_permutation,
312 VEC (slp_tree, heap) **loads,
313 unsigned int vectorization_factor)
/* Collected def stmts for operand 0 and operand 1 of every stmt in the
   group; these become the left/right child nodes below.  */
315 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
316 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
318 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
319 gimple stmt = VEC_index (gimple, stmts, 0);
320 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
321 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
322 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
323 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
325 bool stop_recursion = false, need_same_oprnds = false;
326 tree vectype, scalar_type, first_op1 = NULL_TREE;
327 unsigned int ncopies;
330 enum machine_mode optab_op2_mode;
331 enum machine_mode vec_mode;
332 tree first_stmt_const_oprnd = NULL_TREE;
333 struct data_reference *first_dr;
334 bool pattern0 = false, pattern1 = false;
336 bool permutation = false;
337 unsigned int load_place;
338 gimple first_load, prev_first_load = NULL;
340 /* For every stmt in NODE find its def stmt/s. */
341 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
343 if (vect_print_dump_info (REPORT_SLP))
345 fprintf (vect_dump, "Build SLP for ");
346 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
349 /* Fail to vectorize statements marked as unvectorizable. */
350 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
352 if (vect_print_dump_info (REPORT_SLP))
355 "Build SLP failed: unvectorizable statement ");
356 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Only assignments and calls with an lhs are SLP candidates.  */
362 lhs = gimple_get_lhs (stmt);
363 if (lhs == NULL_TREE)
365 if (vect_print_dump_info (REPORT_SLP))
368 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
369 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Determine the vector type from the smallest scalar type in STMT.  */
375 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
376 vectype = get_vectype_for_scalar_type (scalar_type);
379 if (vect_print_dump_info (REPORT_SLP))
381 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
382 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
387 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
390 if (vect_print_dump_info (REPORT_SLP))
391 fprintf (vect_dump, "SLP with multiple types ");
393 /* FORNOW: multiple types are unsupported in BB SLP. */
398 /* In case of multiple types we need to detect the smallest type. */
399 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
400 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
402 if (is_gimple_call (stmt))
403 rhs_code = CALL_EXPR;
405 rhs_code = gimple_assign_rhs_code (stmt);
407 /* Check the operation. */
/* First stmt of the group fixes the operation code the rest must match
   (the 'if (i == 0)' guard is among the missing lines).  */
410 first_stmt_code = rhs_code;
412 /* Shift arguments should be equal in all the packed stmts for a
413 vector shift with scalar shift operand. */
414 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
415 || rhs_code == LROTATE_EXPR
416 || rhs_code == RROTATE_EXPR)
418 vec_mode = TYPE_MODE (vectype);
420 /* First see if we have a vector/vector shift. */
421 optab = optab_for_tree_code (rhs_code, vectype,
425 || (optab->handlers[(int) vec_mode].insn_code
426 == CODE_FOR_nothing))
428 /* No vector/vector shift, try for a vector/scalar shift. */
429 optab = optab_for_tree_code (rhs_code, vectype,
434 if (vect_print_dump_info (REPORT_SLP))
435 fprintf (vect_dump, "Build SLP failed: no optab.");
438 icode = (int) optab->handlers[(int) vec_mode].insn_code;
439 if (icode == CODE_FOR_nothing)
441 if (vect_print_dump_info (REPORT_SLP))
442 fprintf (vect_dump, "Build SLP failed: "
443 "op not supported by target.");
446 optab_op2_mode = insn_data[icode].operand[2].mode;
447 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: all stmts must share the same shift amount.  */
449 need_same_oprnds = true;
450 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: the operation must match the first stmt's, except
   that REALPART/IMAGPART of the same complex value may mix.  */
457 if (first_stmt_code != rhs_code
458 && (first_stmt_code != IMAGPART_EXPR
459 || rhs_code != REALPART_EXPR)
460 && (first_stmt_code != REALPART_EXPR
461 || rhs_code != IMAGPART_EXPR))
463 if (vect_print_dump_info (REPORT_SLP))
466 "Build SLP failed: different operation in stmt ");
467 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
474 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
476 if (vect_print_dump_info (REPORT_SLP))
479 "Build SLP failed: different shift arguments in ");
480 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
487 /* Strided store or load. */
488 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* An lhs that is a memory reference means a store; gather and check
   its rhs defs like any non-load operation.  */
490 if (REFERENCE_CLASS_P (lhs))
493 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
494 stmt, &def_stmts0, &def_stmts1,
497 &first_stmt_def0_type,
498 &first_stmt_def1_type,
499 &first_stmt_const_oprnd,
501 &pattern0, &pattern1))
507 /* FORNOW: Check that there is no gap between the loads. */
508 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
510 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
511 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
513 if (vect_print_dump_info (REPORT_SLP))
515 fprintf (vect_dump, "Build SLP failed: strided "
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 /* Check that the size of interleaved loads group is not
524 greater than the SLP group size. */
525 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
527 if (vect_print_dump_info (REPORT_SLP))
529 fprintf (vect_dump, "Build SLP failed: the number of "
530 "interleaved loads is greater than"
531 " the SLP group size ");
532 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
538 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
541 /* Check that there are no loads from different interleaving
542 chains in the same node. The only exception is complex
544 if (prev_first_load != first_load
545 && rhs_code != REALPART_EXPR
546 && rhs_code != IMAGPART_EXPR)
548 if (vect_print_dump_info (REPORT_SLP))
550 fprintf (vect_dump, "Build SLP failed: different "
551 "interleaving chains in one node ");
552 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
559 prev_first_load = first_load;
/* The alignment check and cost analysis are done once, on the first
   load of the interleaving chain.  */
561 if (first_load == stmt)
563 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
564 if (vect_supportable_dr_alignment (first_dr)
565 == dr_unaligned_unsupported)
567 if (vect_print_dump_info (REPORT_SLP))
569 fprintf (vect_dump, "Build SLP failed: unsupported "
571 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
577 /* Analyze costs (for the first stmt in the group). */
578 vect_model_load_cost (vinfo_for_stmt (stmt),
579 ncopies_for_cost, *node);
582 /* Store the place of this load in the interleaving chain. In
583 case that permutation is needed we later decide if a specific
584 permutation is supported. */
585 load_place = vect_get_place_in_interleaving_chain (stmt,
590 VEC_safe_push (int, heap, *load_permutation, load_place);
592 /* We stop the tree when we reach a group of loads. */
593 stop_recursion = true;
596 } /* Strided access. */
599 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
601 /* Not strided load. */
602 if (vect_print_dump_info (REPORT_SLP))
604 fprintf (vect_dump, "Build SLP failed: not strided load ");
605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
608 /* FORNOW: Not strided loads are not supported. */
612 /* Not memory operation. */
613 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
614 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
616 if (vect_print_dump_info (REPORT_SLP))
618 fprintf (vect_dump, "Build SLP failed: operation");
619 fprintf (vect_dump, " unsupported ");
620 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
626 /* Find the def-stmts. */
627 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
628 &def_stmts0, &def_stmts1,
629 &first_stmt_dt0, &first_stmt_dt1,
630 &first_stmt_def0_type,
631 &first_stmt_def1_type,
632 &first_stmt_const_oprnd,
634 &pattern0, &pattern1))
639 /* Add the costs of the node to the overall instance costs. */
640 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
641 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
643 /* Strided loads were reached - stop the recursion. */
/* Record this load node and charge the permute cost (guarded by
   stop_recursion/permutation in lines missing here — TODO confirm).  */
648 VEC_safe_push (slp_tree, heap, *loads, *node);
649 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
655 /* Create SLP_TREE nodes for the definition node/s. */
656 if (first_stmt_dt0 == vect_internal_def)
658 slp_tree left_node = XNEW (struct _slp_tree);
659 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
660 SLP_TREE_VEC_STMTS (left_node) = NULL;
661 SLP_TREE_LEFT (left_node) = NULL;
662 SLP_TREE_RIGHT (left_node) = NULL;
663 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
664 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
/* Recurse on the operand-0 defs; failure propagates upward.  */
665 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
666 inside_cost, outside_cost, ncopies_for_cost,
667 max_nunits, load_permutation, loads,
668 vectorization_factor))
671 SLP_TREE_LEFT (*node) = left_node;
674 if (first_stmt_dt1 == vect_internal_def)
676 slp_tree right_node = XNEW (struct _slp_tree);
677 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
678 SLP_TREE_VEC_STMTS (right_node) = NULL;
679 SLP_TREE_LEFT (right_node) = NULL;
680 SLP_TREE_RIGHT (right_node) = NULL;
681 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
682 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
/* Recurse on the operand-1 defs.  */
683 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
684 inside_cost, outside_cost, ncopies_for_cost,
685 max_nunits, load_permutation, loads,
686 vectorization_factor))
689 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP, pre-order: the node's
   scalar stmts first, then its left and right subtrees.  NOTE(review):
   the extraction is missing the function comment, return type, braces and
   the NULL-node early return — verify against upstream tree-vect-slp.c.  */
697 vect_print_slp_tree (slp_tree node)
705 fprintf (vect_dump, "node ");
706 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
708 fprintf (vect_dump, "\n\tstmt %d ", i);
709 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
711 fprintf (vect_dump, "\n");
713 vect_print_slp_tree (SLP_TREE_LEFT (node));
714 vect_print_slp_tree (SLP_TREE_RIGHT (node));
718 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
719 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
720 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
721 stmts in NODE are to be marked. */
/* NOTE(review): extraction is truncated (return type, braces, NULL-node
   check, and the index-J guard inside the loop are missing) — as shown,
   the loop appears to mark every stmt regardless of J; confirm against
   upstream tree-vect-slp.c.  */
724 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
732 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
734 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
/* Recurse into both children with the same mark.  */
736 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
737 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
741 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* NOTE(review): the return type, braces and NULL-node early return are
   missing from this extraction — verify against upstream.  */
744 vect_mark_slp_stmts_relevant (slp_tree node)
748 stmt_vec_info stmt_info;
753 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
755 stmt_info = vinfo_for_stmt (stmt);
/* A stmt must either be not yet marked or already marked used-in-scope;
   any other relevance state would indicate an analysis inconsistency.  */
756 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
757 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
758 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
761 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
762 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
766 /* Check if the permutation required by the SLP INSTANCE is supported.
767 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* NOTE(review): return type, braces, 'free (tmp_loads)' and the final
   returns are among the lines missing from this extraction — verify
   against upstream tree-vect-slp.c.  */
770 vect_supported_slp_permutation_p (slp_instance instance)
772 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
773 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
774 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
775 VEC (slp_tree, heap) *sorted_loads = NULL;
777 slp_tree *tmp_loads = NULL;
778 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
781 /* FORNOW: The only supported loads permutation is loads from the same
782 location in all the loads in the node, when the data-refs in
783 nodes of LOADS constitute an interleaving chain.
784 Sort the nodes according to the order of accesses in the chain. */
785 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk LOAD_PERMUTATION in steps of GROUP_SIZE, pairing each step with
   the corresponding load node.  */
787 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
788 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
789 i += group_size, j++)
791 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
792 /* Check that the loads are all in the same interleaving chain. */
793 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
795 if (vect_print_dump_info (REPORT_DETAILS))
797 fprintf (vect_dump, "Build SLP failed: unsupported data "
799 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place the node at its position in the interleaving-chain order.  */
806 tmp_loads[index] = load;
/* Rebuild SLP_INSTANCE_LOADS in sorted order.  */
809 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
810 for (i = 0; i < group_size; i++)
811 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
813 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
814 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Finally ask the target whether the permuted load is doable
   (analysis-only call: NULL gsi/vec args).  */
817 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
818 SLP_INSTANCE_UNROLLING_FACTOR (instance),
826 /* Rearrange the statements of NODE according to PERMUTATION. */
/* NOTE(review): return type, braces and the NULL-node check are missing
   from this extraction — verify against upstream.  */
829 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
830 VEC (int, heap) *permutation)
833 VEC (gimple, heap) *tmp_stmts;
834 unsigned int index, i;
/* Rearrange children first (post-order), then this node.  */
839 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
840 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
842 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
843 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace below can target any index.  */
845 for (i = 0; i < group_size; i++)
846 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
848 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
850 index = VEC_index (int, permutation, i);
851 VEC_replace (gimple, tmp_stmts, index, stmt);
854 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
855 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
859 /* Check if the required load permutation is supported.
860 LOAD_PERMUTATION contains a list of indices of the loads.
861 In SLP this permutation is relative to the order of strided stores that are
862 the base of the SLP instance. */
/* NOTE(review): this extraction is missing many lines (return type, braces,
   several early returns, loop headers, and the 'supported' initialization)
   — verify any change against upstream tree-vect-slp.c.  */
865 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
866 VEC (int, heap) *load_permutation)
868 int i = 0, j, prev = -1, next, k, number_of_groups;
869 bool supported, bad_permutation = false;
874 /* FORNOW: permutations are only supported in SLP. */
878 if (vect_print_dump_info (REPORT_SLP))
880 fprintf (vect_dump, "Load permutation ");
881 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
882 fprintf (vect_dump, "%d ", next);
885 /* In case of reduction every load permutation is allowed, since the order
886 of the reduction statements is not important (as opposed to the case of
887 strided stores). The only condition we need to check is that all the
888 load nodes are of the same size and have the same permutation (and then
889 rearrange all the nodes of the SLP instance according to this
892 /* Check that all the load nodes are of the same size. */
894 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
896 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
897 != (unsigned) group_size)
900 node = SLP_INSTANCE_TREE (slp_instn);
901 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
902 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
903 instance, not all the loads belong to the same node or interleaving
904 group. Hence, we need to divide them into groups according to
906 number_of_groups = VEC_length (int, load_permutation) / group_size;
908 /* Reduction (there are no data-refs in the root). */
909 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
911 int first_group_load_index;
913 /* Compare all the permutation sequences to the first one. */
914 for (i = 1; i < number_of_groups; i++)
917 for (j = i * group_size; j < i * group_size + group_size; j++)
919 next = VEC_index (int, load_permutation, j);
/* K presumably indexes the matching position in the first group —
   its advance is in lines missing here; TODO confirm.  */
920 first_group_load_index = VEC_index (int, load_permutation, k);
922 if (next != first_group_load_index)
924 bad_permutation = true;
935 if (!bad_permutation)
937 /* This permutaion is valid for reduction. Since the order of the
938 statements in the nodes is not important unless they are memory
939 accesses, we can rearrange the statements in all the nodes
940 according to the order of the loads. */
941 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
943 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
948 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
949 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
950 well (unless it's reduction). */
951 if (VEC_length (int, load_permutation)
952 != (unsigned int) (group_size * group_size))
/* Bitmap of load indices seen so far; used to reject repeats.  */
956 load_index = sbitmap_alloc (group_size);
957 sbitmap_zero (load_index);
958 for (j = 0; j < group_size; j++)
960 for (i = j * group_size, k = 0;
961 VEC_iterate (int, load_permutation, i, next) && k < group_size;
/* Each run of GROUP_SIZE entries must repeat the same index.  */
964 if (i != j * group_size && next != prev)
/* Each index may head exactly one run.  */
973 if (TEST_BIT (load_index, prev))
979 SET_BIT (load_index, prev);
/* Every index 0..GROUP_SIZE-1 must have appeared.  */
982 for (j = 0; j < group_size; j++)
983 if (!TEST_BIT (load_index, j))
986 sbitmap_free (load_index);
988 if (supported && i == group_size * group_size
989 && vect_supported_slp_permutation_p (slp_instn))
996 /* Find the first load in the loop that belongs to INSTANCE.
997 When loads are in several SLP nodes, there can be a case in which the first
998 load does not appear in the first SLP node to be transformed, causing
999 incorrect order of statements. Since we generate all the loads together,
1000 they must be inserted before the first load of the SLP instance and not
1001 before the first load of the first node of the instance. */
/* NOTE(review): return type, braces, loop headers and the final return of
   FIRST_LOAD are missing from this extraction — verify against upstream.  */
1003 vect_find_first_load_in_slp_instance (slp_instance instance)
1007 gimple first_load = NULL, load;
/* Scan every load node of the instance...  */
1010 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
/* ...and every scalar load inside it, keeping the earliest stmt.  */
1013 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1015 first_load = get_earlier_stmt (load, first_load);
1021 /* Analyze an SLP instance starting from a group of strided stores. Call
1022 vect_build_slp_tree to build a tree of packed stmts if possible.
1023 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* NOTE(review): heavily truncated extraction — return type, braces, the
   store/reduction dispatch conditions and several returns are missing;
   verify any change against upstream tree-vect-slp.c.  */
1026 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1029 slp_instance new_instance;
1030 slp_tree node = XNEW (struct _slp_tree);
1031 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1032 unsigned int unrolling_factor = 1, nunits;
1033 tree vectype, scalar_type = NULL_TREE;
1035 unsigned int vectorization_factor = 0;
1036 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1037 unsigned int max_nunits = 0;
1038 VEC (int, heap) *load_permutation;
1039 VEC (slp_tree, heap) *loads;
1040 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* Strided-store case: derive the vector type from the data reference.  */
1044 scalar_type = TREE_TYPE (DR_REF (dr));
1045 vectype = get_vectype_for_scalar_type (scalar_type);
1046 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
/* Reduction case (no data-ref): only valid for loop SLP; the group is
   the set of reduction stmts of the loop.  */
1050 gcc_assert (loop_vinfo);
1051 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1052 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1057 if (vect_print_dump_info (REPORT_SLP))
1059 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1060 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1066 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1068 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1070 /* No multitypes in BB SLP. */
1071 vectorization_factor = nunits;
1073 /* Calculate the unrolling factor. */
1074 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1075 if (unrolling_factor != 1 && !loop_vinfo)
1077 if (vect_print_dump_info (REPORT_SLP))
1078 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1084 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1085 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1089 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1092 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1093 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1098 /* Collect reduction statements. */
1099 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1103 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1104 if (vect_print_dump_info (REPORT_DETAILS))
1106 fprintf (vect_dump, "pushing reduction into node: ");
1107 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
/* Initialize the remaining fields of the root node.  */
1112 SLP_TREE_VEC_STMTS (node) = NULL;
1113 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1114 SLP_TREE_LEFT (node) = NULL;
1115 SLP_TREE_RIGHT (node) = NULL;
1116 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1117 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1119 /* Calculate the number of vector stmts to create based on the unrolling
1120 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1121 GROUP_SIZE / NUNITS otherwise. */
1122 ncopies_for_cost = unrolling_factor * group_size / nunits;
1124 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1125 loads = VEC_alloc (slp_tree, heap, group_size);
1127 /* Build the tree for the SLP instance. */
1128 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1129 &inside_cost, &outside_cost, ncopies_for_cost,
1130 &max_nunits, &load_permutation, &loads,
1131 vectorization_factor))
1133 /* Create a new SLP instance. */
1134 new_instance = XNEW (struct _slp_instance);
1135 SLP_INSTANCE_TREE (new_instance) = node;
1136 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1137 /* Calculate the unrolling factor based on the smallest type in the
1139 if (max_nunits > nunits)
1140 unrolling_factor = least_common_multiple (max_nunits, group_size)
1143 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1144 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1145 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1146 SLP_INSTANCE_LOADS (new_instance) = loads;
1147 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1148 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1149 if (VEC_length (slp_tree, loads))
/* The instance has loads: the permutation must be realizable.  */
1151 if (!vect_supported_load_permutation_p (new_instance, group_size,
1154 if (vect_print_dump_info (REPORT_SLP))
1156 fprintf (vect_dump, "Build SLP failed: unsupported load "
1158 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1161 vect_free_slp_instance (new_instance);
1165 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1166 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unused, drop it.  */
1169 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the loop- or bb-level list.  */
1172 VEC_safe_push (slp_instance, heap,
1173 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1176 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1179 if (vect_print_dump_info (REPORT_SLP))
1180 vect_print_slp_tree (node);
1185 /* Failed to SLP. */
1186 /* Free the allocated memory. */
1187 vect_free_slp_tree (node);
1188 VEC_free (int, heap, load_permutation);
1189 VEC_free (slp_tree, heap, loads);
1195 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1196 trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): return type, braces, the 'ok' initialization and the final
   return are missing from this extraction — verify against upstream.  */
1199 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1202 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1206 if (vect_print_dump_info (REPORT_SLP))
1207 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Loop SLP: take strided stores and reductions from the loop info;
   BB SLP: only strided stores.  */
1211 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1212 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1215 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1217 /* Find SLP sequences starting from groups of strided stores. */
1218 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1219 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
/* For BB vectorization, failure to SLP any store fails the whole
   basic block.  */
1222 if (bb_vinfo && !ok)
1224 if (vect_print_dump_info (REPORT_SLP))
1225 fprintf (vect_dump, "Failed to SLP the basic block.");
1230 /* Find SLP sequences starting from groups of reductions. */
1231 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1232 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1233 VEC_index (gimple, reductions, 0)))
1240 /* For each possible SLP instance decide whether to SLP it and calculate overall
1241 unrolling factor needed to SLP the loop. */
/* NOTE(review): return type, braces and the increment of DECIDED_TO_SLP
   are missing from this extraction — verify against upstream.  */
1244 vect_make_slp_decision (loop_vec_info loop_vinfo)
1246 unsigned int i, unrolling_factor = 1;
1247 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1248 slp_instance instance;
1249 int decided_to_slp = 0;
1251 if (vect_print_dump_info (REPORT_SLP))
1252 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1254 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1256 /* FORNOW: SLP if you can. */
/* The loop's SLP unrolling factor is the max over all instances.  */
1257 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1258 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1260 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1261 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1262 loop-based vectorization. Such stmts will be marked as HYBRID. */
1263 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1267 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1269 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1270 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1271 decided_to_slp, unrolling_factor);
1275 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1276 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1279 vect_detect_hybrid_slp_stmts (slp_tree node)
1283 imm_use_iterator imm_iter;
1285 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA result is used by a relevant non-SLP stmt
   must also be vectorized by the loop-based vectorizer, i.e. is hybrid.
   Uses by a reduction PHI are exempt from this marking.  */
1290 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1291 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1292 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1293 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1294 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1295 && !STMT_SLP_TYPE (stmt_vinfo)
1296 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1297 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1298 && !(gimple_code (use_stmt) == GIMPLE_PHI
1299 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1300 == vect_reduction_def))
1301 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both children of the (binary) SLP tree.  */
1303 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1304 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1308 /* Find stmts that must be both vectorized and SLPed. */
/* Walks every SLP instance of LOOP_VINFO and marks hybrid stmts via
   vect_detect_hybrid_slp_stmts.  */
1311 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1314 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1315 slp_instance instance;
1317 if (vect_print_dump_info (REPORT_SLP))
1318 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1320 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1321 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1325 /* Create and initialize a new bb_vec_info struct for BB, as well as
1326 stmt_vec_info structs for all the stmts in it. */
1329 new_bb_vec_info (basic_block bb)
1331 bb_vec_info res = NULL;
1332 gimple_stmt_iterator gsi;
/* xcalloc zero-initializes all fields of the new bb_vec_info.  */
1334 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1335 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info (with uid reset) to every stmt in BB.  */
1337 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1339 gimple stmt = gsi_stmt (gsi);
1340 gimple_set_uid (stmt, 0);
1341 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Initial capacities are heuristics, not limits; the VECs grow on demand.  */
1344 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1345 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1352 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1353 stmts in the basic block. */
1356 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1359 gimple_stmt_iterator si;
1364 bb = BB_VINFO_BB (bb_vinfo);
1366 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1368 gimple stmt = gsi_stmt (si);
1369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1372 /* Free stmt_vec_info. */
1373 free_stmt_vec_info (stmt);
/* Release the per-block vectors; their gimple/slp_instance elements are
   owned elsewhere or already freed.  */
1376 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1377 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1383 /* Analyze statements contained in SLP tree node after recursively analyzing
1384 the subtree. Return TRUE if the operations are supported. */
1387 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Post-order: both children must be analyzable before this node.  */
1396 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1397 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1400 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1403 gcc_assert (stmt_info);
/* In BB vectorization every stmt in an instance was marked pure SLP.  */
1404 gcc_assert (PURE_SLP_STMT (stmt_info));
1406 if (!vect_analyze_stmt (stmt, &dummy, node))
1414 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1415 operations are supported. */
1418 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1420 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1421 slp_instance instance;
/* Note: I deliberately not incremented in the loop header — when an
   instance is removed the next element shifts into slot I; I advances
   only in the (elided) else path.  */
1424 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1426 if (!vect_slp_analyze_node_operations (bb_vinfo,
1427 SLP_INSTANCE_TREE (instance)))
/* Unsupported instance: drop it and keep analyzing the rest.  */
1429 vect_free_slp_instance (instance);
1430 VEC_ordered_remove (slp_instance, slp_instances, i);
/* All instances rejected -> the block cannot be SLPed.  */
1436 if (!VEC_length (slp_instance, slp_instances))
1443 /* Check if the basic block can be vectorized.  Returns the bb_vec_info
     on success (ownership passes to the caller, see vect_slp_transform_bb)
     and NULL on failure — NOTE(review): return stmts elided in this
     listing; confirm against the full source.  */
1446 vect_slp_analyze_bb (basic_block bb)
1448 bb_vec_info bb_vinfo;
1449 VEC (ddr_p, heap) *ddrs;
1450 VEC (slp_instance, heap) *slp_instances;
1451 slp_instance instance;
1453 gimple_stmt_iterator gsi;
1455 int max_vf = MAX_VECTORIZATION_FACTOR;
1457 if (vect_print_dump_info (REPORT_DETAILS))
1458 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count the real instructions, skipping debug stmts, nops and labels,
   to enforce the PARAM_SLP_MAX_INSNS_IN_BB compile-time limit.  */
1460 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1462 gimple stmt = gsi_stmt (gsi);
1463 if (!is_gimple_debug (stmt)
1464 && !gimple_nop_p (stmt)
1465 && gimple_code (stmt) != GIMPLE_LABEL)
1469 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1471 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1472 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1478 bb_vinfo = new_bb_vec_info (bb);
/* From here each analysis step that fails destroys BB_VINFO and bails
   out.  NULL loop_vinfo selects the basic-block path in the helpers.  */
1482 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1484 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1485 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1488 destroy_bb_vec_info (bb_vinfo);
1492 ddrs = BB_VINFO_DDRS (bb_vinfo);
1493 if (!VEC_length (ddr_p, ddrs))
1495 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1496 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1499 destroy_bb_vec_info (bb_vinfo);
1503 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1506 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1507 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1508 "in basic block.\n");
1510 destroy_bb_vec_info (bb_vinfo);
1514 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1516 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1517 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1520 destroy_bb_vec_info (bb_vinfo);
1524 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1526 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1527 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1530 destroy_bb_vec_info (bb_vinfo);
1534 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1536 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1537 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1540 destroy_bb_vec_info (bb_vinfo);
1544 /* Check the SLP opportunities in the basic block, analyze and build SLP
1546 if (!vect_analyze_slp (NULL, bb_vinfo))
1548 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1549 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1550 "in basic block.\n");
1552 destroy_bb_vec_info (bb_vinfo);
1556 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1558 /* Mark all the statements that we want to vectorize as pure SLP and
1560 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1562 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1563 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1566 if (!vect_slp_analyze_operations (bb_vinfo))
1568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1569 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1571 destroy_bb_vec_info (bb_vinfo);
1575 if (vect_print_dump_info (REPORT_DETAILS))
1576 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1582 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1583 the number of created vector stmts depends on the unrolling factor). However,
1584 the actual number of vector stmts for every SLP node depends on VF which is
1585 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1586 In this function we assume that the inside costs calculated in
1587 vect_model_xxx_cost are linear in ncopies. */
1590 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1592 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1593 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1594 slp_instance instance;
1596 if (vect_print_dump_info (REPORT_SLP))
1597 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside-of-loop cost by VF / unrolling factor
   (integer division; VF is a multiple of the unrolling factor).  */
1599 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1600 /* We assume that costs are linear in ncopies. */
1601 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1602 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1606 /* For constant and loop invariant defs of SLP_NODE this function returns
1607 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1608 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1609 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1610 REDUC_INDEX is the index of the reduction operand in the statements, unless
1614 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1615 unsigned int op_num, unsigned int number_of_vectors,
1618 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1619 gimple stmt = VEC_index (gimple, stmts, 0);
1620 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1624 int j, number_of_places_left_in_vector;
1627 int group_size = VEC_length (gimple, stmts);
1628 unsigned int vec_num, i;
1629 int number_of_copies = 1;
1630 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1631 bool constant_p, is_store;
1632 tree neutral_op = NULL;
/* For reductions, pick a neutral element (0 for sums, 1 for products)
   matching the reduction code, used to pad the extra unrolled copies.  */
1634 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1636 enum tree_code code = gimple_assign_rhs_code (stmt);
1637 if (reduc_index == -1)
1639 VEC_free (tree, heap, *vec_oprnds);
1643 op_num = reduc_index - 1;
1644 op = gimple_op (stmt, op_num + 1);
1645 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1646 we need either neutral operands or the original operands. See
1647 get_initial_def_for_reduction() for details. */
1650 case WIDEN_SUM_EXPR:
1656 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1657 neutral_op = build_real (TREE_TYPE (op), dconst0);
1659 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1665 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1666 neutral_op = build_real (TREE_TYPE (op), dconst1);
1668 neutral_op = build_int_cst (TREE_TYPE (op), 1);
/* A stmt with a data-ref is a store: the value to vectorize is rhs1.  */
1677 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1680 op = gimple_assign_rhs1 (stmt);
1685 op = gimple_op (stmt, op_num + 1);
1688 if (CONSTANT_CLASS_P (op))
1693 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1694 gcc_assert (vector_type);
1696 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1698 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1699 created vectors. It is greater than 1 if unrolling is performed.
1701 For example, we have two scalar operands, s1 and s2 (e.g., group of
1702 strided accesses of size two), while NUNITS is four (i.e., four scalars
1703 of this type can be packed in a vector). The output vector will contain
1704 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1707 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1708 containing the operands.
1710 For example, NUNITS is four as before, and the group size is 8
1711 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1712 {s5, s6, s7, s8}. */
1714 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1716 number_of_places_left_in_vector = nunits;
1717 for (j = 0; j < number_of_copies; j++)
/* Iterate the group in REVERSE (tree_cons prepends), so the built list
   ends up in forward order.  */
1719 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1722 op = gimple_assign_rhs1 (stmt);
1724 op = gimple_op (stmt, op_num + 1);
1726 if (reduc_index != -1)
1728 struct loop *loop = (gimple_bb (stmt))->loop_father;
1729 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1732 /* Get the def before the loop. */
1733 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1734 loop_preheader_edge (loop));
/* All copies but the last use the neutral element instead.  */
1735 if (j != (number_of_copies - 1) && neutral_op)
1739 /* Create 'vect_ = {op0,op1,...,opn}'. */
1740 t = tree_cons (NULL_TREE, op, t);
1742 number_of_places_left_in_vector--;
/* Vector is full: materialize it as VECTOR_CST (all-constant) or as a
   CONSTRUCTOR, and stash the initializing stmt's result.  */
1744 if (number_of_places_left_in_vector == 0)
1746 number_of_places_left_in_vector = nunits;
1749 vec_cst = build_vector (vector_type, t);
1751 vec_cst = build_constructor_from_list (vector_type, t);
1752 VEC_quick_push (tree, voprnds,
1753 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1759 /* Since the vectors are created in the reverse order, we should invert
1761 vec_num = VEC_length (tree, voprnds);
1762 for (j = vec_num - 1; j >= 0; j--)
1764 vop = VEC_index (tree, voprnds, j);
1765 VEC_quick_push (tree, *vec_oprnds, vop);
1768 VEC_free (tree, heap, voprnds);
1770 /* In case that VF is greater than the unrolling factor needed for the SLP
1771 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1772 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1773 to replicate the vectors. */
1774 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1776 tree neutral_vec = NULL;
/* Reduction case: pad with a splat of the neutral element.  */
1783 for (i = 0; i < (unsigned) nunits; i++)
1784 t = tree_cons (NULL_TREE, neutral_op, t);
1785 neutral_vec = build_vector (vector_type, t);
1788 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
/* Non-reduction case: replicate the already-built vectors.  */
1792 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1793 VEC_quick_push (tree, *vec_oprnds, vop);
1799 /* Get vectorized definitions from SLP_NODE that contains corresponding
1800 vectorized def-stmts. */
/* Pushes the LHS of every vectorized stmt of SLP_NODE onto *VEC_OPRNDS,
   in order.  SLP_NODE must already have been vectorized.  */
1803 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1806 gimple vec_def_stmt;
1809 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1812 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1815 gcc_assert (vec_def_stmt);
1816 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1817 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1822 /* Get vectorized definitions for SLP_NODE.
1823 If the scalar definitions are loop invariants or constants, collect them and
1824 call vect_get_constant_vectors() to create vector stmts.
1825 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1826 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1827 vect_get_slp_vect_defs() to retrieve them.
1828 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1829 the right node. This is used when the second operand must remain scalar. */
1832 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1833 VEC (tree,heap) **vec_oprnds1, int reduc_index)
1836 enum tree_code code;
1837 int number_of_vects;
1838 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1840 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1841 /* The number of vector defs is determined by the number of vector statements
1842 in the node from which we get those statements. */
1843 if (SLP_TREE_LEFT (slp_node))
1844 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1847 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1848 /* Number of vector stmts was calculated according to LHS in
1849 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1850 necessary. See vect_get_smallest_scalar_type() for details. */
1851 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* Scale the count by the RHS/LHS element-size ratio (e.g. widening ops).  */
1853 if (rhs_size_unit != lhs_size_unit)
1855 number_of_vects *= rhs_size_unit;
1856 number_of_vects /= lhs_size_unit;
1860 /* Allocate memory for vectorized defs. */
1861 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1863 /* SLP_NODE corresponds either to a group of stores or to a group of
1864 unary/binary operations. We don't call this function for loads.
1865 For reduction defs we call vect_get_constant_vectors(), since we are
1866 looking for initial loop invariant values. */
1867 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
1868 /* The defs are already vectorized. */
1869 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1871 /* Build vectors from scalar defs. */
1872 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
1875 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1876 /* Since we don't call this function with loads, this is a group of
1880 /* For reductions, we only need initial values. */
1881 if (reduc_index != -1)
/* Second operand only exists for binary RHS stmts, and only when the
   caller asked for it.  */
1884 code = gimple_assign_rhs_code (first_stmt);
1885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1888 /* The number of vector defs is determined by the number of vector statements
1889 in the node from which we get those statements. */
1890 if (SLP_TREE_RIGHT (slp_node))
1891 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1893 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1895 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1897 if (SLP_TREE_RIGHT (slp_node))
1898 /* The defs are already vectorized. */
1899 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1901 /* Build vectors from scalar defs. */
1902 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
1906 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1907 building a vector of type MASK_TYPE from it) and two input vectors placed in
1908 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1909 shifting by STRIDE elements of DR_CHAIN for every copy.
1910 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1912 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1913 the created stmts must be inserted. */
1916 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1917 tree mask, int first_vec_indx, int second_vec_indx,
1918 gimple_stmt_iterator *gsi, slp_tree node,
1919 tree builtin_decl, tree vectype,
1920 VEC(tree,heap) *dr_chain,
1921 int ncopies, int vect_stmts_counter)
1924 gimple perm_stmt = NULL;
1925 stmt_vec_info next_stmt_info;
1927 tree first_vec, second_vec, data_ref;
1928 VEC (tree, heap) *params = NULL;
1930 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1932 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad VEC_STMTS with NULLs up to its declared size so VEC_replace below
   can write at arbitrary slots.  */
1934 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1935 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1936 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1938 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1939 for (i = 0; i < ncopies; i++)
1941 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1942 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1944 /* Build argument list for the vectorized call. */
/* gimple_build_call_vec takes ownership of PARAMS' contents, so a fresh
   VEC is allocated per copy; VEC_free (NULL) on the first pass is a no-op.  */
1945 VEC_free (tree, heap, params);
1946 params = VEC_alloc (tree, heap, 3);
1947 VEC_quick_push (tree, params, first_vec);
1948 VEC_quick_push (tree, params, second_vec);
1949 VEC_quick_push (tree, params, mask);
1951 /* Generate the permute statement. */
1952 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1953 data_ref = make_ssa_name (perm_dest, perm_stmt);
1954 gimple_call_set_lhs (perm_stmt, data_ref);
1955 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1957 /* Store the vector statement in NODE. */
1958 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1959 stride * i + vect_stmts_counter, perm_stmt);
/* Advance both inputs by STRIDE for the next unrolled copy.  */
1961 first_vec_indx += stride;
1962 second_vec_indx += stride;
1965 /* Mark the scalar stmt as vectorized. */
1966 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1967 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1971 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1972 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1973 representation. Check that the mask is valid and return FALSE if not.
1974 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1975 the next vector, i.e., the current first vector is not needed.
     NOTE: keeps state in function-local statics across calls, so this
     function is neither reentrant nor thread-safe; callers must process
     one permutation sequence at a time.  */
1978 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1979 int mask_nunits, bool only_one_vec, int index,
1980 int *mask, int *current_mask_element,
1981 bool *need_next_vector)
1984 static int number_of_mask_fixes = 1;
1985 static bool mask_fixed = false;
1986 static bool needs_first_vector = false;
1988 /* Convert to target specific representation. */
1989 *current_mask_element = first_mask_element + m;
1990 /* Adjust the value in case it's a mask for second and third vectors. */
1991 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1993 if (*current_mask_element < mask_nunits)
1994 needs_first_vector = true;
1996 /* We have only one input vector to permute but the mask accesses values in
1997 the next vector as well. */
1998 if (only_one_vec && *current_mask_element >= mask_nunits)
2000 if (vect_print_dump_info (REPORT_DETAILS))
2002 fprintf (vect_dump, "permutation requires at least two vectors ");
2003 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2009 /* The mask requires the next vector. */
2010 if (*current_mask_element >= mask_nunits * 2)
2012 if (needs_first_vector || mask_fixed)
2014 /* We either need the first vector too or have already moved to the
2015 next vector. In both cases, this permutation needs three
2017 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "permutation requires at "
2020 "least three vectors ");
2021 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2027 /* We move to the next vector, dropping the first one and working with
2028 the second and the third - we need to adjust the values of the mask
/* Rebase this element and every mask element emitted so far.  */
2030 *current_mask_element -= mask_nunits * number_of_mask_fixes;
2032 for (i = 0; i < index; i++)
2033 mask[i] -= mask_nunits * number_of_mask_fixes;
2035 (number_of_mask_fixes)++;
2039 *need_next_vector = mask_fixed;
2041 /* This was the last element of this mask. Start a new one. */
/* Reset the static state for the next mask.  */
2042 if (index == mask_nunits - 1)
2044 number_of_mask_fixes = 1;
2046 needs_first_vector = false;
2053 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2054 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2055 permute statements for SLP_NODE_INSTANCE. */
2057 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2058 gimple_stmt_iterator *gsi, int vf,
2059 slp_instance slp_node_instance, bool analyze_only)
2061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2062 tree mask_element_type = NULL_TREE, mask_type;
2063 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2065 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2066 gimple next_scalar_stmt;
2067 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2068 int first_mask_element;
2069 int index, unroll_factor, *mask, current_mask_element, ncopies;
2070 bool only_one_vec = false, need_next_vector = false;
2071 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2073 if (!targetm.vectorize.builtin_vec_perm)
2075 if (vect_print_dump_info (REPORT_DETAILS))
2077 fprintf (vect_dump, "no builtin for vect permute for ");
2078 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2084 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2085 &mask_element_type);
2086 if (!builtin_decl || !mask_element_type)
2088 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "no builtin for vect permute for ");
2091 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2097 mask_type = get_vectype_for_scalar_type (mask_element_type);
2098 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2099 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2100 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2101 scale = mask_nunits / nunits;
2102 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2104 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2105 unrolling factor. */
2106 orig_vec_stmts_num = group_size *
2107 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2108 if (orig_vec_stmts_num == 1)
2109 only_one_vec = true;
2111 /* Number of copies is determined by the final vectorization factor
2112 relatively to SLP_NODE_INSTANCE unrolling factor. */
2113 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2115 /* Generate permutation masks for every NODE. Number of masks for each NODE
2116 is equal to GROUP_SIZE.
2117 E.g., we have a group of three nodes with three loads from the same
2118 location in each node, and the vector size is 4. I.e., we have a
2119 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2120 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2121 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2124 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2125 scpecific type, e.g., in bytes for Altivec.
2126 The last mask is illegal since we assume two operands for permute
2127 operation, and the mask element values can't be outside that range. Hence,
2128 the last mask must be converted into {2,5,5,5}.
2129 For the first two permutations we need the first and the second input
2130 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2131 we need the second and the third vectors: {b1,c1,a2,b2} and
2135 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2141 vect_stmts_counter = 0;
2143 first_vec_index = vec_index++;
2145 second_vec_index = first_vec_index;
2147 second_vec_index = vec_index++;
2149 for (j = 0; j < unroll_factor; j++)
2151 for (k = 0; k < group_size; k++)
2153 first_mask_element = (i + j * group_size) * scale;
2154 for (m = 0; m < scale; m++)
2156 if (!vect_get_mask_element (stmt, first_mask_element, m,
2157 mask_nunits, only_one_vec, index, mask,
2158 ¤t_mask_element, &need_next_vector))
2161 mask[index++] = current_mask_element;
2164 if (index == mask_nunits)
2166 tree mask_vec = NULL;
2168 while (--index >= 0)
2170 tree t = build_int_cst (mask_element_type, mask[index]);
2171 mask_vec = tree_cons (NULL, t, mask_vec);
2173 mask_vec = build_vector (mask_type, mask_vec);
2176 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2179 if (vect_print_dump_info (REPORT_DETAILS))
2181 fprintf (vect_dump, "unsupported vect permute ");
2182 print_generic_expr (vect_dump, mask_vec, 0);
2190 if (need_next_vector)
2192 first_vec_index = second_vec_index;
2193 second_vec_index = vec_index;
2196 next_scalar_stmt = VEC_index (gimple,
2197 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2199 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2200 mask_vec, first_vec_index, second_vec_index,
2201 gsi, node, builtin_decl, vectype, dr_chain,
2202 ncopies, vect_stmts_counter++);
2215 /* Vectorize SLP instance tree in postorder. */
2218 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2219 unsigned int vectorization_factor)
2222 bool strided_store, is_store;
2223 gimple_stmt_iterator si;
2224 stmt_vec_info stmt_info;
2225 unsigned int vec_stmts_size, nunits, group_size;
2228 slp_tree loads_node;
/* Postorder: vectorize children before this node so their defs exist.  */
2233 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2234 vectorization_factor);
2235 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2236 vectorization_factor);
2238 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2239 stmt_info = vinfo_for_stmt (stmt);
2241 /* VECTYPE is the type of the destination. */
2242 vectype = STMT_VINFO_VECTYPE (stmt_info);
2243 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2244 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2246 /* For each SLP instance calculate number of vector stmts to be created
2247 for the scalar stmts in each node of the SLP tree. Number of vector
2248 elements in one vector iteration is the number of scalar elements in
2249 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2251 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2253 /* In case of load permutation we have to allocate vectorized statements for
2254 all the nodes that participate in that permutation. */
2255 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2258 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2261 if (!SLP_TREE_VEC_STMTS (loads_node))
2263 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2265 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate this node's vector-stmt slots once (children may have done it).  */
2270 if (!SLP_TREE_VEC_STMTS (node))
2272 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2273 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2276 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2279 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2282 /* Loads should be inserted before the first load. */
2283 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2284 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2285 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2286 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2288 si = gsi_for_stmt (stmt);
2290 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
/* Generate vector code for all SLP instances of LOOP_VINFO or BB_VINFO
   (exactly one is non-NULL), then remove the now-redundant scalar store
   stmts of each instance.  */
2296 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2298 VEC (slp_instance, heap) *slp_instances;
2299 slp_instance instance;
2301 bool is_store = false;
2305 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2306 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2310 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2314 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2316 /* Schedule the tree of INSTANCE. */
2317 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2319 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2320 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2321 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: delete the scalar stores replaced by vector code.  */
2324 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2326 slp_tree root = SLP_INSTANCE_TREE (instance);
2329 gimple_stmt_iterator gsi;
2331 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2332 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
/* Only data-ref stmts (stores) are removed here.  */
2334 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2337 /* Free the attached stmt_vec_info and remove the stmt. */
2338 gsi = gsi_for_stmt (store);
2339 gsi_remove (&gsi, true);
2340 free_stmt_vec_info (store);
2348 /* Vectorize the basic block. */
/* BB must have been analyzed by vect_slp_analyze_bb, which attached the
   bb_vec_info this function consumes and finally destroys.  */
2351 vect_slp_transform_bb (basic_block bb)
2353 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2354 gimple_stmt_iterator si;
2356 gcc_assert (bb_vinfo);
2358 if (vect_print_dump_info (REPORT_DETAILS))
2359 fprintf (vect_dump, "SLPing BB\n");
2361 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2363 gimple stmt = gsi_stmt (si);
2364 stmt_vec_info stmt_info;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2368 fprintf (vect_dump, "------>SLPing statement: ");
2369 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2372 stmt_info = vinfo_for_stmt (stmt);
2373 gcc_assert (stmt_info);
2375 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2376 if (STMT_SLP_TYPE (stmt_info))
2378 vect_schedule_slp (NULL, bb_vinfo);
2383 mark_sym_for_renaming (gimple_vop (cfun));
2384 /* The memory tags and pointers in vectorized statements need to
2385 have their SSA forms updated. FIXME, why can't this be delayed
2386 until all the loops have been transformed? */
2387 update_ssa (TODO_update_ssa);
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n")
/* Release the bb_vec_info and all per-stmt vec-infos created for BB.  */
2392 destroy_bb_vec_info (bb_vinfo);