1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if it succeeds and NULL if not. */
/* Scan the statements of BB in order and return the first source location
   that is not UNKNOWN_LOC.  NOTE(review): this excerpt elides some lines
   (the declaration of STMT, its assignment from SI, and the fall-through
   return) -- verify against the full source.  */
45 find_bb_location (basic_block bb)
48 gimple_stmt_iterator si;
/* Walk every statement of the block.  */
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
/* Return the first known location found.  */
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* Post-order walk of the SLP tree rooted at NODE: free both children
   recursively, then the vectors owned by NODE itself.
   NOTE(review): some lines are elided in this excerpt (e.g. the release of
   NODE's own storage) -- verify against the full source.  */
67 vect_free_slp_tree (slp_tree node)
/* Recurse into the left subtree, if any.  */
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
/* Recurse into the right subtree, if any.  */
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* Release the scalar-stmt vector unconditionally; the vector of
   vectorized stmts only exists after transformation, so guard it.  */
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
87 /* Free the memory allocated for the SLP instance. */
/* Release an SLP INSTANCE: its stmt tree, its load-permutation index
   vector, and its vector of load nodes.  The load nodes themselves are
   part of the tree and are freed by vect_free_slp_tree.  */
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
/* For each RHS operand of STMT, find its defining stmt and def-type, and
   check they are consistent with what was recorded for the first stmt of
   the SLP group (the FIRST_STMT_* in/out parameters).  Collects internal
   defs into *DEF_STMTS0 / *DEF_STMTS1 for operand 0 / 1 respectively.
   Also models the cost for the group's first stmt.
   NOTE(review): this excerpt elides intermediate lines (braces, returns,
   some conditions) -- verify details against the full source.  */
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
116 unsigned int i, number_of_oprnds;
/* Def-types of the (at most two) RHS operands; starts unknown.  */
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
/* Cost modelling is attributed to the group's first scalar stmt.  */
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
/* LOOP stays NULL for basic-block SLP (no enclosing loop info).  */
123 struct loop *loop = NULL;
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand in turn.  */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
/* Fail if the operand's def cannot be classified.  */
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
/* Pattern handling: loop SLP only, and only when DEF_STMT lives inside
   LOOP and was recognized as part of a vectorizer pattern.  */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
156 if (!*first_stmt_dt0)
160 if (i == 1 && !*first_stmt_dt1)
/* All stmts of the node must agree on being in / out of a pattern.  */
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Redirect to the pattern stmt and reclassify the def.  */
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the pattern's def stmt; only PHIs
   and assignments are handled (other codes fail below).  */
185 switch (gimple_code (def_stmt))
188 def = gimple_phi_result (def_stmt);
192 def = gimple_assign_lhs (def_stmt);
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
207 *first_stmt_def0_type = TREE_TYPE (def);
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
227 *first_stmt_def1_type = TREE_TYPE (def);
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
239 *first_stmt_const_oprnd = oprnd;
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
268 /* Check the types of the definitions. */
/* Constants/externals need no def-stmt recording; internal defs are
   queued for the recursive SLP-tree build (operand 0 vs 1).  */
271 case vect_constant_def:
272 case vect_external_def:
275 case vect_internal_def:
277 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
279 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
283 /* FORNOW: Not supported. */
284 if (vect_print_dump_info (REPORT_SLP))
286 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287 print_generic_expr (vect_dump, def, TDF_SLIM);
298 /* Recursively build an SLP tree starting from NODE.
299 Fail (and return FALSE) if def-stmts are not isomorphic, require data
300 permutation or are of unsupported types of operation. Otherwise, return
/* Recursively build the SLP tree rooted at *NODE from its scalar stmts.
   Verifies all stmts of the node are isomorphic (same operation, same
   def-types, supported data-types), accumulates costs into *INSIDE_COST /
   *OUTSIDE_COST, records load nodes in *LOADS and their interleaving
   positions in *LOAD_PERMUTATION, and tracks the smallest vector type via
   *MAX_NUNITS.  Recursion stops at groups of strided loads.
   NOTE(review): this excerpt elides many lines (braces, returns, some
   conditions and declarations) -- verify details against the full source.  */
304 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
305 slp_tree *node, unsigned int group_size,
306 int *inside_cost, int *outside_cost,
307 int ncopies_for_cost, unsigned int *max_nunits,
308 VEC (int, heap) **load_permutation,
309 VEC (slp_tree, heap) **loads,
310 unsigned int vectorization_factor)
/* Collected def stmts for operand 0 / operand 1 of every stmt.  */
312 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
313 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
316 gimple stmt = VEC_index (gimple, stmts, 0);
/* State remembered from the group's first stmt, against which all later
   stmts are matched.  */
317 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
318 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
319 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
320 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
322 bool stop_recursion = false, need_same_oprnds = false;
323 tree vectype, scalar_type, first_op1 = NULL_TREE;
324 unsigned int ncopies;
327 enum machine_mode optab_op2_mode;
328 enum machine_mode vec_mode;
329 tree first_stmt_const_oprnd = NULL_TREE;
330 struct data_reference *first_dr;
331 bool pattern0 = false, pattern1 = false;
333 bool permutation = false;
334 unsigned int load_place;
337 /* For every stmt in NODE find its def stmt/s. */
338 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
340 if (vect_print_dump_info (REPORT_SLP))
342 fprintf (vect_dump, "Build SLP for ");
343 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM)
/* Only stmts with an LHS (assigns/calls) can be SLPed.  */
346 lhs = gimple_get_lhs (stmt);
347 if (lhs == NULL_TREE)
349 if (vect_print_dump_info (REPORT_SLP))
352 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
353 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* The stmt's scalar type must map to a supported vector type.  */
359 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
360 vectype = get_vectype_for_scalar_type (scalar_type);
363 if (vect_print_dump_info (REPORT_SLP))
365 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
366 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
371 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
374 if (vect_print_dump_info (REPORT_SLP))
375 fprintf (vect_dump, "SLP with multiple types ");
377 /* FORNOW: multiple types are unsupported in BB SLP. */
382 /* In case of multiple types we need to detect the smallest type. */
383 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
384 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
386 if (is_gimple_call (stmt))
387 rhs_code = CALL_EXPR;
389 rhs_code = gimple_assign_rhs_code (stmt);
391 /* Check the operation. */
/* First stmt of the group: record the operation code.  */
394 first_stmt_code = rhs_code;
396 /* Shift arguments should be equal in all the packed stmts for a
397 vector shift with scalar shift operand. */
398 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
399 || rhs_code == LROTATE_EXPR
400 || rhs_code == RROTATE_EXPR)
402 vec_mode = TYPE_MODE (vectype);
404 /* First see if we have a vector/vector shift. */
405 optab = optab_for_tree_code (rhs_code, vectype,
409 || (optab->handlers[(int) vec_mode].insn_code
410 == CODE_FOR_nothing))
412 /* No vector/vector shift, try for a vector/scalar shift. */
413 optab = optab_for_tree_code (rhs_code, vectype,
418 if (vect_print_dump_info (REPORT_SLP))
419 fprintf (vect_dump, "Build SLP failed: no optab.");
422 icode = (int) optab->handlers[(int) vec_mode].insn_code;
423 if (icode == CODE_FOR_nothing)
425 if (vect_print_dump_info (REPORT_SLP))
426 fprintf (vect_dump, "Build SLP failed: "
427 "op not supported by target.");
430 optab_op2_mode = insn_data[icode].operand[2].mode;
431 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: all stmts must share the same shift amount.  */
433 need_same_oprnds = true;
434 first_op1 = gimple_assign_rhs2 (stmt);
/* Later stmts: operation must match the first stmt's, except the
   REALPART/IMAGPART pairing which is explicitly tolerated.  */
441 if (first_stmt_code != rhs_code
442 && (first_stmt_code != IMAGPART_EXPR
443 || rhs_code != REALPART_EXPR)
444 && (first_stmt_code != REALPART_EXPR
445 || rhs_code != IMAGPART_EXPR))
447 if (vect_print_dump_info (REPORT_SLP))
450 "Build SLP failed: different operation in stmt ");
451 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
458 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
460 if (vect_print_dump_info (REPORT_SLP))
463 "Build SLP failed: different shift arguments in ");
464 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
471 /* Strided store or load. */
472 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* An LHS that is a memory reference means a strided store.  */
474 if (REFERENCE_CLASS_P (lhs))
477 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
478 stmt, &def_stmts0, &def_stmts1,
481 &first_stmt_def0_type,
482 &first_stmt_def1_type,
483 &first_stmt_const_oprnd,
485 &pattern0, &pattern1))
491 /* FORNOW: Check that there is no gap between the loads. */
492 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
494 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
495 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
497 if (vect_print_dump_info (REPORT_SLP))
499 fprintf (vect_dump, "Build SLP failed: strided "
501 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
507 /* Check that the size of interleaved loads group is not
508 greater than the SLP group size. */
509 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
510 > ncopies * group_size)
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: the number of "
515 "interleaved loads is greater than"
516 " the SLP group size ");
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
/* Alignment and cost are checked/modelled once, on the group's
   first load.  */
525 if (first_load == stmt)
527 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
528 if (vect_supportable_dr_alignment (first_dr)
529 == dr_unaligned_unsupported)
531 if (vect_print_dump_info (REPORT_SLP))
533 fprintf (vect_dump, "Build SLP failed: unsupported "
535 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
541 /* Analyze costs (for the first stmt in the group). */
542 vect_model_load_cost (vinfo_for_stmt (stmt),
543 ncopies_for_cost, *node);
546 /* Store the place of this load in the interleaving chain. In
547 case that permutation is needed we later decide if a specific
548 permutation is supported. */
549 load_place = vect_get_place_in_interleaving_chain (stmt,
554 VEC_safe_push (int, heap, *load_permutation, load_place);
556 /* We stop the tree when we reach a group of loads. */
557 stop_recursion = true;
560 } /* Strided access. */
563 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
565 /* Not strided load. */
566 if (vect_print_dump_info (REPORT_SLP))
568 fprintf (vect_dump, "Build SLP failed: not strided load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 /* FORNOW: Not strided loads are not supported. */
576 /* Not memory operation. */
577 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
578 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: operation");
583 fprintf (vect_dump, " unsupported ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
590 /* Find the def-stmts. */
591 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
592 &def_stmts0, &def_stmts1,
593 &first_stmt_dt0, &first_stmt_dt1,
594 &first_stmt_def0_type,
595 &first_stmt_def1_type,
596 &first_stmt_const_oprnd,
598 &pattern0, &pattern1))
603 /* Add the costs of the node to the overall instance costs. */
604 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
605 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
607 /* Strided loads were reached - stop the recursion. */
/* Record the load node and charge a possible permutation cost.  */
612 VEC_safe_push (slp_tree, heap, *loads, *node);
613 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
619 /* Create SLP_TREE nodes for the definition node/s. */
/* Operand 0 has internal defs: build the left child recursively.  */
620 if (first_stmt_dt0 == vect_internal_def)
622 slp_tree left_node = XNEW (struct _slp_tree);
623 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
624 SLP_TREE_VEC_STMTS (left_node) = NULL;
625 SLP_TREE_LEFT (left_node) = NULL;
626 SLP_TREE_RIGHT (left_node) = NULL;
627 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
628 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
629 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
630 inside_cost, outside_cost, ncopies_for_cost,
631 max_nunits, load_permutation, loads,
632 vectorization_factor))
635 SLP_TREE_LEFT (*node) = left_node;
/* Likewise for operand 1 as the right child.  */
638 if (first_stmt_dt1 == vect_internal_def)
640 slp_tree right_node = XNEW (struct _slp_tree);
641 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
642 SLP_TREE_VEC_STMTS (right_node) = NULL;
643 SLP_TREE_LEFT (right_node) = NULL;
644 SLP_TREE_RIGHT (right_node) = NULL;
645 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
646 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
647 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
648 inside_cost, outside_cost, ncopies_for_cost,
649 max_nunits, load_permutation, loads,
650 vectorization_factor))
653 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: the node's scalar stmts,
   then both subtrees recursively.  NOTE(review): the guard for a NULL
   NODE / dump-enabled check is elided in this excerpt -- verify.  */
661 vect_print_slp_tree (slp_tree node)
669 fprintf (vect_dump, "node ");
/* Print each scalar stmt of this node, indexed.  */
670 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
672 fprintf (vect_dump, "\n\tstmt %d ", i);
673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
675 fprintf (vect_dump, "\n");
/* Recurse into both children.  */
677 vect_print_slp_tree (SLP_TREE_LEFT (node));
678 vect_print_slp_tree (SLP_TREE_RIGHT (node));
682 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685 stmts in NODE are to be marked. */
/* Set STMT_SLP_TYPE to MARK for stmts of the tree rooted at NODE, then
   recurse into both children with the same MARK and J.  Per the comment
   above: J selects a single stmt for HYBRID, or is -1 for PURE_SLP.
   NOTE(review): the use of J in the loop's guard is elided in this
   excerpt -- verify against the full source.  */
688 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
696 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
698 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
700 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
705 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* Mark every stmt in the tree rooted at NODE as vect_used_in_scope,
   asserting each was previously either irrelevant or already used in
   scope, then recurse into both children.  */
708 vect_mark_slp_stmts_relevant (slp_tree node)
712 stmt_vec_info stmt_info;
717 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
719 stmt_info = vinfo_for_stmt (stmt);
/* A stmt must not already carry a different relevance kind.  */
720 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
725 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
730 /* Check if the permutation required by the SLP INSTANCE is supported.
731 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* Check whether INSTANCE's load permutation is supported.  All load nodes
   must belong to one interleaving chain; they are then sorted into chain
   order (via TMP_LOADS indexed by each node's permutation index) and the
   sorted vector replaces SLP_INSTANCE_LOADS.  Finally the permutation is
   validated with vect_transform_slp_perm_load in analysis-only mode.
   NOTE(review): several lines (loop header start, failure cleanup paths,
   final return) are elided in this excerpt -- verify.  */
734 vect_supported_slp_permutation_p (slp_instance instance)
736 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
737 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
/* The chain head all load nodes are required to share.  */
738 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
739 VEC (slp_tree, heap) *sorted_loads = NULL;
741 slp_tree *tmp_loads = NULL;
742 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
745 /* FORNOW: The only supported loads permutation is loads from the same
746 location in all the loads in the node, when the data-refs in
747 nodes of LOADS constitute an interleaving chain.
748 Sort the nodes according to the order of accesses in the chain. */
749 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk permutation indices (stepping a whole group at a time) in
   parallel with the load nodes.  */
751 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
752 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
753 i += group_size, j++)
755 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
756 /* Check that the loads are all in the same interleaving chain. */
757 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
759 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "Build SLP failed: unsupported data "
763 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place the node at its position within the chain.  */
770 tmp_loads[index] = load;
/* Rebuild the loads vector in chain order.  */
773 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
774 for (i = 0; i < group_size; i++)
775 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
777 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
778 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Dry-run the permutation to see if the target supports it.  */
781 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
782 SLP_INSTANCE_UNROLLING_FACTOR (instance),
790 /* Check if the required load permutation is supported.
791 LOAD_PERMUTATION contains a list of indices of the loads.
792 In SLP this permutation is relative to the order of strided stores that are
793 the base of the SLP instance. */
/* Decide whether LOAD_PERMUTATION (length GROUP_SIZE * GROUP_SIZE) is a
   supported permutation for SLP_INSTN: each consecutive GROUP_SIZE-long
   run must repeat a single index, and each index may appear as such a run
   only once (tracked with the LOAD_INDEX sbitmap).  If the shape checks
   pass, defer to vect_supported_slp_permutation_p for the final verdict.
   NOTE(review): declarations of LOAD_INDEX/SUPPORTED, some loop-body
   lines and returns are elided in this excerpt -- verify.  */
796 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797 VEC (int, heap) *load_permutation)
799 int i = 0, j, prev = -1, next, k;
803 /* FORNOW: permutations are only supported in SLP. */
807 if (vect_print_dump_info (REPORT_SLP))
809 fprintf (vect_dump, "Load permutation ");
810 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811 fprintf (vect_dump, "%d ", next);
814 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
/* Wrong total length -> unsupported.  */
817 if (VEC_length (int, load_permutation)
818 != (unsigned int) (group_size * group_size))
/* One bit per index; set when its run has been seen.  */
822 load_index = sbitmap_alloc (group_size);
823 sbitmap_zero (load_index);
824 for (j = 0; j < group_size; j++)
826 for (i = j * group_size, k = 0;
827 VEC_iterate (int, load_permutation, i, next) && k < group_size;
/* Within a run, every entry must equal the first (PREV).  */
830 if (i != j * group_size && next != prev)
/* An index may head at most one run.  */
839 if (TEST_BIT (load_index, prev))
845 SET_BIT (load_index, prev);
848 sbitmap_free (load_index);
850 if (supported && i == group_size * group_size
851 && vect_supported_slp_permutation_p (slp_instn))
858 /* Find the first load in the loop that belongs to INSTANCE.
859 When loads are in several SLP nodes, there can be a case in which the first
860 load does not appear in the first SLP node to be transformed, causing
861 incorrect order of statements. Since we generate all the loads together,
862 they must be inserted before the first load of the SLP instance and not
863 before the first load of the first node of the instance. */
/* Return the earliest (by statement order) load among all load nodes of
   INSTANCE, folding each scalar load stmt through get_earlier_stmt.  */
865 vect_find_first_load_in_slp_instance (slp_instance instance)
869 gimple first_load = NULL, load;
/* Outer walk: every load node of the instance.  */
872 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
/* Inner walk: every scalar load stmt of that node.  */
875 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
877 first_load = get_earlier_stmt (load, first_load);
883 /* Analyze an SLP instance starting from a group of strided stores. Call
884 vect_build_slp_tree to build a tree of packed stmts if possible.
885 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* Try to build an SLP instance from the strided-store group headed by
   STMT: collect the group's stores into a root node, build the SLP tree,
   and on success record the new instance in LOOP_VINFO or BB_VINFO.
   On failure, free everything that was allocated.
   NOTE(review): this excerpt elides intermediate lines (braces, returns,
   some conditions) -- verify details against the full source.  */
888 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
891 slp_instance new_instance;
892 slp_tree node = XNEW (struct _slp_tree);
893 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
894 unsigned int unrolling_factor = 1, nunits;
895 tree vectype, scalar_type;
897 unsigned int vectorization_factor = 0;
898 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
899 unsigned int max_nunits = 0;
900 VEC (int, heap) *load_permutation;
901 VEC (slp_tree, heap) *loads;
/* Derive the vector type from the store's data reference.  */
903 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
904 vinfo_for_stmt (stmt))));
905 vectype = get_vectype_for_scalar_type (scalar_type);
908 if (vect_print_dump_info (REPORT_SLP))
910 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
911 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
916 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Loop SLP uses the loop's VF; BB SLP uses NUNITS directly.  */
918 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
920 /* No multitypes in BB SLP. */
921 vectorization_factor = nunits;
923 /* Calculate the unrolling factor. */
924 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP cannot unroll, so any factor > 1 is fatal there.  */
925 if (unrolling_factor != 1 && !loop_vinfo)
927 if (vect_print_dump_info (REPORT_SLP))
928 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
934 /* Create a node (a root of the SLP tree) for the packed strided stores. */
935 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
937 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
940 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
941 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
/* Initialize the rest of the root node's fields.  */
944 SLP_TREE_VEC_STMTS (node) = NULL;
945 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
946 SLP_TREE_LEFT (node) = NULL;
947 SLP_TREE_RIGHT (node) = NULL;
948 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
949 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
951 /* Calculate the number of vector stmts to create based on the unrolling
952 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
953 GROUP_SIZE / NUNITS otherwise. */
954 ncopies_for_cost = unrolling_factor * group_size / nunits;
956 load_permutation = VEC_alloc (int, heap, group_size * group_size);
957 loads = VEC_alloc (slp_tree, heap, group_size);
959 /* Build the tree for the SLP instance. */
960 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
961 &inside_cost, &outside_cost, ncopies_for_cost,
962 &max_nunits, &load_permutation, &loads,
963 vectorization_factor))
965 /* Create a new SLP instance. */
966 new_instance = XNEW (struct _slp_instance);
967 SLP_INSTANCE_TREE (new_instance) = node;
968 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
969 /* Calculate the unrolling factor based on the smallest type in the
971 if (max_nunits > nunits)
972 unrolling_factor = least_common_multiple (max_nunits, group_size)
975 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
976 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
977 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
978 SLP_INSTANCE_LOADS (new_instance) = loads;
979 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
980 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If there are loads, their permutation must be supported.  */
981 if (VEC_length (slp_tree, loads))
983 if (!vect_supported_load_permutation_p (new_instance, group_size,
986 if (vect_print_dump_info (REPORT_SLP))
988 fprintf (vect_dump, "Build SLP failed: unsupported load "
990 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
993 vect_free_slp_instance (new_instance);
997 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
998 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unneeded.  */
1001 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Register the instance with the enclosing loop or basic block.  */
1004 VEC_safe_push (slp_instance, heap,
1005 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1008 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1011 if (vect_print_dump_info (REPORT_SLP))
1012 vect_print_slp_tree (node);
1017 /* Failed to SLP. */
1018 /* Free the allocated memory. */
1019 vect_free_slp_tree (node);
1020 VEC_free (int, heap, load_permutation);
1021 VEC_free (slp_tree, heap, loads);
1027 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
1028 trees of packed scalar stmts if SLP is possible. */
/* Entry point for SLP analysis: fetch the strided stores recorded in
   LOOP_VINFO or BB_VINFO (exactly one of which is non-NULL) and try to
   build an SLP instance from each.  For BB vectorization, failing on
   every store means the whole basic block fails.  */
1031 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1034 VEC (gimple, heap) *strided_stores;
1038 if (vect_print_dump_info (REPORT_SLP))
1039 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pick the store list from whichever context we are analyzing.  */
1042 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1044 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Try to build an SLP instance from each strided-store group.  */
1046 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1047 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
/* BB SLP is all-or-nothing: no instance at all means failure.  */
1050 if (bb_vinfo && !ok)
1052 if (vect_print_dump_info (REPORT_SLP))
1053 fprintf (vect_dump, "Failed to SLP the basic block.");
1062 /* For each possible SLP instance decide whether to SLP it and calculate overall
1063 unrolling factor needed to SLP the loop. */
/* Decide, per SLP instance of LOOP_VINFO, whether to SLP it (FORNOW:
   always), mark its stmts PURE_SLP, and record the maximum unrolling
   factor over all instances into the loop's SLP unrolling factor.  */
1066 vect_make_slp_decision (loop_vec_info loop_vinfo)
1068 unsigned int i, unrolling_factor = 1;
1069 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1070 slp_instance instance;
/* Counts instances chosen, for the dump message below.  */
1071 int decided_to_slp = 0;
1073 if (vect_print_dump_info (REPORT_SLP))
1074 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1076 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1078 /* FORNOW: SLP if you can. */
/* Track the largest unrolling factor required by any instance.  */
1079 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1080 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1082 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1083 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1084 loop-based vectorization. Such stmts will be marked as HYBRID. */
1085 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1089 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1091 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1092 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1093 decided_to_slp, unrolling_factor);
1097 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1098 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* Walk the SLP tree rooted at NODE; any PURE_SLP stmt whose SSA result is
   also used by a relevant non-SLP stmt (or feeds a vectorizable cycle)
   must additionally be loop-vectorized, so mark it HYBRID.  Recurses into
   both children.  */
1101 vect_detect_hybrid_slp_stmts (slp_tree node)
1105 imm_use_iterator imm_iter;
1107 stmt_vec_info stmt_vinfo;
1112 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
/* Only pure-SLP stmts defining an SSA name can become hybrid.  */
1113 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1114 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
/* Inspect every immediate use of the defined SSA name.  */
1115 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1116 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1117 && !STMT_SLP_TYPE (stmt_vinfo)
1118 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1119 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
/* A non-SLP consumer exists: mark stmt I of NODE as hybrid.  */
1120 vect_mark_slp_stmts (node, hybrid, i);
1122 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1123 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1127 /* Find stmts that must be both vectorized and SLPed. */
/* Run hybrid-SLP detection over every SLP instance of LOOP_VINFO by
   delegating to vect_detect_hybrid_slp_stmts on each instance tree.  */
1130 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1133 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1134 slp_instance instance;
1136 if (vect_print_dump_info (REPORT_SLP))
1137 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1139 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1140 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1144 /* Create and initialize a new bb_vec_info struct for BB, as well as
1145 stmt_vec_info structs for all the stmts in it. */
/* Allocate and initialize a bb_vec_info for BB: zeroed struct, a fresh
   stmt_vec_info (with uid reset) for every stmt in the block, and empty
   vectors for strided stores and SLP instances.  */
1148 new_bb_vec_info (basic_block bb)
1150 bb_vec_info res = NULL;
1151 gimple_stmt_iterator gsi;
/* xcalloc zero-fills, so all other fields start as 0/NULL.  */
1153 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1154 BB_VINFO_BB (res) = bb;
/* Attach vectorizer bookkeeping to each stmt of the block.  */
1156 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1158 gimple stmt = gsi_stmt (gsi);
1159 gimple_set_uid (stmt, 0);
1160 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1163 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1164 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1171 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1172 stmts in the basic block. */
/* Tear down BB_VINFO: free the per-stmt stmt_vec_info of every stmt in
   the block, then the strided-store and SLP-instance vectors.
   NOTE(review): the NULL-BB_VINFO guard, the freeing of the struct itself
   and related lines are elided in this excerpt -- verify.  */
1175 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1178 gimple_stmt_iterator si;
1183 bb = BB_VINFO_BB (bb_vinfo);
1185 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1187 gimple stmt = gsi_stmt (si);
1188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1191 /* Free stmt_vec_info. */
1192 free_stmt_vec_info (stmt);
1195 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1196 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1202 /* Analyze statements contained in SLP tree node after recursively analyzing
1203 the subtree. Return TRUE if the operations are supported. */
/* Depth-first check that every stmt in the SLP (sub)tree NODE is a
   supported vectorizable operation: analyze both children first, then
   each scalar stmt of NODE via vect_analyze_stmt.  */
1206 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Fail fast if either subtree is unsupported.  */
1215 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1216 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1219 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1221 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Every stmt here must already have been marked pure-SLP.  */
1222 gcc_assert (stmt_info);
1223 gcc_assert (PURE_SLP_STMT (stmt_info));
1225 if (!vect_analyze_stmt (stmt, &dummy, node))
1233 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1234 operations are supported. */
/* Analyze the operations of every SLP instance in BB_VINFO; instances
   whose tree contains an unsupported stmt are freed and removed from the
   vector in place (hence the iterator is only advanced on success).
   Fails overall when no instance survives.  */
1237 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1239 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1240 slp_instance instance;
/* Note: no i++ here -- removal below shifts the next element into
   slot i, so the index advances only in the (elided) else path.  */
1243 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1245 if (!vect_slp_analyze_node_operations (bb_vinfo,
1246 SLP_INSTANCE_TREE (instance)))
1248 vect_free_slp_instance (instance);
1249 VEC_ordered_remove (slp_instance, slp_instances, i);
/* All instances rejected -> the basic block cannot be SLPed.  */
1255 if (!VEC_length (slp_instance, slp_instances))
1262 /* Check if the basic block can be vectorized. */
1265 vect_slp_analyze_bb (basic_block bb)
1267 bb_vec_info bb_vinfo;
1268 VEC (ddr_p, heap) *ddrs;
1269 VEC (slp_instance, heap) *slp_instances;
1270 slp_instance instance;
1272 gimple_stmt_iterator gsi;
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1277 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1279 gimple stmt = gsi_stmt (gsi);
1280 if (!is_gimple_debug (stmt)
1281 && !gimple_nop_p (stmt)
1282 && gimple_code (stmt) != GIMPLE_LABEL)
1286 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1288 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1289 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1295 bb_vinfo = new_bb_vec_info (bb);
1299 if (!vect_analyze_data_refs (NULL, bb_vinfo))
1301 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1302 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1305 destroy_bb_vec_info (bb_vinfo);
1309 ddrs = BB_VINFO_DDRS (bb_vinfo);
1310 if (!VEC_length (ddr_p, ddrs))
1312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1313 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1316 destroy_bb_vec_info (bb_vinfo);
1320 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1323 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1326 destroy_bb_vec_info (bb_vinfo);
1330 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
1332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1333 fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
1336 destroy_bb_vec_info (bb_vinfo);
1340 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1342 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1343 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1346 destroy_bb_vec_info (bb_vinfo);
1350 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1352 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1353 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1356 destroy_bb_vec_info (bb_vinfo);
1360 /* Check the SLP opportunities in the basic block, analyze and build SLP
1362 if (!vect_analyze_slp (NULL, bb_vinfo))
1364 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1365 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1366 "in basic block.\n");
1368 destroy_bb_vec_info (bb_vinfo);
1372 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1374 /* Mark all the statements that we want to vectorize as pure SLP and
1376 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1378 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1379 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1382 if (!vect_slp_analyze_operations (bb_vinfo))
1384 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1385 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1387 destroy_bb_vec_info (bb_vinfo);
1391 if (vect_print_dump_info (REPORT_DETAILS))
1392 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1398 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1399 the number of created vector stmts depends on the unrolling factor). However,
1400 the actual number of vector stmts for every SLP node depends on VF which is
1401 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1402 In this function we assume that the inside costs calculated in
1403 vect_model_xxx_cost are linear in ncopies. */
1406 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1408 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1409 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1410 slp_instance instance;
1412 if (vect_print_dump_info (REPORT_SLP))
1413 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside-loop cost from its own unrolling factor up
   to the final VF.  NOTE(review): integer division — presumably VF is always
   a multiple of the instance unrolling factor here; confirm against
   vect_analyze_operations.  */
1415 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1416 /* We assume that costs are linear in ncopies. */
1417 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1418 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1422 /* For constant and loop invariant defs of SLP_NODE this function returns
1423 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1424 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1425 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1428 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1429 unsigned int op_num, unsigned int number_of_vectors)
1431 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
/* The first scalar statement is representative for the whole group.  */
1432 gimple stmt = VEC_index (gimple, stmts, 0);
1433 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1437 int j, number_of_places_left_in_vector;
1440 int group_size = VEC_length (gimple, stmts);
1441 unsigned int vec_num, i;
1442 int number_of_copies = 1;
1443 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1444 bool constant_p, is_store;
/* For a store the operand of interest is RHS1; otherwise pick the
   requested operand (OP_NUM is 0-based, gimple ops are 1-based).  */
1446 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1449 op = gimple_assign_rhs1 (stmt);
1454 op = gimple_op (stmt, op_num + 1);
/* Whether to emit a VECTOR_CST (all-constant) or a CONSTRUCTOR.  */
1457 if (CONSTANT_CLASS_P (op))
1462 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1463 gcc_assert (vector_type);
1465 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1467 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1468 created vectors. It is greater than 1 if unrolling is performed.
1470 For example, we have two scalar operands, s1 and s2 (e.g., group of
1471 strided accesses of size two), while NUNITS is four (i.e., four scalars
1472 of this type can be packed in a vector). The output vector will contain
1473 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1476 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1477 containing the operands.
1479 For example, NUNITS is four as before, and the group size is 8
1480 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1481 {s5, s6, s7, s8}. */
1483 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1485 number_of_places_left_in_vector = nunits;
1486 for (j = 0; j < number_of_copies; j++)
/* Iterate the group backwards: tree_cons prepends, so the element list
   T ends up in forward order.  */
1488 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1491 op = gimple_assign_rhs1 (stmt);
1493 op = gimple_op (stmt, op_num + 1);
1495 /* Create 'vect_ = {op0,op1,...,opn}'. */
1496 t = tree_cons (NULL_TREE, op, t);
1498 number_of_places_left_in_vector--;
/* A full vector has been accumulated — materialize it.  */
1500 if (number_of_places_left_in_vector == 0)
1502 number_of_places_left_in_vector = nunits;
1505 vec_cst = build_vector (vector_type, t);
1507 vec_cst = build_constructor_from_list (vector_type, t);
1508 VEC_quick_push (tree, voprnds,
1509 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1515 /* Since the vectors are created in the reverse order, we should invert
1517 vec_num = VEC_length (tree, voprnds);
1518 for (j = vec_num - 1; j >= 0; j--)
1520 vop = VEC_index (tree, voprnds, j);
1521 VEC_quick_push (tree, *vec_oprnds, vop);
1524 VEC_free (tree, heap, voprnds);
1526 /* In case that VF is greater than the unrolling factor needed for the SLP
1527 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1528 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1529 to replicate the vectors. */
1530 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1532 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1533 VEC_quick_push (tree, *vec_oprnds, vop);
1538 /* Get vectorized definitions from SLP_NODE that contains corresponding
1539 vectorized def-stmts.  Pushes the LHS of every vectorized statement of
   SLP_NODE onto *VEC_OPRNDS (which must have sufficient capacity, since
   VEC_quick_push does not grow the vector).  */
1542 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1545 gimple vec_def_stmt;
/* The node must already have been vectorized.  */
1548 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1551 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1554 gcc_assert (vec_def_stmt);
1555 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1556 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1561 /* Get vectorized definitions for SLP_NODE.
1562 If the scalar definitions are loop invariants or constants, collect them and
1563 call vect_get_constant_vectors() to create vector stmts.
1564 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1565 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1566 vect_get_slp_vect_defs() to retrieve them.
1567 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1568 the right node. This is used when the second operand must remain scalar. */
1571 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1572 VEC (tree,heap) **vec_oprnds1)
1575 enum tree_code code;
1576 int number_of_vects;
1577 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1579 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1580 /* The number of vector defs is determined by the number of vector statements
1581 in the node from which we get those statements. */
1582 if (SLP_TREE_LEFT (slp_node))
1583 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
/* No left child: defs come from constants/invariants of this node.  */
1586 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1587 /* Number of vector stmts was calculated according to LHS in
1588 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1589 necessary. See vect_get_smallest_scalar_type() for details. */
1590 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* Rescale: e.g. a widening/narrowing op needs proportionally more or
   fewer RHS vectors than LHS vectors.  */
1592 if (rhs_size_unit != lhs_size_unit)
1594 number_of_vects *= rhs_size_unit;
1595 number_of_vects /= lhs_size_unit;
1599 /* Allocate memory for vectorized defs. */
1600 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1602 /* SLP_NODE corresponds either to a group of stores or to a group of
1603 unary/binary operations. We don't call this function for loads. */
1604 if (SLP_TREE_LEFT (slp_node))
1605 /* The defs are already vectorized. */
1606 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1608 /* Build vectors from scalar defs. */
1609 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1611 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1612 /* Since we don't call this function with loads, this is a group of
/* Only binary operations have a second operand; also skip when the
   caller asked for operand 0 only (VEC_OPRNDS1 == NULL).  */
1616 code = gimple_assign_rhs_code (first_stmt);
1617 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1620 /* The number of vector defs is determined by the number of vector statements
1621 in the node from which we get those statements. */
1622 if (SLP_TREE_RIGHT (slp_node))
1623 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1625 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1627 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1629 if (SLP_TREE_RIGHT (slp_node))
1630 /* The defs are already vectorized. */
1631 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1633 /* Build vectors from scalar defs. */
1634 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1638 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1639 building a vector of type MASK_TYPE from it) and two input vectors placed in
1640 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1641 shifting by STRIDE elements of DR_CHAIN for every copy.
1642 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1644 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1645 the created stmts must be inserted. */
1648 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1649 tree mask, int first_vec_indx, int second_vec_indx,
1650 gimple_stmt_iterator *gsi, slp_tree node,
1651 tree builtin_decl, tree vectype,
1652 VEC(tree,heap) *dr_chain,
1653 int ncopies, int vect_stmts_counter)
1656 gimple perm_stmt = NULL;
1657 stmt_vec_info next_stmt_info;
1659 tree first_vec, second_vec, data_ref;
1660 VEC (tree, heap) *params = NULL;
1662 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1664 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vec-stmts vector with NULLs up to its declared size so that
   VEC_replace below can target any slot.  */
1666 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1667 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1668 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1670 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1671 for (i = 0; i < ncopies; i++)
1673 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1674 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1676 /* Build argument list for the vectorized call. */
/* Free the previous iteration's list (VEC_free accepts NULL on the
   first iteration) and build a fresh 3-argument list.  */
1677 VEC_free (tree, heap, params);
1678 params = VEC_alloc (tree, heap, 3);
1679 VEC_quick_push (tree, params, first_vec);
1680 VEC_quick_push (tree, params, second_vec);
1681 VEC_quick_push (tree, params, mask);
1683 /* Generate the permute statement. */
1684 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1685 data_ref = make_ssa_name (perm_dest, perm_stmt);
1686 gimple_call_set_lhs (perm_stmt, data_ref);
1687 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1689 /* Store the vector statement in NODE. */
1690 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1691 stride * i + vect_stmts_counter, perm_stmt);
/* Advance to the next pair of input vectors for the next copy.  */
1693 first_vec_indx += stride;
1694 second_vec_indx += stride;
1697 /* Mark the scalar stmt as vectorized. */
1698 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1699 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1703 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1704 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1705 representation. Check that the mask is valid and return FALSE if not.
1706 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1707 the next vector, i.e., the current first vector is not needed. */
1710 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1711 int mask_nunits, bool only_one_vec, int index,
1712 int *mask, int *current_mask_element,
1713 bool *need_next_vector)
/* NOTE(review): the static locals below carry state across successive
   calls while one mask is being built; they are reset only when the last
   element of a mask is reached (index == mask_nunits - 1).  This makes
   the function non-reentrant.  */
1716 static int number_of_mask_fixes = 1;
1717 static bool mask_fixed = false;
1718 static bool needs_first_vector = false;
1720 /* Convert to target specific representation. */
1721 *current_mask_element = first_mask_element + m;
1722 /* Adjust the value in case it's a mask for second and third vectors. */
1723 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1725 if (*current_mask_element < mask_nunits)
1726 needs_first_vector = true;
1728 /* We have only one input vector to permute but the mask accesses values in
1729 the next vector as well. */
1730 if (only_one_vec && *current_mask_element >= mask_nunits)
1732 if (vect_print_dump_info (REPORT_DETAILS))
1734 fprintf (vect_dump, "permutation requires at least two vectors ");
1735 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1741 /* The mask requires the next vector. */
1742 if (*current_mask_element >= mask_nunits * 2)
1744 if (needs_first_vector || mask_fixed)
1746 /* We either need the first vector too or have already moved to the
1747 next vector. In both cases, this permutation needs three
1749 if (vect_print_dump_info (REPORT_DETAILS))
1751 fprintf (vect_dump, "permutation requires at "
1752 "least three vectors ")
1753 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1759 /* We move to the next vector, dropping the first one and working with
1760 the second and the third - we need to adjust the values of the mask
/* Rebase this element and all previously recorded elements of the
   current mask to the new vector pair.  */
1762 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1764 for (i = 0; i < index; i++)
1765 mask[i] -= mask_nunits * number_of_mask_fixes;
1767 (number_of_mask_fixes)++;
1771 *need_next_vector = mask_fixed;
1773 /* This was the last element of this mask. Start a new one. */
1774 if (index == mask_nunits - 1)
1776 number_of_mask_fixes = 1;
1778 needs_first_vector = false;
1785 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1786 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1787 permute statements for SLP_NODE_INSTANCE. */
1789 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1790 gimple_stmt_iterator *gsi, int vf,
1791 slp_instance slp_node_instance, bool analyze_only)
1793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1794 tree mask_element_type = NULL_TREE, mask_type;
1795 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1797 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1798 gimple next_scalar_stmt;
1799 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1800 int first_mask_element;
1801 int index, unroll_factor, *mask, current_mask_element, ncopies;
1802 bool only_one_vec = false, need_next_vector = false;
1803 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
/* Bail out if the target provides no vector-permute builtin at all.  */
1805 if (!targetm.vectorize.builtin_vec_perm)
1807 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "no builtin for vect permute for ");
1810 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Ask the target for the permute builtin for this vector type and for
   the element type of its mask operand.  */
1816 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1817 &mask_element_type);
1818 if (!builtin_decl || !mask_element_type)
1820 if (vect_print_dump_info (REPORT_DETAILS))
1822 fprintf (vect_dump, "no builtin for vect permute for ");
1823 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1829 mask_type = get_vectype_for_scalar_type (mask_element_type);
1830 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1831 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1832 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* SCALE maps data-vector lanes to mask-vector lanes (e.g. byte masks
   for wider elements).  */
1833 scale = mask_nunits / nunits;
1834 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1836 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1837 unrolling factor. */
1838 orig_vec_stmts_num = group_size *
1839 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1840 if (orig_vec_stmts_num == 1)
1841 only_one_vec = true;
1843 /* Number of copies is determined by the final vectorization factor
1844 relatively to SLP_NODE_INSTANCE unrolling factor. */
1845 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1847 /* Generate permutation masks for every NODE. Number of masks for each NODE
1848 is equal to GROUP_SIZE.
1849 E.g., we have a group of three nodes with three loads from the same
1850 location in each node, and the vector size is 4. I.e., we have a
1851 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1852 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1853 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1856 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1857 specific type, e.g., in bytes for Altivec.
1858 The last mask is illegal since we assume two operands for permute
1859 operation, and the mask element values can't be outside that range. Hence,
1860 the last mask must be converted into {2,5,5,5}.
1861 For the first two permutations we need the first and the second input
1862 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1863 we need the second and the third vectors: {b1,c1,a2,b2} and
1867 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1873 vect_stmts_counter = 0;
/* Pick the initial pair of input vectors for this node's masks.  */
1875 first_vec_index = vec_index++;
1877 second_vec_index = first_vec_index;
1879 second_vec_index = vec_index++;
1881 for (j = 0; j < unroll_factor; j++)
1883 for (k = 0; k < group_size; k++)
1885 first_mask_element = (i + j * group_size) * scale;
1886 for (m = 0; m < scale; m++)
/* NOTE(review): '¤t_mask_element' on the next code line is an
   encoding artifact — '&curren' of '&current_mask_element' was
   interpreted as an HTML entity.  The intended argument is
   '&current_mask_element'; restore it when re-encoding this file.  */
1888 if (!vect_get_mask_element (stmt, first_mask_element, m,
1889 mask_nunits, only_one_vec, index, mask,
1890 ¤t_mask_element, &need_next_vector))
1893 mask[index++] = current_mask_element;
/* A full mask vector has been collected — build the constant
   mask and (unless analyzing only) emit the permute stmt.  */
1896 if (index == mask_nunits)
1898 tree mask_vec = NULL;
1900 while (--index >= 0)
1902 tree t = build_int_cst (mask_element_type, mask[index]);
1903 mask_vec = tree_cons (NULL, t, mask_vec);
1905 mask_vec = build_vector (mask_type, mask_vec);
/* Let the target validate this particular mask.  */
1908 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1911 if (vect_print_dump_info (REPORT_DETAILS))
1913 fprintf (vect_dump, "unsupported vect permute ");
1914 print_generic_expr (vect_dump, mask_vec, 0);
/* Slide the input-vector window when the mask no longer needs
   the current first vector.  */
1922 if (need_next_vector)
1924 first_vec_index = second_vec_index;
1925 second_vec_index = vec_index;
1928 next_scalar_stmt = VEC_index (gimple,
1929 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1931 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1932 mask_vec, first_vec_index, second_vec_index,
1933 gsi, node, builtin_decl, vectype, dr_chain,
1934 ncopies, vect_stmts_counter++);
1947 /* Vectorize SLP instance tree in postorder. */
1950 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1951 unsigned int vectorization_factor)
1954 bool strided_store, is_store;
1955 gimple_stmt_iterator si;
1956 stmt_vec_info stmt_info;
1957 unsigned int vec_stmts_size, nunits, group_size;
1960 slp_tree loads_node;
/* Postorder: vectorize both children before this node, so their vector
   defs exist when this node's statements are transformed.  */
1965 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1966 vectorization_factor);
1967 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1968 vectorization_factor);
1970 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1971 stmt_info = vinfo_for_stmt (stmt);
1973 /* VECTYPE is the type of the destination. */
1974 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1975 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1976 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1978 /* For each SLP instance calculate number of vector stmts to be created
1979 for the scalar stmts in each node of the SLP tree. Number of vector
1980 elements in one vector iteration is the number of scalar elements in
1981 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1983 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1985 /* In case of load permutation we have to allocate vectorized statements for
1986 all the nodes that participate in that permutation. */
1987 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1990 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
/* Allocate lazily: a loads node may be visited via several paths.  */
1993 if (!SLP_TREE_VEC_STMTS (loads_node))
1995 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
1997 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2002 if (!SLP_TREE_VEC_STMTS (node))
2004 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2005 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2008 if (vect_print_dump_info (REPORT_DETAILS))
2010 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2011 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2014 /* Loads should be inserted before the first load. */
2015 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2016 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2017 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2018 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2020 si = gsi_for_stmt (stmt);
2022 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2025 if (DR_GROUP_FIRST_DR (stmt_info))
2026 /* If IS_STORE is TRUE, the vectorization of the
2027 interleaving chain was completed - free all the stores in
2029 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2031 /* FORNOW: SLP originates only from strided stores. */
2037 /* FORNOW: SLP originates only from strided stores. */
/* Generate vector code for all SLP instances of either LOOP_VINFO or
   BB_VINFO (exactly one of the two is expected to be non-NULL, selecting
   loop-SLP or basic-block-SLP mode).  */
2043 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2045 VEC (slp_instance, heap) *slp_instances;
2046 slp_instance instance;
2048 bool is_store = false;
/* Loop mode: instances and VF come from the loop info.  */
2052 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2053 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Basic-block mode: instances come from the BB info.  */
2057 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2061 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2063 /* Schedule the tree of INSTANCE. */
2064 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2066 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2067 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2068 fprintf (vect_dump, "vectorizing stmts using SLP.");
2075 /* Vectorize the basic block.  Walks BB's statements and, on reaching the
   first SLP statement, schedules all SLP instances at once; afterwards the
   virtual operand SSA form is updated and the BB vec-info is destroyed.  */
2078 vect_slp_transform_bb (basic_block bb)
2080 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2081 gimple_stmt_iterator si;
/* Must only be called on a block that passed vect_slp_analyze_bb.  */
2083 gcc_assert (bb_vinfo);
2085 if (vect_print_dump_info (REPORT_DETAILS))
2086 fprintf (vect_dump, "SLPing BB\n");
2088 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2090 gimple stmt = gsi_stmt (si);
2091 stmt_vec_info stmt_info;
2093 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "------>SLPing statement: ");
2096 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2099 stmt_info = vinfo_for_stmt (stmt);
2100 gcc_assert (stmt_info);
2102 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2103 if (STMT_SLP_TYPE (stmt_info))
2105 vect_schedule_slp (NULL, bb_vinfo);
2110 mark_sym_for_renaming (gimple_vop (cfun));
2111 /* The memory tags and pointers in vectorized statements need to
2112 have their SSA forms updated. FIXME, why can't this be delayed
2113 until all the loops have been transformed? */
2114 update_ssa (TODO_update_ssa);
2116 if (vect_print_dump_info (REPORT_DETAILS))
2117 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2119 destroy_bb_vec_info (bb_vinfo);