1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
/* NOTE(review): this excerpt is missing lines (gaps in the original
   numbering).  The return type, braces, 'stmt = gsi_stmt (si)' and the
   fallback return are elided -- confirm against the full source.  */
45 find_bb_location (basic_block bb)
48 gimple_stmt_iterator si;
/* Scan the block's statements in order; report the first known location.  */
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): excerpt is elided (numbering gaps); the return type,
   braces, and presumably a final free of NODE itself are missing here.  */
67 vect_free_slp_tree (slp_tree node)
/* Post-order: release both children before this node's stmt vectors.  */
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
/* The vectorized-stmts vector is only allocated during transformation,
   so it may legitimately be NULL at analysis time.  */
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
87 /* Free the memory allocated for the SLP instance. */
/* NOTE(review): elided excerpt -- return type, braces, and presumably a
   free of INSTANCE itself are missing from this view.  */
90 vect_free_slp_instance (slp_instance instance)
/* Free the whole stmt tree first, then the instance-level vectors.  */
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
/* NOTE(review): heavily elided excerpt (numbering gaps) -- declarations
   of 'oprnd', 'def', 'def_stmt', several braces/else-arms and the 'return
   false/true' paths are missing; read alongside the full source.  */
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
116 unsigned int i, number_of_oprnds;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
/* Only set LOOP when doing loop SLP; presumably guarded by 'if (loop_vinfo)'
   on an elided line -- confirm.  */
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT (operand 0 is the LHS).  */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* First stmt of the group: remember that operand I came from a pattern
   (the *pattern0/*pattern1 assignments appear to be on elided lines).  */
156 if (!*first_stmt_dt0)
160 if (i == 1 && !*first_stmt_dt1)
/* Later stmts must agree with the first about pattern membership.  */
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM)
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
/* Re-extract DEF from the pattern stmt (case labels are elided here).  */
185 switch (gimple_code (def_stmt))
188 def = gimple_phi_result (def_stmt);
192 def = gimple_assign_lhs (def_stmt);
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
207 *first_stmt_def0_type = TREE_TYPE (def);
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
227 *first_stmt_def1_type = TREE_TYPE (def);
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
239 *first_stmt_const_oprnd = oprnd;
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
268 /* Check the types of the definitions. */
/* Constants/externals need no child node; internal defs are queued as
   the scalar stmts of the left/right child (selection between the two
   push targets is presumably on an elided condition -- confirm).  */
271 case vect_constant_def:
272 case vect_external_def:
275 case vect_internal_def:
277 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
279 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
283 /* FORNOW: Not supported. */
284 if (vect_print_dump_info (REPORT_SLP))
286 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287 print_generic_expr (vect_dump, def, TDF_SLIM);
298 /* Recursively build an SLP tree starting from NODE.
299 Fail (and return FALSE) if def-stmts are not isomorphic, require data
300 permutation or are of unsupported types of operation. Otherwise, return
/* NOTE(review): heavily elided excerpt (numbering gaps) -- the return
   type, many declarations (i, lhs, dummy, optab, icode, first_load, ...),
   braces, else-arms and all 'return false/true' statements are missing;
   read alongside the full source before editing.  */
304 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
305 slp_tree *node, unsigned int group_size,
306 int *inside_cost, int *outside_cost,
307 int ncopies_for_cost, unsigned int *max_nunits,
308 VEC (int, heap) **load_permutation,
309 VEC (slp_tree, heap) **loads,
310 unsigned int vectorization_factor)
312 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
313 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
316 gimple stmt = VEC_index (gimple, stmts, 0);
317 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
318 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
319 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
320 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
322 bool stop_recursion = false, need_same_oprnds = false;
323 tree vectype, scalar_type, first_op1 = NULL_TREE;
324 unsigned int ncopies;
327 enum machine_mode optab_op2_mode;
328 enum machine_mode vec_mode;
329 tree first_stmt_const_oprnd = NULL_TREE;
330 struct data_reference *first_dr;
331 bool pattern0 = false, pattern1 = false;
333 bool permutation = false;
334 unsigned int load_place;
337 /* For every stmt in NODE find its def stmt/s. */
338 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
340 if (vect_print_dump_info (REPORT_SLP))
342 fprintf (vect_dump, "Build SLP for ");
343 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Only assignments and calls have an LHS; anything else cannot be SLPed.  */
346 lhs = gimple_get_lhs (stmt);
347 if (lhs == NULL_TREE)
349 if (vect_print_dump_info (REPORT_SLP))
352 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
353 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
359 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
360 vectype = get_vectype_for_scalar_type (scalar_type);
363 if (vect_print_dump_info (REPORT_SLP))
365 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
366 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
371 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
374 if (vect_print_dump_info (REPORT_SLP))
375 fprintf (vect_dump, "SLP with multiple types ");
377 /* FORNOW: multiple types are unsupported in BB SLP. */
382 /* In case of multiple types we need to detect the smallest type. */
383 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
384 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
386 if (is_gimple_call (stmt))
387 rhs_code = CALL_EXPR;
389 rhs_code = gimple_assign_rhs_code (stmt);
391 /* Check the operation. */
/* First stmt of the group: record its operation code; the 'if (i == 0)'
   guard is presumably on an elided line -- confirm.  */
394 first_stmt_code = rhs_code;
396 /* Shift arguments should be equal in all the packed stmts for a
397 vector shift with scalar shift operand. */
398 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
399 || rhs_code == LROTATE_EXPR
400 || rhs_code == RROTATE_EXPR)
402 vec_mode = TYPE_MODE (vectype);
404 /* First see if we have a vector/vector shift. */
405 optab = optab_for_tree_code (rhs_code, vectype,
409 || (optab->handlers[(int) vec_mode].insn_code
410 == CODE_FOR_nothing))
412 /* No vector/vector shift, try for a vector/scalar shift. */
413 optab = optab_for_tree_code (rhs_code, vectype,
418 if (vect_print_dump_info (REPORT_SLP))
419 fprintf (vect_dump, "Build SLP failed: no optab.");
422 icode = (int) optab->handlers[(int) vec_mode].insn_code;
423 if (icode == CODE_FOR_nothing)
425 if (vect_print_dump_info (REPORT_SLP))
426 fprintf (vect_dump, "Build SLP failed: "
427 "op not supported by target.");
430 optab_op2_mode = insn_data[icode].operand[2].mode;
431 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: every stmt in the group must use the same
   scalar shift amount as the first stmt.  */
433 need_same_oprnds = true;
434 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: operation must match the first stmt's, except that
   REALPART/IMAGPART pairs are treated as interchangeable.  */
441 if (first_stmt_code != rhs_code
442 && (first_stmt_code != IMAGPART_EXPR
443 || rhs_code != REALPART_EXPR)
444 && (first_stmt_code != REALPART_EXPR
445 || rhs_code != IMAGPART_EXPR))
447 if (vect_print_dump_info (REPORT_SLP))
450 "Build SLP failed: different operation in stmt ");
451 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
458 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
460 if (vect_print_dump_info (REPORT_SLP))
463 "Build SLP failed: different shift arguments in ");
464 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
471 /* Strided store or load. */
472 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* REFERENCE_CLASS_P on the LHS distinguishes a strided store from a
   strided load (loads are handled further below).  */
474 if (REFERENCE_CLASS_P (lhs))
477 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
478 stmt, &def_stmts0, &def_stmts1,
481 &first_stmt_def0_type,
482 &first_stmt_def1_type,
483 &first_stmt_const_oprnd,
485 &pattern0, &pattern1))
491 /* FORNOW: Check that there is no gap between the loads. */
492 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
494 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
495 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
497 if (vect_print_dump_info (REPORT_SLP))
499 fprintf (vect_dump, "Build SLP failed: strided "
501 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
507 /* Check that the size of interleaved loads group is not
508 greater than the SLP group size. */
509 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
510 > ncopies * group_size)
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: the number of "
515 "interleaved loads is greater than"
516 " the SLP group size ");
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
/* Alignment and cost are only checked/charged once, for the first load
   of the interleaving chain.  */
525 if (first_load == stmt)
527 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
528 if (vect_supportable_dr_alignment (first_dr)
529 == dr_unaligned_unsupported)
531 if (vect_print_dump_info (REPORT_SLP))
533 fprintf (vect_dump, "Build SLP failed: unsupported "
535 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
541 /* Analyze costs (for the first stmt in the group). */
542 vect_model_load_cost (vinfo_for_stmt (stmt),
543 ncopies_for_cost, *node);
546 /* Store the place of this load in the interleaving chain. In
547 case that permutation is needed we later decide if a specific
548 permutation is supported. */
549 load_place = vect_get_place_in_interleaving_chain (stmt,
554 VEC_safe_push (int, heap, *load_permutation, load_place);
556 /* We stop the tree when we reach a group of loads. */
557 stop_recursion = true;
560 } /* Strided access. */
/* Non-strided memory reference: not supported.  */
563 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
565 /* Not strided load. */
566 if (vect_print_dump_info (REPORT_SLP))
568 fprintf (vect_dump, "Build SLP failed: not strided load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 /* FORNOW: Not strided loads are not supported. */
576 /* Not memory operation. */
577 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
578 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: operation");
583 fprintf (vect_dump, " unsupported ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
590 /* Find the def-stmts. */
591 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
592 &def_stmts0, &def_stmts1,
593 &first_stmt_dt0, &first_stmt_dt1,
594 &first_stmt_def0_type,
595 &first_stmt_def1_type,
596 &first_stmt_const_oprnd,
598 &pattern0, &pattern1))
603 /* Add the costs of the node to the overall instance costs. */
604 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
605 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
607 /* Strided loads were reached - stop the recursion. */
612 VEC_safe_push (slp_tree, heap, *loads, *node);
/* Charge a permutation cost per group; presumably guarded by the
   'permutation' flag on an elided line -- confirm.  */
613 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
619 /* Create SLP_TREE nodes for the definition node/s. */
620 if (first_stmt_dt0 == vect_internal_def)
622 slp_tree left_node = XNEW (struct _slp_tree);
623 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
624 SLP_TREE_VEC_STMTS (left_node) = NULL;
625 SLP_TREE_LEFT (left_node) = NULL;
626 SLP_TREE_RIGHT (left_node) = NULL;
627 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
628 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
629 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
630 inside_cost, outside_cost, ncopies_for_cost,
631 max_nunits, load_permutation, loads,
632 vectorization_factor))
635 SLP_TREE_LEFT (*node) = left_node;
638 if (first_stmt_dt1 == vect_internal_def)
640 slp_tree right_node = XNEW (struct _slp_tree);
641 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
642 SLP_TREE_VEC_STMTS (right_node) = NULL;
643 SLP_TREE_LEFT (right_node) = NULL;
644 SLP_TREE_RIGHT (right_node) = NULL;
645 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
646 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
647 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
648 inside_cost, outside_cost, ncopies_for_cost,
649 max_nunits, load_permutation, loads,
650 vectorization_factor))
653 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to vect_dump, one stmt per line,
   then recurse into both children.  NOTE(review): elided excerpt --
   return type, declarations of 'i'/'stmt', NULL check on NODE and
   braces are missing from this view.  */
661 vect_print_slp_tree (slp_tree node)
669 fprintf (vect_dump, "node ");
670 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
672 fprintf (vect_dump, "\n\tstmt %d ", i);
673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
675 fprintf (vect_dump, "\n");
677 vect_print_slp_tree (SLP_TREE_LEFT (node));
678 vect_print_slp_tree (SLP_TREE_RIGHT (node));
682 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685 stmts in NODE are to be marked. */
/* NOTE(review): elided excerpt -- the J-vs-i selection ('if (j < 0 || i == j)'
   or similar) appears to be on a missing line; confirm against the source.  */
688 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
696 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
698 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
/* Recurse into both children with the same mark.  */
700 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
705 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* NOTE(review): elided excerpt -- return type, 'i'/'stmt' declarations,
   NULL check on NODE and braces are missing from this view.  */
708 vect_mark_slp_stmts_relevant (slp_tree node)
712 stmt_vec_info stmt_info;
717 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
719 stmt_info = vinfo_for_stmt (stmt);
/* A stmt must be either not yet marked, or already marked as used in
   scope -- anything else indicates an analysis inconsistency.  */
720 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
725 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
730 /* Check if the permutation required by the SLP INSTANCE is supported.
731 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* NOTE(review): elided excerpt -- return type, 'index'/'load' declarations,
   braces, free of TMP_LOADS and the return statements are missing.  */
734 vect_supported_slp_permutation_p (slp_instance instance)
736 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
737 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
738 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
739 VEC (slp_tree, heap) *sorted_loads = NULL;
741 slp_tree *tmp_loads = NULL;
742 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
745 /* FORNOW: The only supported loads permutation is loads from the same
746 location in all the loads in the node, when the data-refs in
747 nodes of LOADS constitute an interleaving chain.
748 Sort the nodes according to the order of accesses in the chain. */
749 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation in strides of GROUP_SIZE (one index per load
   node) together with the load nodes themselves.  */
751 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
752 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
753 i += group_size, j++)
755 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
756 /* Check that the loads are all in the same interleaving chain. */
757 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
759 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "Build SLP failed: unsupported data "
763 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place each load node at its position in the interleaving chain.  */
770 tmp_loads[index] = load;
773 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
774 for (i = 0; i < group_size; i++)
775 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
777 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
778 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Finally verify the target can actually perform this permutation
   (analysis-only call: NULL gsi/vec args).  */
781 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
782 SLP_INSTANCE_UNROLLING_FACTOR (instance),
790 /* Check if the required load permutation is supported.
791 LOAD_PERMUTATION contains a list of indices of the loads.
792 In SLP this permutation is relative to the order of strided stores that are
793 the base of the SLP instance. */
/* NOTE(review): elided excerpt -- return type, 'supported'/'load_index'
   declarations, several braces/returns and the bb_vinfo guard mentioned
   by the FORNOW comment are missing from this view.  */
796 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797 VEC (int, heap) *load_permutation)
799 int i = 0, j, prev = -1, next, k;
803 /* FORNOW: permutations are only supported in SLP. */
807 if (vect_print_dump_info (REPORT_SLP))
809 fprintf (vect_dump, "Load permutation ");
810 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811 fprintf (vect_dump, "%d ", next);
814 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
817 if (VEC_length (int, load_permutation)
818 != (unsigned int) (group_size * group_size))
/* Track which chain positions have been consumed, so the same index
   cannot be claimed by two different runs.  */
822 load_index = sbitmap_alloc (group_size);
823 sbitmap_zero (load_index);
824 for (j = 0; j < group_size; j++)
826 for (i = j * group_size, k = 0;
827 VEC_iterate (int, load_permutation, i, next) && k < group_size;
/* Every entry within one run of GROUP_SIZE must repeat the same index.  */
830 if (i != j * group_size && next != prev)
839 if (TEST_BIT (load_index, prev))
845 SET_BIT (load_index, prev);
/* Each chain position 0..GROUP_SIZE-1 must appear exactly once.  */
848 for (j = 0; j < group_size; j++)
849 if (!TEST_BIT (load_index, j))
852 sbitmap_free (load_index);
854 if (supported && i == group_size * group_size
855 && vect_supported_slp_permutation_p (slp_instn))
862 /* Find the first load in the loop that belongs to INSTANCE.
863 When loads are in several SLP nodes, there can be a case in which the first
864 load does not appear in the first SLP node to be transformed, causing
865 incorrect order of statements. Since we generate all the loads together,
866 they must be inserted before the first load of the SLP instance and not
867 before the first load of the first node of the instance. */
/* NOTE(review): elided excerpt -- return type, loop headers ('for (i = 0;
   ...' / 'for (j = 0; ...'), 'i'/'j'/'load_node' declarations and the
   final 'return first_load;' are missing from this view.  */
869 vect_find_first_load_in_slp_instance (slp_instance instance)
873 gimple first_load = NULL, load;
/* Scan every scalar load of every load node, keeping the earliest.  */
876 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
879 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
881 first_load = get_earlier_stmt (load, first_load);
887 /* Analyze an SLP instance starting from a group of strided stores. Call
888 vect_build_slp_tree to build a tree of packed stmts if possible.
889 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* NOTE(review): elided excerpt -- return type, the 'gimple stmt' parameter
   line, 'next' declaration, several guards ('if (!vectype)', the loop
   collecting DR_GROUP stmts, 'if (loop_vinfo) ... else ...') and the
   return statements are missing from this view.  */
892 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
895 slp_instance new_instance;
896 slp_tree node = XNEW (struct _slp_tree);
897 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
898 unsigned int unrolling_factor = 1, nunits;
899 tree vectype, scalar_type;
901 unsigned int vectorization_factor = 0;
902 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
903 unsigned int max_nunits = 0;
904 VEC (int, heap) *load_permutation;
905 VEC (slp_tree, heap) *loads;
907 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
908 vinfo_for_stmt (stmt))));
909 vectype = get_vectype_for_scalar_type (scalar_type);
912 if (vect_print_dump_info (REPORT_SLP))
914 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
915 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
920 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* In loop SLP the VF comes from the loop analysis; the loop/BB branch
   is presumably on an elided line -- confirm.  */
922 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
924 /* No multitypes in BB SLP. */
925 vectorization_factor = nunits;
927 /* Calculate the unrolling factor. */
928 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* BB SLP cannot unroll: there is no loop to unroll.  */
929 if (unrolling_factor != 1 && !loop_vinfo)
931 if (vect_print_dump_info (REPORT_SLP))
932 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
938 /* Create a node (a root of the SLP tree) for the packed strided stores. */
939 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
941 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
944 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
945 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
948 SLP_TREE_VEC_STMTS (node) = NULL;
949 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
950 SLP_TREE_LEFT (node) = NULL;
951 SLP_TREE_RIGHT (node) = NULL;
952 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
953 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
955 /* Calculate the number of vector stmts to create based on the unrolling
956 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
957 GROUP_SIZE / NUNITS otherwise. */
958 ncopies_for_cost = unrolling_factor * group_size / nunits;
960 load_permutation = VEC_alloc (int, heap, group_size * group_size);
961 loads = VEC_alloc (slp_tree, heap, group_size);
963 /* Build the tree for the SLP instance. */
964 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
965 &inside_cost, &outside_cost, ncopies_for_cost,
966 &max_nunits, &load_permutation, &loads,
967 vectorization_factor))
969 /* Create a new SLP instance. */
970 new_instance = XNEW (struct _slp_instance);
971 SLP_INSTANCE_TREE (new_instance) = node;
972 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
973 /* Calculate the unrolling factor based on the smallest type in the
975 if (max_nunits > nunits)
976 unrolling_factor = least_common_multiple (max_nunits, group_size)
979 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
980 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
981 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
982 SLP_INSTANCE_LOADS (new_instance) = loads;
983 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
984 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
985 if (VEC_length (slp_tree, loads))
987 if (!vect_supported_load_permutation_p (new_instance, group_size,
990 if (vect_print_dump_info (REPORT_SLP))
992 fprintf (vect_dump, "Build SLP failed: unsupported load "
994 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Ownership of NODE/LOADS/LOAD_PERMUTATION has passed to the
   instance, so a single free suffices here.  */
997 vect_free_slp_instance (new_instance);
1001 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1002 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unused and freed immediately.  */
1005 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1008 VEC_safe_push (slp_instance, heap,
1009 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1012 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1015 if (vect_print_dump_info (REPORT_SLP))
1016 vect_print_slp_tree (node);
1021 /* Failed to SLP. */
1022 /* Free the allocated memory. */
1023 vect_free_slp_tree (node);
1024 VEC_free (int, heap, load_permutation);
1025 VEC_free (slp_tree, heap, loads);
1031 /* Check if there are stmts in the loop that can be vectorized using SLP.
1032 Build SLP trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): elided excerpt -- return type, 'i'/'store'/'ok'
   declarations, the loop/BB branch guard and return statements are
   missing from this view.  */
1035 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1038 VEC (gimple, heap) *strided_stores;
1042 if (vect_print_dump_info (REPORT_SLP))
1043 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pick the strided-store worklist from whichever analysis context
   (loop or basic block) we were invoked for.  */
1046 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1048 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Try to build one SLP instance per strided-store group.  */
1050 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1051 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
/* For BB vectorization, failing every store group means the whole
   block fails; loop vectorization can still proceed without SLP.  */
1054 if (bb_vinfo && !ok)
1056 if (vect_print_dump_info (REPORT_SLP))
1057 fprintf (vect_dump, "Failed to SLP the basic block.");
1066 /* For each possible SLP instance decide whether to SLP it and calculate overall
1067 unrolling factor needed to SLP the loop. */
/* NOTE(review): elided excerpt -- return type, braces and the
   'decided_to_slp++' increment appear to be on missing lines.  */
1070 vect_make_slp_decision (loop_vec_info loop_vinfo)
1072 unsigned int i, unrolling_factor = 1;
1073 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074 slp_instance instance;
1075 int decided_to_slp = 0;
1077 if (vect_print_dump_info (REPORT_SLP))
1078 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1080 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1082 /* FORNOW: SLP if you can. */
/* The loop's SLP unrolling factor is the max over all instances.  */
1083 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1084 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1086 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1087 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1088 loop-based vectorization. Such stmts will be marked as HYBRID. */
1089 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1093 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1095 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1096 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1097 decided_to_slp, unrolling_factor);
1101 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1102 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): elided excerpt -- return type, 'i'/'stmt'/'use_stmt'
   declarations, NULL check on NODE and braces are missing from this
   view.  */
1105 vect_detect_hybrid_slp_stmts (slp_tree node)
1109 imm_use_iterator imm_iter;
1111 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA result is also consumed by a relevant
   non-SLP stmt (or a reduction/induction cycle) must become hybrid.  */
1116 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1117 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1118 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1119 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1120 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1121 && !STMT_SLP_TYPE (stmt_vinfo)
1122 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1123 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
1124 vect_mark_slp_stmts (node, hybrid, i);
1126 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1127 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1131 /* Find stmts that must be both vectorized and SLPed. */
/* NOTE(review): elided excerpt -- return type, 'i' declaration and
   braces are missing from this view.  */
1134 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1137 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1138 slp_instance instance;
1140 if (vect_print_dump_info (REPORT_SLP))
1141 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Run the hybrid detection over every SLP instance's tree.  */
1143 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1144 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1148 /* Create and initialize a new bb_vec_info struct for BB, as well as
1149 stmt_vec_info structs for all the stmts in it. */
/* NOTE(review): elided excerpt -- return type, the 'bb->aux = res' style
   back-pointer assignment and 'return res;' are missing from this view.  */
1152 new_bb_vec_info (basic_block bb)
1154 bb_vec_info res = NULL;
1155 gimple_stmt_iterator gsi;
/* xcalloc zero-initializes all fields of the new bb_vec_info.  */
1157 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1158 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info (with this BB context) to every stmt.  */
1160 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1162 gimple stmt = gsi_stmt (gsi);
1163 gimple_set_uid (stmt, 0);
1164 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1167 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1168 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1175 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1176 stmts in the basic block. */
/* NOTE(review): elided excerpt -- return type, NULL check on BB_VINFO,
   the per-instance free loop and the final free (bb_vinfo) appear to be
   on missing lines.  */
1179 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1182 gimple_stmt_iterator si;
1187 bb = BB_VINFO_BB (bb_vinfo);
1189 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1191 gimple stmt = gsi_stmt (si);
1192 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1195 /* Free stmt_vec_info. */
1196 free_stmt_vec_info (stmt);
1199 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1200 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1206 /* Analyze statements contained in SLP tree node after recursively analyzing
1207 the subtree. Return TRUE if the operations are supported. */
/* NOTE(review): elided excerpt -- return type, NULL check on NODE (which
   must make the recursion terminate), 'i'/'stmt'/'dummy' declarations
   and the return statements are missing from this view.  */
1210 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Children first: a node is only supportable if its operands are.  */
1219 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1220 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1223 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226 gcc_assert (stmt_info);
1227 gcc_assert (PURE_SLP_STMT (stmt_info));
1229 if (!vect_analyze_stmt (stmt, &dummy, node))
1237 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1238 operations are supported. */
/* NOTE(review): elided excerpt -- return type, 'i' declaration, the 'i++'
   advance in the else-arm of the removal loop, and the return statements
   are missing from this view.  */
1241 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1243 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1244 slp_instance instance;
/* Note: no i++ in the for-header -- removal of instance I shifts the
   next one into slot I (the increment is presumably in an elided else).  */
1247 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1249 if (!vect_slp_analyze_node_operations (bb_vinfo,
1250 SLP_INSTANCE_TREE (instance)))
/* Unsupported instance: drop it but keep analyzing the rest.  */
1252 vect_free_slp_instance (instance);
1253 VEC_ordered_remove (slp_instance, slp_instances, i);
/* BB vectorization is only worthwhile if at least one instance survived.  */
1259 if (!VEC_length (slp_instance, slp_instances))
1266 /* Check if the basic block can be vectorized. */
1269 vect_slp_analyze_bb (basic_block bb)
1271 bb_vec_info bb_vinfo;
1272 VEC (ddr_p, heap) *ddrs;
1273 VEC (slp_instance, heap) *slp_instances;
1274 slp_instance instance;
1276 gimple_stmt_iterator gsi;
1278 int max_vf = MAX_VECTORIZATION_FACTOR;
1280 if (vect_print_dump_info (REPORT_DETAILS))
1281 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1283 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1285 gimple stmt = gsi_stmt (gsi);
1286 if (!is_gimple_debug (stmt)
1287 && !gimple_nop_p (stmt)
1288 && gimple_code (stmt) != GIMPLE_LABEL)
1292 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1294 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1295 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1301 bb_vinfo = new_bb_vec_info (bb);
1305 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1308 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1311 destroy_bb_vec_info (bb_vinfo);
1315 ddrs = BB_VINFO_DDRS (bb_vinfo);
1316 if (!VEC_length (ddr_p, ddrs))
1318 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1319 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1322 destroy_bb_vec_info (bb_vinfo);
1326 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1329 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1330 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1331 "in basic block.\n");
1333 destroy_bb_vec_info (bb_vinfo);
1337 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1339 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1340 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1343 destroy_bb_vec_info (bb_vinfo);
1347 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1349 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1350 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1353 destroy_bb_vec_info (bb_vinfo);
1357 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1359 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1360 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1363 destroy_bb_vec_info (bb_vinfo);
1367 /* Check the SLP opportunities in the basic block, analyze and build SLP
1369 if (!vect_analyze_slp (NULL, bb_vinfo))
1371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1372 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1373 "in basic block.\n");
1375 destroy_bb_vec_info (bb_vinfo);
1379 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1381 /* Mark all the statements that we want to vectorize as pure SLP and
1383 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1385 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1386 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1389 if (!vect_slp_analyze_operations (bb_vinfo))
1391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1392 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1394 destroy_bb_vec_info (bb_vinfo);
1398 if (vect_print_dump_info (REPORT_DETAILS))
1399 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1405 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1406 the number of created vector stmts depends on the unrolling factor). However,
1407 the actual number of vector stmts for every SLP node depends on VF which is
1408 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1409 In this function we assume that the inside costs calculated in
1410 vect_model_xxx_cost are linear in ncopies. */
1413 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1415 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1416 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1417 slp_instance instance;
1419 if (vect_print_dump_info (REPORT_SLP))
1420 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside-of-loop cost from "per unrolling factor"
   to "per VF".  The integer division assumes VF is a multiple of the
   instance unrolling factor -- TODO(review): confirm the vectorizer
   guarantees this invariant.  */
1422 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1423 /* We assume that costs are linear in ncopies. */
1424 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1425 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1429 /* For constant and loop invariant defs of SLP_NODE this function returns
1430 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1431 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1432 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1435 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1436 unsigned int op_num, unsigned int number_of_vectors)
1438 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1439 gimple stmt = VEC_index (gimple, stmts, 0);
1440 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1444 int j, number_of_places_left_in_vector;
1447 int group_size = VEC_length (gimple, stmts);
1448 unsigned int vec_num, i;
1449 int number_of_copies = 1;
1450 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1451 bool constant_p, is_store;
/* For a store the operand of interest is RHS1; otherwise pick the operand
   selected by OP_NUM (gimple operand 0 is the LHS, hence the +1).  */
1453 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1456 op = gimple_assign_rhs1 (stmt);
1461 op = gimple_op (stmt, op_num + 1);
/* CONSTANT_P (set in an elided line, presumably) distinguishes all-constant
   groups (build_vector) from loop-invariant ones (constructor) below.  */
1464 if (CONSTANT_CLASS_P (op))
1469 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1470 gcc_assert (vector_type);
1472 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1474 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1475 created vectors. It is greater than 1 if unrolling is performed.
1477 For example, we have two scalar operands, s1 and s2 (e.g., group of
1478 strided accesses of size two), while NUNITS is four (i.e., four scalars
1479 of this type can be packed in a vector). The output vector will contain
1480 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1483 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1484 containing the operands.
1486 For example, NUNITS is four as before, and the group size is 8
1487 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1488 {s5, s6, s7, s8}. */
1490 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1492 number_of_places_left_in_vector = nunits;
1493 for (j = 0; j < number_of_copies; j++)
/* Walk the group backwards (tree_cons below prepends, so the final list is
   in forward order).  I is unsigned: after element 0 it wraps to UINT_MAX
   and termination relies on VEC_iterate rejecting the out-of-range index.  */
1495 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1498 op = gimple_assign_rhs1 (stmt);
1500 op = gimple_op (stmt, op_num + 1);
1502 /* Create 'vect_ = {op0,op1,...,opn}'. */
1503 t = tree_cons (NULL_TREE, op, t);
1505 number_of_places_left_in_vector--;
/* A full vector's worth of operands has been collected: materialize it.  */
1507 if (number_of_places_left_in_vector == 0)
1509 number_of_places_left_in_vector = nunits;
/* Constants become a VECTOR_CST, invariants a CONSTRUCTOR (the guard on
   CONSTANT_P is elided between these two lines).  */
1512 vec_cst = build_vector (vector_type, t);
1514 vec_cst = build_constructor_from_list (vector_type, t);
1515 VEC_quick_push (tree, voprnds,
1516 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1522 /* Since the vectors are created in the reverse order, we should invert
1524 vec_num = VEC_length (tree, voprnds);
1525 for (j = vec_num - 1; j >= 0; j--)
1527 vop = VEC_index (tree, voprnds, j);
1528 VEC_quick_push (tree, *vec_oprnds, vop);
1531 VEC_free (tree, heap, voprnds);
1533 /* In case that VF is greater than the unrolling factor needed for the SLP
1534 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1535 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1536 to replicate the vectors. */
1537 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
/* Re-push the first VEC_NUM defs cyclically until the requested count is
   reached.  */
1539 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1540 VEC_quick_push (tree, *vec_oprnds, vop);
1545 /* Get vectorized definitions from SLP_NODE that contains corresponding
1546 vectorized def-stmts. */
1549 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1552 gimple vec_def_stmt;
/* The node must already have been vectorized.  */
1555 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
/* For every vector statement of the node, push its LHS (the vector def)
   onto *VEC_OPRNDS.  (The enclosing for-header around this iterate call is
   partly elided in this listing.)  */
1558 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1561 gcc_assert (vec_def_stmt);
1562 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1563 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1568 /* Get vectorized definitions for SLP_NODE.
1569 If the scalar definitions are loop invariants or constants, collect them and
1570 call vect_get_constant_vectors() to create vector stmts.
1571 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1572 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1573 vect_get_slp_vect_defs() to retrieve them.
1574 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1575 the right node. This is used when the second operand must remain scalar. */
1578 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1579 VEC (tree,heap) **vec_oprnds1)
1582 enum tree_code code;
1583 int number_of_vects;
1584 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1586 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1587 /* The number of vector defs is determined by the number of vector statements
1588 in the node from which we get those statements. */
1589 if (SLP_TREE_LEFT (slp_node))
1590 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
/* No left child: the defs come from this node itself (constants or
   invariants); the count may need rescaling below.  */
1593 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1594 /* Number of vector stmts was calculated according to LHS in
1595 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1596 necessary. See vect_get_smallest_scalar_type() for details. */
1597 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1599 if (rhs_size_unit != lhs_size_unit)
1601 number_of_vects *= rhs_size_unit;
1602 number_of_vects /= lhs_size_unit;
1606 /* Allocate memory for vectorized defs. */
1607 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1609 /* SLP_NODE corresponds either to a group of stores or to a group of
1610 unary/binary operations. We don't call this function for loads. */
1611 if (SLP_TREE_LEFT (slp_node))
1612 /* The defs are already vectorized. */
1613 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1615 /* Build vectors from scalar defs. */
1616 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
/* Stores have no second operand to gather; presumably an early return sits
   in the elided lines after this check.  */
1618 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1619 /* Since we don't call this function with loads, this is a group of
/* Only binary operations have a second operand, and the caller may opt out
   by passing VEC_OPRNDS1 == NULL.  */
1623 code = gimple_assign_rhs_code (first_stmt);
1624 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1627 /* The number of vector defs is determined by the number of vector statements
1628 in the node from which we get those statements. */
1629 if (SLP_TREE_RIGHT (slp_node))
1630 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1632 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1634 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
/* Mirror of the operand-0 logic above, for operand 1 / the right child.  */
1636 if (SLP_TREE_RIGHT (slp_node))
1637 /* The defs are already vectorized. */
1638 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1640 /* Build vectors from scalar defs. */
1641 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1645 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1646 building a vector of type MASK_TYPE from it) and two input vectors placed in
1647 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1648 shifting by STRIDE elements of DR_CHAIN for every copy.
1649 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1651 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1652 the created stmts must be inserted. */
1655 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1656 tree mask, int first_vec_indx, int second_vec_indx,
1657 gimple_stmt_iterator *gsi, slp_tree node,
1658 tree builtin_decl, tree vectype,
1659 VEC(tree,heap) *dr_chain,
1660 int ncopies, int vect_stmts_counter)
1663 gimple perm_stmt = NULL;
1664 stmt_vec_info next_stmt_info;
1666 tree first_vec, second_vec, data_ref;
1667 VEC (tree, heap) *params = NULL;
1669 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1671 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad VEC_STMTS with NULLs up to its declared size so the VEC_replace in
   the loop below always targets an existing slot.  */
1673 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1674 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1675 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1677 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1678 for (i = 0; i < ncopies; i++)
1680 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1681 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1683 /* Build argument list for the vectorized call. */
/* A fresh 3-element PARAMS vector is built per copy; freeing the previous
   one here (no-op on the first iteration since PARAMS is NULL).  */
1684 VEC_free (tree, heap, params);
1685 params = VEC_alloc (tree, heap, 3);
1686 VEC_quick_push (tree, params, first_vec);
1687 VEC_quick_push (tree, params, second_vec);
1688 VEC_quick_push (tree, params, mask);
1690 /* Generate the permute statement. */
1691 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1692 data_ref = make_ssa_name (perm_dest, perm_stmt);
1693 gimple_call_set_lhs (perm_stmt, data_ref);
1694 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1696 /* Store the vector statement in NODE. */
1697 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1698 stride * i + vect_stmts_counter, perm_stmt);
/* Each subsequent copy reads STRIDE elements further along DR_CHAIN.  */
1700 first_vec_indx += stride;
1701 second_vec_indx += stride;
1704 /* Mark the scalar stmt as vectorized. */
/* Record the last generated permute as the vectorized form of
   NEXT_SCALAR_STMT.  */
1705 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1706 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1710 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1711 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1712 representation. Check that the mask is valid and return FALSE if not.
1713 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1714 the next vector, i.e., the current first vector is not needed. */
1717 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1718 int mask_nunits, bool only_one_vec, int index,
1719 int *mask, int *current_mask_element,
1720 bool *need_next_vector)
/* State carried across consecutive calls while a single mask is being built
   element by element; it is reset below once the last element
   (INDEX == MASK_NUNITS - 1) has been processed.  This makes the function
   non-reentrant and order-dependent by design.  */
1723 static int number_of_mask_fixes = 1;
1724 static bool mask_fixed = false;
1725 static bool needs_first_vector = false;
1727 /* Convert to target specific representation. */
1728 *current_mask_element = first_mask_element + m;
1729 /* Adjust the value in case it's a mask for second and third vectors. */
1730 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
/* Remember that this mask still references the first input vector.  */
1732 if (*current_mask_element < mask_nunits)
1733 needs_first_vector = true;
1735 /* We have only one input vector to permute but the mask accesses values in
1736 the next vector as well. */
1737 if (only_one_vec && *current_mask_element >= mask_nunits)
1739 if (vect_print_dump_info (REPORT_DETAILS))
1741 fprintf (vect_dump, "permutation requires at least two vectors ");
1742 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1748 /* The mask requires the next vector. */
1749 if (*current_mask_element >= mask_nunits * 2)
1751 if (needs_first_vector || mask_fixed)
1753 /* We either need the first vector too or have already moved to the
1754 next vector. In both cases, this permutation needs three
1756 if (vect_print_dump_info (REPORT_DETAILS))
1758 fprintf (vect_dump, "permutation requires at "
1759 "least three vectors ");
1760 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1766 /* We move to the next vector, dropping the first one and working with
1767 the second and the third - we need to adjust the values of the mask
/* Rebase both the current element and all previously emitted elements of
   this mask onto the shifted vector pair.  */
1769 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1771 for (i = 0; i < index; i++)
1772 mask[i] -= mask_nunits * number_of_mask_fixes;
1774 (number_of_mask_fixes)++;
1778 *need_next_vector = mask_fixed;
1780 /* This was the last element of this mask. Start a new one. */
1781 if (index == mask_nunits - 1)
1783 number_of_mask_fixes = 1;
1785 needs_first_vector = false;
1792 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1793 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1794 permute statements for SLP_NODE_INSTANCE. */
1796 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1797 gimple_stmt_iterator *gsi, int vf,
1798 slp_instance slp_node_instance, bool analyze_only)
1800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1801 tree mask_element_type = NULL_TREE, mask_type;
1802 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1804 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1805 gimple next_scalar_stmt;
1806 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1807 int first_mask_element;
1808 int index, unroll_factor, *mask, current_mask_element, ncopies;
1809 bool only_one_vec = false, need_next_vector = false;
1810 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
/* The whole transformation hinges on the target providing a vec_perm
   builtin; bail out (presumably returning false -- return elided) if not.  */
1812 if (!targetm.vectorize.builtin_vec_perm)
1814 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "no builtin for vect permute for ");
1817 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1823 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1824 &mask_element_type);
1825 if (!builtin_decl || !mask_element_type)
1827 if (vect_print_dump_info (REPORT_DETAILS))
1829 fprintf (vect_dump, "no builtin for vect permute for ");
1830 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* SCALE converts element indices into the target's mask-element units
   (e.g. byte indices when the mask vector has more, narrower elements).  */
1836 mask_type = get_vectype_for_scalar_type (mask_element_type);
1837 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1838 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1839 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1840 scale = mask_nunits / nunits;
1841 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1843 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1844 unrolling factor. */
1845 orig_vec_stmts_num = group_size *
1846 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1847 if (orig_vec_stmts_num == 1)
1848 only_one_vec = true;
1850 /* Number of copies is determined by the final vectorization factor
1851 relatively to SLP_NODE_INSTANCE unrolling factor. */
1852 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1854 /* Generate permutation masks for every NODE. Number of masks for each NODE
1855 is equal to GROUP_SIZE.
1856 E.g., we have a group of three nodes with three loads from the same
1857 location in each node, and the vector size is 4. I.e., we have a
1858 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1859 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1860 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1863 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1864 specific type, e.g., in bytes for Altivec).
1865 The last mask is illegal since we assume two operands for permute
1866 operation, and the mask element values can't be outside that range. Hence,
1867 the last mask must be converted into {2,5,5,5}.
1868 For the first two permutations we need the first and the second input
1869 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1870 we need the second and the third vectors: {b1,c1,a2,b2} and
/* One iteration per load node of the instance (for-header partly elided).  */
1874 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1880 vect_stmts_counter = 0;
1882 first_vec_index = vec_index++;
/* With a single input vector both operands of the permute are the same.  */
1884 second_vec_index = first_vec_index;
1886 second_vec_index = vec_index++;
1888 for (j = 0; j < unroll_factor; j++)
1890 for (k = 0; k < group_size; k++)
1892 first_mask_element = (i + j * group_size) * scale;
1893 for (m = 0; m < scale; m++)
1895 if (!vect_get_mask_element (stmt, first_mask_element, m,
1896 mask_nunits, only_one_vec, index, mask,
/* NOTE(review): '¤t_mask_element' below is a mojibake artifact --
   "&curren" was decoded as the HTML entity for the currency sign.  The
   argument should read '&current_mask_element'; restore it from the
   upstream source.  */
1897 ¤t_mask_element, &need_next_vector))
1900 mask[index++] = current_mask_element;
/* A full mask has been accumulated: materialize it and (unless
   ANALYZE_ONLY) emit the permute statements.  */
1903 if (index == mask_nunits)
1905 tree mask_vec = NULL;
/* Build the mask VECTOR_CST; tree_cons prepends, so walking INDEX down
   yields the elements in forward order.  */
1907 while (--index >= 0)
1909 tree t = build_int_cst (mask_element_type, mask[index]);
1910 mask_vec = tree_cons (NULL, t, mask_vec);
1912 mask_vec = build_vector (mask_type, mask_vec);
1915 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1918 if (vect_print_dump_info (REPORT_DETAILS))
1920 fprintf (vect_dump, "unsupported vect permute ");
1921 print_generic_expr (vect_dump, mask_vec, 0);
/* The mask no longer needs the first vector: slide the input window
   forward by one vector.  */
1929 if (need_next_vector)
1931 first_vec_index = second_vec_index;
1932 second_vec_index = vec_index;
1935 next_scalar_stmt = VEC_index (gimple,
1936 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1938 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1939 mask_vec, first_vec_index, second_vec_index,
1940 gsi, node, builtin_decl, vectype, dr_chain,
1941 ncopies, vect_stmts_counter++);
1954 /* Vectorize SLP instance tree in postorder. */
1957 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1958 unsigned int vectorization_factor)
1961 bool strided_store, is_store;
1962 gimple_stmt_iterator si;
1963 stmt_vec_info stmt_info;
1964 unsigned int vec_stmts_size, nunits, group_size;
1967 slp_tree loads_node;
/* Postorder: vectorize both children before this node.  (The NULL-node
   guard is elided from this listing.)  */
1972 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1973 vectorization_factor);
1974 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1975 vectorization_factor);
/* The first scalar statement of the node is representative for type and
   placement decisions below.  */
1977 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1978 stmt_info = vinfo_for_stmt (stmt);
1980 /* VECTYPE is the type of the destination. */
1981 vectype = STMT_VINFO_VECTYPE (stmt_info);
1982 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1983 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1985 /* For each SLP instance calculate number of vector stmts to be created
1986 for the scalar stmts in each node of the SLP tree. Number of vector
1987 elements in one vector iteration is the number of scalar elements in
1988 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1990 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1992 /* In case of load permutation we have to allocate vectorized statements for
1993 all the nodes that participate in that permutation. */
1994 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
/* (For-header around this iterate call is partly elided.)  */
1997 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2000 if (!SLP_TREE_VEC_STMTS (loads_node))
2002 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2004 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate this node's vector-statement slots exactly once.  */
2009 if (!SLP_TREE_VEC_STMTS (node))
2011 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2012 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2015 if (vect_print_dump_info (REPORT_DETAILS))
2017 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2018 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2021 /* Loads should be inserted before the first load. */
2022 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2023 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2024 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2025 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2027 si = gsi_for_stmt (stmt);
2029 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2032 if (DR_GROUP_FIRST_DR (stmt_info))
2033 /* If IS_STORE is TRUE, the vectorization of the
2034 interleaving chain was completed - free all the stores in
2036 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2038 /* FORNOW: SLP originates only from strided stores. */
2044 /* FORNOW: SLP originates only from strided stores. */
/* Generate vector code for all SLP instances of either LOOP_VINFO or
   BB_VINFO (exactly one is expected to be non-NULL; the if/else around the
   two initializations below is elided in this listing).  */
2050 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2052 VEC (slp_instance, heap) *slp_instances;
2053 slp_instance instance;
2055 bool is_store = false;
/* Loop SLP: instances and the vectorization factor come from LOOP_VINFO.  */
2059 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2060 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Basic-block SLP: instances come from BB_VINFO (VF setup elided).  */
2064 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2068 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2070 /* Schedule the tree of INSTANCE. */
/* Note: IS_STORE keeps only the result of the last instance scheduled;
   presumably it is what the (elided) return statement yields.  */
2071 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2073 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2074 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2075 fprintf (vect_dump, "vectorizing stmts using SLP.");
2082 /* Vectorize the basic block. */
/* Transform entry point for BB SLP: assumes vect_slp_analyze_bb already
   attached a bb_vec_info to BB.  */
2085 vect_slp_transform_bb (basic_block bb)
2087 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2088 gimple_stmt_iterator si;
2090 gcc_assert (bb_vinfo);
2092 if (vect_print_dump_info (REPORT_DETAILS))
2093 fprintf (vect_dump, "SLPing BB\n");
2095 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2097 gimple stmt = gsi_stmt (si);
2098 stmt_vec_info stmt_info;
2100 if (vect_print_dump_info (REPORT_DETAILS))
2102 fprintf (vect_dump, "------>SLPing statement: ");
2103 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2106 stmt_info = vinfo_for_stmt (stmt);
2107 gcc_assert (stmt_info);
2109 /* Schedule all the SLP instances when the first SLP stmt is reached. */
/* All instances are scheduled at once here; presumably the loop then stops
   scanning (break elided in this listing).  */
2110 if (STMT_SLP_TYPE (stmt_info))
2112 vect_schedule_slp (NULL, bb_vinfo);
2117 mark_sym_for_renaming (gimple_vop (cfun));
2118 /* The memory tags and pointers in vectorized statements need to
2119 have their SSA forms updated. FIXME, why can't this be delayed
2120 until all the loops have been transformed? */
2121 update_ssa (TODO_update_ssa);
2123 if (vect_print_dump_info (REPORT_DETAILS))
2124 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2126 destroy_bb_vec_info (bb_vinfo);