1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if it succeeds and NULL if not. */
45 find_bb_location (basic_block bb)
48 gimple_stmt_iterator si;
/* Walk the statements of BB and return the first known source location.
   NOTE(review): this listing elides lines -- STMT is presumably set from
   gsi_stmt (si) inside the loop; confirm against the full source.  */
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 vect_free_slp_tree (slp_tree node)
/* Post-order traversal: release both subtrees before this node's vectors.  */
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* The scalar-stmt vector always exists; the vectorized-stmt vector may
   never have been allocated, hence the NULL check before freeing it.  */
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
87 /* Free the memory allocated for the SLP instance INSTANCE: its whole SLP
   tree, the load-permutation index vector, and the vector of load nodes. */
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
116 unsigned int i, number_of_oprnds;
/* Def types of the operands seen so far; at most two RHS operands are
   handled (binary is the widest supported rhs class here).  */
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
/* LOOP is only set when analyzing loop SLP; it stays NULL for basic-block
   SLP (the pattern check below is loop-only).  */
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT in turn.  */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
pattern.  */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* First stmt of the group: remember that this operand came from a
   pattern; subsequent stmts must agree (checked in the else-if).  */
156 if (!*first_stmt_dt0)
160 if (i == 1 && !*first_stmt_dt1)
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ")
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Replace DEF_STMT by the pattern's representative stmt and re-read
   its def type.  */
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
/* Recompute DEF from the pattern def stmt.  */
185 switch (gimple_code (def_stmt))
188 def = gimple_phi_result (def_stmt);
192 def = gimple_assign_lhs (def_stmt);
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
207 *first_stmt_def0_type = TREE_TYPE (def);
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not a memory operation (we don't call this function for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
227 *first_stmt_def1_type = TREE_TYPE (def);
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
239 *first_stmt_const_oprnd = oprnd;
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
268 /* Check the types of the definitions. */
271 case vect_constant_def:
272 case vect_external_def:
/* Internal defs become children of this SLP node: record the def
   stmt in the vector for the corresponding operand position.  */
275 case vect_internal_def:
277 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
279 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
283 /* FORNOW: Not supported. */
284 if (vect_print_dump_info (REPORT_SLP))
286 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287 print_generic_expr (vect_dump, def, TDF_SLIM);
298 /* Recursively build an SLP tree starting from NODE.
299 Fail (and return FALSE) if def-stmts are not isomorphic, require data
300 permutation or are of unsupported types of operation. Otherwise, return
TRUE.  */
304 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
305 slp_tree *node, unsigned int group_size,
306 int *inside_cost, int *outside_cost,
307 int ncopies_for_cost, unsigned int *max_nunits,
308 VEC (int, heap) **load_permutation,
309 VEC (slp_tree, heap) **loads,
310 unsigned int vectorization_factor)
/* Collected def stmts for operand 0 and operand 1 of every scalar stmt
   in this node; they seed the left/right child nodes below.  */
312 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
313 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
316 gimple stmt = VEC_index (gimple, stmts, 0);
/* State describing the first stmt of the group; every following stmt
   must be isomorphic to it.  */
317 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
318 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
319 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
320 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
322 bool stop_recursion = false, need_same_oprnds = false;
323 tree vectype, scalar_type, first_op1 = NULL_TREE;
324 unsigned int ncopies;
327 enum machine_mode optab_op2_mode;
328 enum machine_mode vec_mode;
329 tree first_stmt_const_oprnd = NULL_TREE;
330 struct data_reference *first_dr;
331 bool pattern0 = false, pattern1 = false;
333 bool permutation = false;
334 unsigned int load_place;
337 /* For every stmt in NODE find its def stmt/s. */
338 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
340 if (vect_print_dump_info (REPORT_SLP))
342 fprintf (vect_dump, "Build SLP for ");
343 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Only stmts that produce a value (assigns/calls with an lhs) can be
   part of an SLP group.  */
346 lhs = gimple_get_lhs (stmt);
347 if (lhs == NULL_TREE)
349 if (vect_print_dump_info (REPORT_SLP))
352 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
353 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
359 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
360 vectype = get_vectype_for_scalar_type (scalar_type);
363 if (vect_print_dump_info (REPORT_SLP))
365 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
366 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
371 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
374 if (vect_print_dump_info (REPORT_SLP))
375 fprintf (vect_dump, "SLP with multiple types ");
377 /* FORNOW: multiple types are unsupported in BB SLP. */
382 /* In case of multiple types we need to detect the smallest type. */
383 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
384 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
386 if (is_gimple_call (stmt))
387 rhs_code = CALL_EXPR;
389 rhs_code = gimple_assign_rhs_code (stmt);
391 /* Check the operation. */
394 first_stmt_code = rhs_code;
396 /* Shift arguments should be equal in all the packed stmts for a
397 vector shift with scalar shift operand. */
398 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
399 || rhs_code == LROTATE_EXPR
400 || rhs_code == RROTATE_EXPR)
402 vec_mode = TYPE_MODE (vectype);
404 /* First see if we have a vector/vector shift. */
405 optab = optab_for_tree_code (rhs_code, vectype,
409 || (optab->handlers[(int) vec_mode].insn_code
410 == CODE_FOR_nothing))
412 /* No vector/vector shift, try for a vector/scalar shift. */
413 optab = optab_for_tree_code (rhs_code, vectype,
418 if (vect_print_dump_info (REPORT_SLP))
419 fprintf (vect_dump, "Build SLP failed: no optab.");
422 icode = (int) optab->handlers[(int) vec_mode].insn_code;
423 if (icode == CODE_FOR_nothing)
425 if (vect_print_dump_info (REPORT_SLP))
426 fprintf (vect_dump, "Build SLP failed: "
427 "op not supported by target.");
/* For a vector/scalar shift, all packed stmts must share the same
   (scalar) shift operand -- remember it for comparison below.  */
430 optab_op2_mode = insn_data[icode].operand[2].mode;
431 if (!VECTOR_MODE_P (optab_op2_mode))
433 need_same_oprnds = true;
434 first_op1 = gimple_assign_rhs2 (stmt);
/* Not the first stmt: its operation must match the group's, except
   that REALPART/IMAGPART pairs are explicitly tolerated.  */
441 if (first_stmt_code != rhs_code
442 && (first_stmt_code != IMAGPART_EXPR
443 || rhs_code != REALPART_EXPR)
444 && (first_stmt_code != REALPART_EXPR
445 || rhs_code != IMAGPART_EXPR))
447 if (vect_print_dump_info (REPORT_SLP))
450 "Build SLP failed: different operation in stmt ");
451 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
458 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
460 if (vect_print_dump_info (REPORT_SLP))
463 "Build SLP failed: different shift arguments in ");
464 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
471 /* Strided store or load. */
472 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* An lhs that is a memory reference means a strided STORE: recurse
   into its RHS operands like any other computation.  */
474 if (REFERENCE_CLASS_P (lhs))
477 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
478 stmt, &def_stmts0, &def_stmts1,
481 &first_stmt_def0_type,
482 &first_stmt_def1_type,
483 &first_stmt_const_oprnd,
485 &pattern0, &pattern1))
/* Otherwise a strided LOAD.  */
491 /* FORNOW: Check that there is no gap between the loads. */
492 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
494 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
495 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
497 if (vect_print_dump_info (REPORT_SLP))
499 fprintf (vect_dump, "Build SLP failed: strided "
501 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
507 /* Check that the size of the interleaved loads group is not
508 greater than the SLP group size. */
509 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
510 > ncopies * group_size)
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: the number of "
515 "interleaved loads is greater than"
516 " the SLP group size ");
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
/* Alignment is checked (and load cost modeled) once, on the first
   load of the interleaving chain.  */
525 if (first_load == stmt)
527 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
528 if (vect_supportable_dr_alignment (first_dr)
529 == dr_unaligned_unsupported)
531 if (vect_print_dump_info (REPORT_SLP))
533 fprintf (vect_dump, "Build SLP failed: unsupported "
535 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
541 /* Analyze costs (for the first stmt in the group). */
542 vect_model_load_cost (vinfo_for_stmt (stmt),
543 ncopies_for_cost, *node);
546 /* Store the place of this load in the interleaving chain. In
547 case that permutation is needed we later decide if a specific
548 permutation is supported. */
549 load_place = vect_get_place_in_interleaving_chain (stmt,
554 VEC_safe_push (int, heap, *load_permutation, load_place);
556 /* We stop the tree when we reach a group of loads. */
557 stop_recursion = true;
560 } /* Strided access. */
/* Non-strided access from here on.  */
563 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
565 /* Not strided load. */
566 if (vect_print_dump_info (REPORT_SLP))
568 fprintf (vect_dump, "Build SLP failed: not strided load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 /* FORNOW: Not strided loads are not supported. */
576 /* Not memory operation. */
577 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
578 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: operation");
583 fprintf (vect_dump, " unsupported ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
590 /* Find the def-stmts. */
591 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
592 &def_stmts0, &def_stmts1,
593 &first_stmt_dt0, &first_stmt_dt1,
594 &first_stmt_def0_type,
595 &first_stmt_def1_type,
596 &first_stmt_const_oprnd,
598 &pattern0, &pattern1))
603 /* Add the costs of the node to the overall instance costs. */
604 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
605 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
607 /* Strided loads were reached - stop the recursion.  Record this node
   as a load node and account for a possible permutation cost.  */
612 VEC_safe_push (slp_tree, heap, *loads, *node);
613 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
619 /* Create SLP_TREE nodes for the definition node/s. */
620 if (first_stmt_dt0 == vect_internal_def)
622 slp_tree left_node = XNEW (struct _slp_tree);
623 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
624 SLP_TREE_VEC_STMTS (left_node) = NULL;
625 SLP_TREE_LEFT (left_node) = NULL;
626 SLP_TREE_RIGHT (left_node) = NULL;
627 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
628 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
629 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
630 inside_cost, outside_cost, ncopies_for_cost,
631 max_nunits, load_permutation, loads,
632 vectorization_factor))
635 SLP_TREE_LEFT (*node) = left_node;
638 if (first_stmt_dt1 == vect_internal_def)
640 slp_tree right_node = XNEW (struct _slp_tree);
641 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
642 SLP_TREE_VEC_STMTS (right_node) = NULL;
643 SLP_TREE_LEFT (right_node) = NULL;
644 SLP_TREE_RIGHT (right_node) = NULL;
645 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
646 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
647 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
648 inside_cost, outside_cost, ncopies_for_cost,
649 max_nunits, load_permutation, loads,
650 vectorization_factor))
653 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: print each scalar stmt
   of the node, then recurse into the left and right children.  */
661 vect_print_slp_tree (slp_tree node)
669 fprintf (vect_dump, "node ");
670 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
672 fprintf (vect_dump, "\n\tstmt %d ", i);
673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
675 fprintf (vect_dump, "\n");
677 vect_print_slp_tree (SLP_TREE_LEFT (node));
678 vect_print_slp_tree (SLP_TREE_RIGHT (node));
682 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685 stmts in NODE are to be marked. */
688 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* Set the SLP type on the selected scalar stmt(s), then recurse into the
   children with the same MARK/J.  */
696 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
698 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
700 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
705 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
708 vect_mark_slp_stmts_relevant (slp_tree node)
712 stmt_vec_info stmt_info;
717 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
719 stmt_info = vinfo_for_stmt (stmt);
/* A stmt may only be marked relevant once; the assert guards against a
   conflicting earlier relevance setting.  */
720 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
725 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
730 /* Check if the permutation required by the SLP INSTANCE is supported.
731 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
734 vect_supported_slp_permutation_p (slp_instance instance)
736 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
737 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
738 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
739 VEC (slp_tree, heap) *sorted_loads = NULL;
741 slp_tree *tmp_loads = NULL;
742 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
745 /* FORNOW: The only supported loads permutation is loads from the same
746 location in all the loads in the node, when the data-refs in
747 nodes of LOADS constitute an interleaving chain.
748 Sort the nodes according to the order of accesses in the chain. */
749 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation indices in steps of GROUP_SIZE, pairing each load
   node with its position (INDEX) in the interleaving chain.  */
751 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
752 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
753 i += group_size, j++)
755 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
756 /* Check that the loads are all in the same interleaving chain. */
757 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
759 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "Build SLP failed: unsupported data "
763 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place the load node at its chain position.  */
770 tmp_loads[index] = load;
/* Replace the instance's load vector with the chain-ordered one.  */
773 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
774 for (i = 0; i < group_size; i++)
775 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
777 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
778 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Finally verify the target can perform the permuted load.  */
781 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
782 SLP_INSTANCE_UNROLLING_FACTOR (instance),
790 /* Check if the required load permutation is supported.
791 LOAD_PERMUTATION contains a list of indices of the loads.
792 In SLP this permutation is relative to the order of strided stores that are
793 the base of the SLP instance. */
796 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797 VEC (int, heap) *load_permutation)
799 int i = 0, j, prev = -1, next, k;
803 /* FORNOW: permutations are only supported in SLP. */
807 if (vect_print_dump_info (REPORT_SLP))
809 fprintf (vect_dump, "Load permutation ");
810 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811 fprintf (vect_dump, "%d ", next);
814 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well.  */
817 if (VEC_length (int, load_permutation)
818 != (unsigned int) (group_size * group_size))
/* LOAD_INDEX records which chain positions have already been claimed by
   a run of identical indices; a repeat means an unsupported permutation.  */
822 load_index = sbitmap_alloc (group_size);
823 sbitmap_zero (load_index);
824 for (j = 0; j < group_size; j++)
/* Each of the GROUP_SIZE runs must consist of GROUP_SIZE equal indices.  */
826 for (i = j * group_size, k = 0;
827 VEC_iterate (int, load_permutation, i, next) && k < group_size;
830 if (i != j * group_size && next != prev)
839 if (TEST_BIT (load_index, prev))
845 SET_BIT (load_index, prev);
848 sbitmap_free (load_index);
/* All runs consumed and well-formed: defer to the instance-level check.  */
850 if (supported && i == group_size * group_size
851 && vect_supported_slp_permutation_p (slp_instn))
858 /* Find the first load in the loop that belongs to INSTANCE.
859 When loads are in several SLP nodes, there can be a case in which the first
860 load does not appear in the first SLP node to be transformed, causing
861 incorrect order of statements. Since we generate all the loads together,
862 they must be inserted before the first load of the SLP instance and not
863 before the first load of the first node of the instance. */
865 vect_find_first_load_in_slp_instance (slp_instance instance)
869 gimple first_load = NULL, load;
/* Scan every load stmt of every load node and keep the earliest one.  */
872 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
875 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
877 first_load = get_earlier_stmt (load, first_load);
883 /* Analyze an SLP instance starting from a group of strided stores. Call
884 vect_build_slp_tree to build a tree of packed stmts if possible.
885 Return FALSE if it's impossible to SLP any stmt in the loop. */
888 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
891 slp_instance new_instance;
892 slp_tree node = XNEW (struct _slp_tree);
893 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
894 unsigned int unrolling_factor = 1, nunits;
895 tree vectype, scalar_type;
897 unsigned int vectorization_factor = 0;
898 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
899 unsigned int max_nunits = 0;
900 VEC (int, heap) *load_permutation;
901 VEC (slp_tree, heap) *loads;
/* The scalar type of the store's data-ref determines the vector type.  */
903 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
904 vinfo_for_stmt (stmt))));
905 vectype = get_vectype_for_scalar_type (scalar_type);
908 if (vect_print_dump_info (REPORT_SLP))
910 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
911 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
916 nunits = TYPE_VECTOR_SUBPARTS (vectype);
918 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
920 /* No multitypes in BB SLP. */
921 vectorization_factor = nunits;
923 /* Calculate the unrolling factor. */
924 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
925 if (unrolling_factor != 1 && !loop_vinfo)
927 if (vect_print_dump_info (REPORT_SLP))
928 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
934 /* Create a node (a root of the SLP tree) for the packed strided stores. */
935 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
937 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
940 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
941 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
944 SLP_TREE_VEC_STMTS (node) = NULL;
945 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
946 SLP_TREE_LEFT (node) = NULL;
947 SLP_TREE_RIGHT (node) = NULL;
948 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
949 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
951 /* Calculate the number of vector stmts to create based on the unrolling
952 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
953 GROUP_SIZE / NUNITS otherwise). */
954 ncopies_for_cost = unrolling_factor * group_size / nunits;
956 load_permutation = VEC_alloc (int, heap, group_size * group_size);
957 loads = VEC_alloc (slp_tree, heap, group_size);
959 /* Build the tree for the SLP instance. */
960 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
961 &inside_cost, &outside_cost, ncopies_for_cost,
962 &max_nunits, &load_permutation, &loads,
963 vectorization_factor))
965 /* Create a new SLP instance. */
966 new_instance = XNEW (struct _slp_instance);
967 SLP_INSTANCE_TREE (new_instance) = node;
968 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
969 /* Calculate the unrolling factor based on the smallest type in the
tree.  */
971 if (max_nunits > nunits)
972 unrolling_factor = least_common_multiple (max_nunits, group_size)
975 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
976 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
977 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
978 SLP_INSTANCE_LOADS (new_instance) = loads;
979 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
980 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If the tree contains load nodes, validate the load permutation; an
   unsupported permutation kills the whole instance.  */
981 if (VEC_length (slp_tree, loads))
983 if (!vect_supported_load_permutation_p (new_instance, group_size,
986 if (vect_print_dump_info (REPORT_SLP))
988 fprintf (vect_dump, "Build SLP failed: unsupported load "
990 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
993 vect_free_slp_instance (new_instance);
997 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
998 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unused -- release it.  */
1001 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the enclosing loop or basic block.  */
1004 VEC_safe_push (slp_instance, heap,
1005 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1008 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1011 if (vect_print_dump_info (REPORT_SLP))
1012 vect_print_slp_tree (node);
1017 /* Failed to SLP. */
1018 /* Free the allocated memory. */
1019 vect_free_slp_tree (node);
1020 VEC_free (int, heap, load_permutation);
1021 VEC_free (slp_tree, heap, loads);
1027 /* Check if there are stmts in the loop that can be vectorized using SLP.
1028 Build SLP trees of packed scalar stmts if SLP is possible. */
1031 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1034 VEC (gimple, heap) *strided_stores;
1038 if (vect_print_dump_info (REPORT_SLP))
1039 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pick the strided stores from whichever context (loop or BB) we are
   analyzing; exactly one of LOOP_VINFO / BB_VINFO is expected.  */
1042 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1044 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Try to build an SLP instance rooted at each strided store group.  */
1046 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1047 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
/* For basic-block SLP, failing to SLP any store means failure.  */
1050 if (bb_vinfo && !ok)
1052 if (vect_print_dump_info (REPORT_SLP))
1053 fprintf (vect_dump, "Failed to SLP the basic block.");
1062 /* For each possible SLP instance decide whether to SLP it and calculate overall
1063 unrolling factor needed to SLP the loop. */
1066 vect_make_slp_decision (loop_vec_info loop_vinfo)
1068 unsigned int i, unrolling_factor = 1;
1069 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1070 slp_instance instance;
1071 int decided_to_slp = 0;
1073 if (vect_print_dump_info (REPORT_SLP))
1074 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1076 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1078 /* FORNOW: SLP if you can. */
/* The loop's SLP unrolling factor is the max over all instances.  */
1079 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1080 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1082 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1083 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1084 loop-based vectorization. Such stmts will be marked as HYBRID. */
1085 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1089 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1091 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1092 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1093 decided_to_slp, unrolling_factor);
1097 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1098 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1101 vect_detect_hybrid_slp_stmts (slp_tree node)
1105 imm_use_iterator imm_iter;
1107 stmt_vec_info stmt_vinfo;
/* A PURE_SLP stmt whose SSA result is used by a relevant non-SLP stmt
   (or feeds a reduction/induction cycle) must also be vectorized the
   loop-based way -- mark it HYBRID.  */
1112 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1113 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1114 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1115 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1116 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1117 && !STMT_SLP_TYPE (stmt_vinfo)
1118 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1119 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
1120 vect_mark_slp_stmts (node, hybrid, i);
1122 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1123 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1127 /* Find stmts that must be both vectorized and SLPed. */
1130 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1133 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1134 slp_instance instance;
1136 if (vect_print_dump_info (REPORT_SLP))
1137 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Run the per-tree hybrid detection over every SLP instance.  */
1139 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1140 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1144 /* Create and initialize a new bb_vec_info struct for BB, as well as
1145 stmt_vec_info structs for all the stmts in it. */
1148 new_bb_vec_info (basic_block bb)
1150 bb_vec_info res = NULL;
1151 gimple_stmt_iterator gsi;
/* xcalloc zero-initializes every field of the new bb_vec_info.  */
1153 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1154 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info to each statement in BB.  */
1156 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1158 gimple stmt = gsi_stmt (gsi);
1159 gimple_set_uid (stmt, 0);
1160 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1163 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1164 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1171 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1172 stmts in the basic block. */
1175 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1178 gimple_stmt_iterator si;
1183 bb = BB_VINFO_BB (bb_vinfo);
/* Release the per-statement vectorization info first.  */
1185 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1187 gimple stmt = gsi_stmt (si);
1188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1191 /* Free stmt_vec_info. */
1192 free_stmt_vec_info (stmt);
/* Then the BB-level vectors.  */
1195 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1196 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1202 /* Analyze statements contained in SLP tree node after recursively analyzing
1203 the subtree. Return TRUE if the operations are supported. */
1206 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Bottom-up: both children must be analyzable before this node.  */
1215 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1216 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1219 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1221 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1222 gcc_assert (stmt_info);
/* In BB SLP every stmt of the tree must be pure SLP (no hybrid).  */
1223 gcc_assert (PURE_SLP_STMT (stmt_info));
1225 if (!vect_analyze_stmt (stmt, &dummy, node))
1233 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1234 operations are supported. */
1237 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1239 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1240 slp_instance instance;
/* Note: I is only advanced in the (elided) else branch -- on failure the
   instance is removed in place and the next one shifts into slot I.  */
1243 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1245 if (!vect_slp_analyze_node_operations (bb_vinfo,
1246 SLP_INSTANCE_TREE (instance)))
/* Unsupported instance: drop it and keep checking the rest.  */
1248 vect_free_slp_instance (instance);
1249 VEC_ordered_remove (slp_instance, slp_instances, i);
/* Fail only if no instance survived.  */
1255 if (!VEC_length (slp_instance, slp_instances))
1262 /* Check if the basic block can be vectorized. */
1265 vect_slp_analyze_bb (basic_block bb)
1267 bb_vec_info bb_vinfo;
1268 VEC (ddr_p, heap) *ddrs;
1269 VEC (slp_instance, heap) *slp_instances;
1270 slp_instance instance;
1272 gimple_stmt_iterator gsi;
1274 int max_vf = MAX_VECTORIZATION_FACTOR;
1276 if (vect_print_dump_info (REPORT_DETAILS))
1277 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1279 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1281 gimple stmt = gsi_stmt (gsi);
1282 if (!is_gimple_debug (stmt)
1283 && !gimple_nop_p (stmt)
1284 && gimple_code (stmt) != GIMPLE_LABEL)
1288 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1290 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1291 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1297 bb_vinfo = new_bb_vec_info (bb);
1301 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1303 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1304 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1307 destroy_bb_vec_info (bb_vinfo);
1311 ddrs = BB_VINFO_DDRS (bb_vinfo);
1312 if (!VEC_length (ddr_p, ddrs))
1314 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1315 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1318 destroy_bb_vec_info (bb_vinfo);
1322 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1325 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1326 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1327 "in basic block.\n");
1329 destroy_bb_vec_info (bb_vinfo);
1333 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1335 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1336 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1339 destroy_bb_vec_info (bb_vinfo);
1343 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1345 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1346 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1349 destroy_bb_vec_info (bb_vinfo);
1353 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1355 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1356 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1359 destroy_bb_vec_info (bb_vinfo);
1363 /* Check the SLP opportunities in the basic block, analyze and build SLP
1365 if (!vect_analyze_slp (NULL, bb_vinfo))
1367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1368 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1369 "in basic block.\n");
1371 destroy_bb_vec_info (bb_vinfo);
1375 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1377 /* Mark all the statements that we want to vectorize as pure SLP and
1379 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1381 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1382 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1385 if (!vect_slp_analyze_operations (bb_vinfo))
1387 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1388 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1390 destroy_bb_vec_info (bb_vinfo);
1394 if (vect_print_dump_info (REPORT_DETAILS))
1395 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1401 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1402 the number of created vector stmts depends on the unrolling factor). However,
1403 the actual number of vector stmts for every SLP node depends on VF which is
1404 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1405 In this function we assume that the inside costs calculated in
1406 vect_model_xxx_cost are linear in ncopies. */
1409 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1411 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1412 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1413 slp_instance instance;
1415 if (vect_print_dump_info (REPORT_SLP))
1416 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1418 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1419 /* We assume that costs are linear in ncopies. */
/* NOTE(review): integer division — presumably VF is always a multiple of
   the instance unrolling factor (cf. the NCOPIES computation in
   vect_transform_slp_perm_load); confirm, otherwise the cost is
   silently rounded down.  */
1420 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1421 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1425 /* For constant and loop invariant defs of SLP_NODE this function returns
1426 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1427 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1428 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1431 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1432 unsigned int op_num, unsigned int number_of_vectors)
1434 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1435 gimple stmt = VEC_index (gimple, stmts, 0);
1436 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1440 int j, number_of_places_left_in_vector;
1443 int group_size = VEC_length (gimple, stmts);
1444 unsigned int vec_num, i;
1445 int number_of_copies = 1;
1446 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1447 bool constant_p, is_store;
/* For a statement with a data reference (a store) the operand of interest
   is the stored value (RHS1); otherwise take operand OP_NUM.  */
1449 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1452 op = gimple_assign_rhs1 (stmt);
1457 op = gimple_op (stmt, op_num + 1);
/* CONSTANT_P selects VECTOR_CST vs. CONSTRUCTOR when emitting below.  */
1460 if (CONSTANT_CLASS_P (op))
1465 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1466 gcc_assert (vector_type);
1468 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1470 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1471 created vectors. It is greater than 1 if unrolling is performed.
1473 For example, we have two scalar operands, s1 and s2 (e.g., group of
1474 strided accesses of size two), while NUNITS is four (i.e., four scalars
1475 of this type can be packed in a vector). The output vector will contain
1476 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1479 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1480 containing the operands.
1482 For example, NUNITS is four as before, and the group size is 8
1483 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1484 {s5, s6, s7, s8}. */
1486 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1488 number_of_places_left_in_vector = nunits;
1489 for (j = 0; j < number_of_copies; j++)
/* Walk the group backwards: tree_cons prepends, so reverse iteration
   leaves the element list T in forward order.  */
1491 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1494 op = gimple_assign_rhs1 (stmt);
1496 op = gimple_op (stmt, op_num + 1);
1498 /* Create 'vect_ = {op0,op1,...,opn}'. */
1499 t = tree_cons (NULL_TREE, op, t);
1501 number_of_places_left_in_vector--;
/* A full vector's worth of elements collected: emit it, either as a
   VECTOR_CST (all-constant) or a CONSTRUCTOR (invariant SSA names),
   and keep the initialized vector in VOPRNDS.  */
1503 if (number_of_places_left_in_vector == 0)
1505 number_of_places_left_in_vector = nunits;
1508 vec_cst = build_vector (vector_type, t);
1510 vec_cst = build_constructor_from_list (vector_type, t);
1511 VEC_quick_push (tree, voprnds,
1512 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1518 /* Since the vectors are created in the reverse order, we should invert
1520 vec_num = VEC_length (tree, voprnds);
1521 for (j = vec_num - 1; j >= 0; j--)
1523 vop = VEC_index (tree, voprnds, j);
1524 VEC_quick_push (tree, *vec_oprnds, vop);
1527 VEC_free (tree, heap, voprnds);
1529 /* In case that VF is greater than the unrolling factor needed for the SLP
1530 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1531 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1532 to replicate the vectors. */
1533 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
/* Re-push only the first VEC_NUM entries (the distinct vectors).  */
1535 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1536 VEC_quick_push (tree, *vec_oprnds, vop);
1541 /* Get vectorized definitions from SLP_NODE that contains corresponding
1542 vectorized def-stmts. */
1545 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1548 gimple vec_def_stmt;
/* The node must already have been vectorized by the caller.  */
1551 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1554 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
/* The LHS of each vectorized def-stmt is the vector operand to reuse.  */
1557 gcc_assert (vec_def_stmt);
1558 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1559 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1564 /* Get vectorized definitions for SLP_NODE.
1565 If the scalar definitions are loop invariants or constants, collect them and
1566 call vect_get_constant_vectors() to create vector stmts.
1567 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1568 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1569 vect_get_slp_vect_defs() to retrieve them.
1570 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1571 the right node. This is used when the second operand must remain scalar. */
1574 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1575 VEC (tree,heap) **vec_oprnds1)
1578 enum tree_code code;
1579 int number_of_vects;
1580 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1582 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1583 /* The number of vector defs is determined by the number of vector statements
1584 in the node from which we get those statements. */
1585 if (SLP_TREE_LEFT (slp_node))
1586 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1589 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1590 /* Number of vector stmts was calculated according to LHS in
1591 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1592 necessary. See vect_get_smallest_scalar_type() for details. */
1593 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* Rescale the vector count by the RHS/LHS element-size ratio when the
   operand and result element sizes differ.  */
1595 if (rhs_size_unit != lhs_size_unit)
1597 number_of_vects *= rhs_size_unit;
1598 number_of_vects /= lhs_size_unit;
1602 /* Allocate memory for vectorized defs. */
1603 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1605 /* SLP_NODE corresponds either to a group of stores or to a group of
1606 unary/binary operations. We don't call this function for loads. */
1607 if (SLP_TREE_LEFT (slp_node))
1608 /* The defs are already vectorized. */
1609 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1611 /* Build vectors from scalar defs. */
1612 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
/* A store has no second vector operand to gather.  */
1614 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1615 /* Since we don't call this function with loads, this is a group of
/* Only binary operations carry a second operand; also skip it when the
   caller passed VEC_OPRNDS1 == NULL (operand must stay scalar).  */
1619 code = gimple_assign_rhs_code (first_stmt);
1620 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1623 /* The number of vector defs is determined by the number of vector statements
1624 in the node from which we get those statements. */
1625 if (SLP_TREE_RIGHT (slp_node))
1626 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1628 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1630 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1632 if (SLP_TREE_RIGHT (slp_node))
1633 /* The defs are already vectorized. */
1634 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1636 /* Build vectors from scalar defs. */
1637 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1641 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1642 building a vector of type MASK_TYPE from it) and two input vectors placed in
1643 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1644 shifting by STRIDE elements of DR_CHAIN for every copy.
1645 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1647 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1648 the created stmts must be inserted. */
1651 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1652 tree mask, int first_vec_indx, int second_vec_indx,
1653 gimple_stmt_iterator *gsi, slp_tree node,
1654 tree builtin_decl, tree vectype,
1655 VEC(tree,heap) *dr_chain,
1656 int ncopies, int vect_stmts_counter)
1659 gimple perm_stmt = NULL;
1660 stmt_vec_info next_stmt_info;
1662 tree first_vec, second_vec, data_ref;
1663 VEC (tree, heap) *params = NULL;
1665 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1667 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vector-stmts array with NULLs up to its declared size so that
   VEC_replace below can store at any strided index.  */
1669 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1670 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1671 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1673 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1674 for (i = 0; i < ncopies; i++)
1676 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1677 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1679 /* Build argument list for the vectorized call. */
1680 VEC_free (tree, heap, params);
1681 params = VEC_alloc (tree, heap, 3);
1682 VEC_quick_push (tree, params, first_vec);
1683 VEC_quick_push (tree, params, second_vec);
1684 VEC_quick_push (tree, params, mask);
1686 /* Generate the permute statement. */
1687 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1688 data_ref = make_ssa_name (perm_dest, perm_stmt);
1689 gimple_call_set_lhs (perm_stmt, data_ref);
1690 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1692 /* Store the vector statement in NODE. */
1693 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1694 stride * i + vect_stmts_counter, perm_stmt);
/* Each further copy permutes the vectors STRIDE entries later in
   DR_CHAIN.  */
1696 first_vec_indx += stride;
1697 second_vec_indx += stride;
1700 /* Mark the scalar stmt as vectorized. */
1701 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1702 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1706 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1707 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1708 representation. Check that the mask is valid and return FALSE if not.
1709 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1710 the next vector, i.e., the current first vector is not needed. */
1713 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1714 int mask_nunits, bool only_one_vec, int index,
1715 int *mask, int *current_mask_element,
1716 bool *need_next_vector)
/* NOTE(review): the state below is kept in function-local statics and
   persists across calls; it is reset only when the last element of a
   mask is processed (see the INDEX == MASK_NUNITS - 1 check at the end),
   so the caller must always drive a mask to completion.  Not reentrant
   and not thread-safe.  */
1719 static int number_of_mask_fixes = 1;
1720 static bool mask_fixed = false;
1721 static bool needs_first_vector = false;
1723 /* Convert to target specific representation. */
1724 *current_mask_element = first_mask_element + m;
1725 /* Adjust the value in case it's a mask for second and third vectors. */
1726 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1728 if (*current_mask_element < mask_nunits)
1729 needs_first_vector = true;
1731 /* We have only one input vector to permute but the mask accesses values in
1732 the next vector as well. */
1733 if (only_one_vec && *current_mask_element >= mask_nunits)
1735 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "permutation requires at least two vectors ");
1738 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1744 /* The mask requires the next vector. */
1745 if (*current_mask_element >= mask_nunits * 2)
1747 if (needs_first_vector || mask_fixed)
1749 /* We either need the first vector too or have already moved to the
1750 next vector. In both cases, this permutation needs three
1752 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "permutation requires at "
1755 "least three vectors ")
1756 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1762 /* We move to the next vector, dropping the first one and working with
1763 the second and the third - we need to adjust the values of the mask
1765 *current_mask_element -= mask_nunits * number_of_mask_fixes;
/* Retroactively rebase the mask entries already emitted for this mask.  */
1767 for (i = 0; i < index; i++)
1768 mask[i] -= mask_nunits * number_of_mask_fixes;
1770 (number_of_mask_fixes)++;
1774 *need_next_vector = mask_fixed;
1776 /* This was the last element of this mask. Start a new one. */
1777 if (index == mask_nunits - 1)
1779 number_of_mask_fixes = 1;
1781 needs_first_vector = false;
1788 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1789 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1790 permute statements for SLP_NODE_INSTANCE. */
1792 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1793 gimple_stmt_iterator *gsi, int vf,
1794 slp_instance slp_node_instance, bool analyze_only)
1796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1797 tree mask_element_type = NULL_TREE, mask_type;
1798 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1800 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1801 gimple next_scalar_stmt;
1802 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1803 int first_mask_element;
1804 int index, unroll_factor, *mask, current_mask_element, ncopies;
1805 bool only_one_vec = false, need_next_vector = false;
1806 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1808 if (!targetm.vectorize.builtin_vec_perm)
1810 if (vect_print_dump_info (REPORT_DETAILS))
1812 fprintf (vect_dump, "no builtin for vect permute for ");
1813 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1819 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1820 &mask_element_type);
1821 if (!builtin_decl || !mask_element_type)
1823 if (vect_print_dump_info (REPORT_DETAILS))
1825 fprintf (vect_dump, "no builtin for vect permute for ");
1826 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1832 mask_type = get_vectype_for_scalar_type (mask_element_type);
1833 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1834 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1835 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1836 scale = mask_nunits / nunits;
1837 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1839 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1840 unrolling factor. */
1841 orig_vec_stmts_num = group_size *
1842 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1843 if (orig_vec_stmts_num == 1)
1844 only_one_vec = true;
1846 /* Number of copies is determined by the final vectorization factor
1847 relatively to SLP_NODE_INSTANCE unrolling factor. */
1848 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1850 /* Generate permutation masks for every NODE. Number of masks for each NODE
1851 is equal to GROUP_SIZE.
1852 E.g., we have a group of three nodes with three loads from the same
1853 location in each node, and the vector size is 4. I.e., we have a
1854 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1855 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1856 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1859 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1860 scpecific type, e.g., in bytes for Altivec.
1861 The last mask is illegal since we assume two operands for permute
1862 operation, and the mask element values can't be outside that range. Hence,
1863 the last mask must be converted into {2,5,5,5}.
1864 For the first two permutations we need the first and the second input
1865 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1866 we need the second and the third vectors: {b1,c1,a2,b2} and
1870 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1876 vect_stmts_counter = 0;
1878 first_vec_index = vec_index++;
1880 second_vec_index = first_vec_index;
1882 second_vec_index = vec_index++;
1884 for (j = 0; j < unroll_factor; j++)
1886 for (k = 0; k < group_size; k++)
1888 first_mask_element = (i + j * group_size) * scale;
1889 for (m = 0; m < scale; m++)
1891 if (!vect_get_mask_element (stmt, first_mask_element, m,
1892 mask_nunits, only_one_vec, index, mask,
1893 ¤t_mask_element, &need_next_vector))
1896 mask[index++] = current_mask_element;
1899 if (index == mask_nunits)
1901 tree mask_vec = NULL;
1903 while (--index >= 0)
1905 tree t = build_int_cst (mask_element_type, mask[index]);
1906 mask_vec = tree_cons (NULL, t, mask_vec);
1908 mask_vec = build_vector (mask_type, mask_vec);
1911 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1914 if (vect_print_dump_info (REPORT_DETAILS))
1916 fprintf (vect_dump, "unsupported vect permute ");
1917 print_generic_expr (vect_dump, mask_vec, 0);
1925 if (need_next_vector)
1927 first_vec_index = second_vec_index;
1928 second_vec_index = vec_index;
1931 next_scalar_stmt = VEC_index (gimple,
1932 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1934 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1935 mask_vec, first_vec_index, second_vec_index,
1936 gsi, node, builtin_decl, vectype, dr_chain,
1937 ncopies, vect_stmts_counter++);
1950 /* Vectorize SLP instance tree in postorder. */
1953 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1954 unsigned int vectorization_factor)
1957 bool strided_store, is_store;
1958 gimple_stmt_iterator si;
1959 stmt_vec_info stmt_info;
1960 unsigned int vec_stmts_size, nunits, group_size;
1963 slp_tree loads_node;
/* Postorder: vectorize both children first so that their vector defs
   exist when NODE's own statements are transformed.  */
1968 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1969 vectorization_factor);
1970 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1971 vectorization_factor);
1973 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1974 stmt_info = vinfo_for_stmt (stmt);
1976 /* VECTYPE is the type of the destination. */
1977 vectype = STMT_VINFO_VECTYPE (stmt_info);
1978 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1979 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1981 /* For each SLP instance calculate number of vector stmts to be created
1982 for the scalar stmts in each node of the SLP tree. Number of vector
1983 elements in one vector iteration is the number of scalar elements in
1984 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1986 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1988 /* In case of load permutation we have to allocate vectorized statements for
1989 all the nodes that participate in that permutation. */
1990 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1993 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
/* Only allocate once per loads node (it may be shared).  */
1996 if (!SLP_TREE_VEC_STMTS (loads_node))
1998 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2000 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate NODE's own vector-stmt slots, again only once.  */
2005 if (!SLP_TREE_VEC_STMTS (node))
2007 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2008 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2011 if (vect_print_dump_info (REPORT_DETAILS))
2013 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2014 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2017 /* Loads should be inserted before the first load. */
2018 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2019 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2020 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2021 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2023 si = gsi_for_stmt (stmt);
2025 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2028 if (DR_GROUP_FIRST_DR (stmt_info))
2029 /* If IS_STORE is TRUE, the vectorization of the
2030 interleaving chain was completed - free all the stores in
2032 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2034 /* FORNOW: SLP originates only from strided stores. */
2040 /* FORNOW: SLP originates only from strided stores. */
/* Generate vector code for all SLP instances of either LOOP_VINFO or
   BB_VINFO (the guard selecting between the two is elided here;
   presumably exactly one of them is non-NULL — TODO confirm).  The
   IS_STORE flag tracks whether a store was vectorized.  */
2046 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2048 VEC (slp_instance, heap) *slp_instances;
2049 slp_instance instance;
2051 bool is_store = false;
/* Loop case: instances and the vectorization factor come from the loop
   info ...  */
2055 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2056 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* ... basic-block case: from the BB info.  */
2060 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2064 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2066 /* Schedule the tree of INSTANCE. */
2067 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2069 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2070 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2071 fprintf (vect_dump, "vectorizing stmts using SLP.")
2078 /* Vectorize the basic block. */
2081 vect_slp_transform_bb (basic_block bb)
2083 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2084 gimple_stmt_iterator si;
2086 gcc_assert (bb_vinfo);
2088 if (vect_print_dump_info (REPORT_DETAILS))
2089 fprintf (vect_dump, "SLPing BB\n");
2091 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2093 gimple stmt = gsi_stmt (si);
2094 stmt_vec_info stmt_info;
2096 if (vect_print_dump_info (REPORT_DETAILS))
2098 fprintf (vect_dump, "------>SLPing statement: ");
2099 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2102 stmt_info = vinfo_for_stmt (stmt);
2103 gcc_assert (stmt_info);
2105 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2106 if (STMT_SLP_TYPE (stmt_info))
2108 vect_schedule_slp (NULL, bb_vinfo);
2113 mark_sym_for_renaming (gimple_vop (cfun));
2114 /* The memory tags and pointers in vectorized statements need to
2115 have their SSA forms updated. FIXME, why can't this be delayed
2116 until all the loops have been transformed? */
2117 update_ssa (TODO_update_ssa);
2119 if (vect_print_dump_info (REPORT_DETAILS))
2120 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2122 destroy_bb_vec_info (bb_vinfo);