1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if it succeeds and NULL if not. */
46 find_bb_location (basic_block bb)
49 gimple_stmt_iterator si;
/* Walk the statements of BB in order and return the first known
   source location encountered.  NOTE(review): the fallback return
   for a BB with no located statements is not visible here.  */
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
68 vect_free_slp_tree (slp_tree node)
/* Post-order traversal: release both children before this node's
   own vectors.  */
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* The scalar-stmts vector always exists; the vector-stmts vector is
   only allocated once the node has been transformed, hence the guard.  */
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
88 /* Free the memory allocated for the SLP instance.  Releases the SLP
   tree rooted at the instance plus the load-permutation and loads
   vectors owned by the instance.  */
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
117 unsigned int i, number_of_oprnds;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
/* LOOP stays NULL for basic-block SLP; it is only set when analyzing
   inside a loop (loop_vinfo given).  */
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT in turn.  */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
/* Fail if the operand's definition cannot be classified.  */
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
156 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
157 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
/* Either all stmts of the node feed from a pattern for this operand,
   or none do — a mix is rejected below.  */
159 if (!*first_stmt_dt0)
163 if (i == 1 && !*first_stmt_dt1)
165 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
167 if (vect_print_dump_info (REPORT_DETAILS))
169 fprintf (vect_dump, "Build SLP failed: some of the stmts"
170 " are in a pattern, and others are not ");
171 print_generic_expr (vect_dump, oprnd, TDF_SLIM)
/* Replace the in-pattern def stmt with the pattern's related stmt
   and re-read its def type.  */
178 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
179 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
181 if (*dt == vect_unknown_def_type)
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the defining stmt (PHI result or
   assignment LHS); any other stmt kind is unsupported.  */
188 switch (gimple_code (def_stmt))
191 def = gimple_phi_result (def_stmt);
195 def = gimple_assign_lhs (def_stmt);
199 if (vect_print_dump_info (REPORT_DETAILS))
200 fprintf (vect_dump, "unsupported defining stmt: ");
/* First stmt of the group: record the reference info that later
   stmts must match.  */
205 if (!*first_stmt_dt0)
207 /* op0 of the first stmt of the group - store its info. */
208 *first_stmt_dt0 = dt[i];
210 *first_stmt_def0_type = TREE_TYPE (def);
212 *first_stmt_const_oprnd = oprnd;
214 /* Analyze costs (for the first stmt of the group only). */
215 if (rhs_class != GIMPLE_SINGLE_RHS)
216 /* Not a memory operation (we don't call this function for loads). */
217 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
220 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
226 if (!*first_stmt_dt1 && i == 1)
228 /* op1 of the first stmt of the group - store its info. */
229 *first_stmt_dt1 = dt[i];
231 *first_stmt_def1_type = TREE_TYPE (def);
234 /* We assume that the stmt contains only one constant
235 operand. We fail otherwise, to be on the safe side. */
236 if (*first_stmt_const_oprnd)
238 if (vect_print_dump_info (REPORT_SLP))
239 fprintf (vect_dump, "Build SLP failed: two constant "
243 *first_stmt_const_oprnd = oprnd;
248 /* Not first stmt of the group, check that the def-stmt/s match
249 the def-stmt/s of the first stmt. Allow different definition
250 types for reduction chains: the first stmt must be a
251 vect_reduction_def (a phi node), and the rest
252 vect_internal_def. */
254 && ((*first_stmt_dt0 != dt[i]
255 && !(*first_stmt_dt0 == vect_reduction_def
256 && dt[i] == vect_internal_def))
257 || (*first_stmt_def0_type && def
258 && !types_compatible_p (*first_stmt_def0_type,
261 && ((*first_stmt_dt1 != dt[i]
262 && !(*first_stmt_dt1 == vect_reduction_def
263 && dt[i] == vect_internal_def))
264 || (*first_stmt_def1_type && def
265 && !types_compatible_p (*first_stmt_def1_type,
268 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
271 if (vect_print_dump_info (REPORT_SLP))
272 fprintf (vect_dump, "Build SLP failed: different types ");
279 /* Check the types of the definitions. */
282 case vect_constant_def:
283 case vect_external_def:
/* Internal and reduction defs queue the def stmt for recursive SLP
   tree building (operand 0 goes to DEF_STMTS0, operand 1 to
   DEF_STMTS1).  */
286 case vect_internal_def:
287 case vect_reduction_def:
289 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
291 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
295 /* FORNOW: Not supported. */
296 if (vect_print_dump_info (REPORT_SLP))
298 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
299 print_generic_expr (vect_dump, def, TDF_SLIM);
310 /* Recursively build an SLP tree starting from NODE.
311 Fail (and return FALSE) if def-stmts are not isomorphic, require data
312 permutation or are of unsupported types of operation. Otherwise, return
316 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
317 slp_tree *node, unsigned int group_size,
318 int *inside_cost, int *outside_cost,
319 int ncopies_for_cost, unsigned int *max_nunits,
320 VEC (int, heap) **load_permutation,
321 VEC (slp_tree, heap) **loads,
322 unsigned int vectorization_factor)
324 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
325 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
327 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
328 gimple stmt = VEC_index (gimple, stmts, 0);
329 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
330 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
331 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
332 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
334 bool stop_recursion = false, need_same_oprnds = false;
335 tree vectype, scalar_type, first_op1 = NULL_TREE;
336 unsigned int ncopies;
339 enum machine_mode optab_op2_mode;
340 enum machine_mode vec_mode;
341 tree first_stmt_const_oprnd = NULL_TREE;
342 struct data_reference *first_dr;
343 bool pattern0 = false, pattern1 = false;
345 bool permutation = false;
346 unsigned int load_place;
347 gimple first_load, prev_first_load = NULL;
349 /* For every stmt in NODE find its def stmt/s. */
350 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
352 if (vect_print_dump_info (REPORT_SLP))
354 fprintf (vect_dump, "Build SLP for ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
358 /* Fail to vectorize statements marked as unvectorizable. */
359 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
361 if (vect_print_dump_info (REPORT_SLP))
364 "Build SLP failed: unvectorizable statement ");
365 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* A NULL lhs means the stmt is neither an assignment nor a call with
   a result — such stmts cannot be SLP'ed.  */
371 lhs = gimple_get_lhs (stmt);
372 if (lhs == NULL_TREE)
374 if (vect_print_dump_info (REPORT_SLP))
377 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
378 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Determine the vector type from the smallest scalar type in STMT;
   fail if the target has no corresponding vector mode.  */
384 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
385 vectype = get_vectype_for_scalar_type (scalar_type);
388 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
391 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
396 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
399 if (vect_print_dump_info (REPORT_SLP))
400 fprintf (vect_dump, "SLP with multiple types ");
402 /* FORNOW: multiple types are unsupported in BB SLP. */
407 /* In case of multiple types we need to detect the smallest type. */
408 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
409 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
411 if (is_gimple_call (stmt))
412 rhs_code = CALL_EXPR;
414 rhs_code = gimple_assign_rhs_code (stmt);
416 /* Check the operation.  The first stmt establishes the operation
   code that all subsequent group members must match.  */
419 first_stmt_code = rhs_code;
421 /* Shift arguments should be equal in all the packed stmts for a
422 vector shift with scalar shift operand. */
423 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
424 || rhs_code == LROTATE_EXPR
425 || rhs_code == RROTATE_EXPR)
427 vec_mode = TYPE_MODE (vectype);
429 /* First see if we have a vector/vector shift. */
430 optab = optab_for_tree_code (rhs_code, vectype,
434 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
436 /* No vector/vector shift, try for a vector/scalar shift. */
437 optab = optab_for_tree_code (rhs_code, vectype,
442 if (vect_print_dump_info (REPORT_SLP))
443 fprintf (vect_dump, "Build SLP failed: no optab.")
446 icode = (int) optab_handler (optab, vec_mode);
447 if (icode == CODE_FOR_nothing)
449 if (vect_print_dump_info (REPORT_SLP))
450 fprintf (vect_dump, "Build SLP failed: "
451 "op not supported by target.");
/* A scalar (non-vector) operand-2 mode means the shift amount is
   shared, so all stmts in the group must use the same amount.  */
454 optab_op2_mode = insn_data[icode].operand[2].mode;
455 if (!VECTOR_MODE_P (optab_op2_mode))
457 need_same_oprnds = true;
458 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: the operation must match the first stmt's, with
   two exceptions — REALPART/IMAGPART pairs (complex numbers) and
   differing memory-reference codes within a strided access.  */
465 if (first_stmt_code != rhs_code
466 && (first_stmt_code != IMAGPART_EXPR
467 || rhs_code != REALPART_EXPR)
468 && (first_stmt_code != REALPART_EXPR
469 || rhs_code != IMAGPART_EXPR)
470 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
471 && (first_stmt_code == ARRAY_REF
472 || first_stmt_code == INDIRECT_REF
473 || first_stmt_code == COMPONENT_REF
474 || first_stmt_code == MEM_REF)))
476 if (vect_print_dump_info (REPORT_SLP))
479 "Build SLP failed: different operation in stmt ");
480 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
487 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
489 if (vect_print_dump_info (REPORT_SLP))
492 "Build SLP failed: different shift arguments in ");
493 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 /* Strided store or load. */
501 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* A reference on the LHS means a strided store: recurse into its
   RHS defs; otherwise it is a strided load, handled below.  */
503 if (REFERENCE_CLASS_P (lhs))
506 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
507 stmt, &def_stmts0, &def_stmts1,
510 &first_stmt_def0_type,
511 &first_stmt_def1_type,
512 &first_stmt_const_oprnd,
514 &pattern0, &pattern1))
520 /* FORNOW: Check that there is no gap between the loads. */
521 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
522 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
523 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
524 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
526 if (vect_print_dump_info (REPORT_SLP))
528 fprintf (vect_dump, "Build SLP failed: strided "
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
536 /* Check that the size of interleaved loads group is not
537 greater than the SLP group size. */
538 if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
540 if (vect_print_dump_info (REPORT_SLP))
542 fprintf (vect_dump, "Build SLP failed: the number of "
543 "interleaved loads is greater than"
544 " the SLP group size ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
551 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
554 /* Check that there are no loads from different interleaving
555 chains in the same node. The only exception is complex
557 if (prev_first_load != first_load
558 && rhs_code != REALPART_EXPR
559 && rhs_code != IMAGPART_EXPR)
561 if (vect_print_dump_info (REPORT_SLP))
563 fprintf (vect_dump, "Build SLP failed: different "
564 "interleaving chains in one node ");
565 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 prev_first_load = first_load;
/* Alignment and cost checks are done once, on the chain head.  */
574 if (first_load == stmt)
576 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
577 if (vect_supportable_dr_alignment (first_dr, false)
578 == dr_unaligned_unsupported)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: unsupported "
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
590 /* Analyze costs (for the first stmt in the group). */
591 vect_model_load_cost (vinfo_for_stmt (stmt),
592 ncopies_for_cost, false, *node);
595 /* Store the place of this load in the interleaving chain. In
596 case that permutation is needed we later decide if a specific
597 permutation is supported. */
598 load_place = vect_get_place_in_interleaving_chain (stmt,
603 VEC_safe_push (int, heap, *load_permutation, load_place);
605 /* We stop the tree when we reach a group of loads. */
606 stop_recursion = true;
609 } /* Strided access. */
/* Non-strided memory references are not supported.  */
612 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
614 /* Not strided load. */
615 if (vect_print_dump_info (REPORT_SLP))
617 fprintf (vect_dump, "Build SLP failed: not strided load ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 /* FORNOW: Not strided loads are not supported. */
625 /* Not memory operation. */
626 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
627 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
629 if (vect_print_dump_info (REPORT_SLP))
631 fprintf (vect_dump, "Build SLP failed: operation");
632 fprintf (vect_dump, " unsupported ");
633 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
639 /* Find the def-stmts. */
640 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
641 &def_stmts0, &def_stmts1,
642 &first_stmt_dt0, &first_stmt_dt1,
643 &first_stmt_def0_type,
644 &first_stmt_def1_type,
645 &first_stmt_const_oprnd,
647 &pattern0, &pattern1))
652 /* Add the costs of the node to the overall instance costs. */
653 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
654 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
656 /* Strided loads were reached - stop the recursion. */
661 VEC_safe_push (slp_tree, heap, *loads, *node);
663 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
668 /* We don't check here complex numbers chains, so we keep them in
669 LOADS for further check in vect_supported_load_permutation_p. */
670 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
671 VEC_safe_push (slp_tree, heap, *loads, *node);
677 /* Create SLP_TREE nodes for the definition node/s. */
678 if (first_stmt_dt0 == vect_internal_def)
680 slp_tree left_node = XNEW (struct _slp_tree);
681 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
682 SLP_TREE_VEC_STMTS (left_node) = NULL;
683 SLP_TREE_LEFT (left_node) = NULL;
684 SLP_TREE_RIGHT (left_node) = NULL;
685 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
686 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
/* Recurse on operand 0's def stmts to build the left subtree.  */
687 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
688 inside_cost, outside_cost, ncopies_for_cost,
689 max_nunits, load_permutation, loads,
690 vectorization_factor))
693 SLP_TREE_LEFT (*node) = left_node;
696 if (first_stmt_dt1 == vect_internal_def)
698 slp_tree right_node = XNEW (struct _slp_tree);
699 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
700 SLP_TREE_VEC_STMTS (right_node) = NULL;
701 SLP_TREE_LEFT (right_node) = NULL;
702 SLP_TREE_RIGHT (right_node) = NULL;
703 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
704 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
/* Recurse on operand 1's def stmts to build the right subtree.  */
705 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
706 inside_cost, outside_cost, ncopies_for_cost,
707 max_nunits, load_permutation, loads,
708 vectorization_factor))
711 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP: print each scalar
   stmt of the node, then recurse into both children.  */
719 vect_print_slp_tree (slp_tree node)
727 fprintf (vect_dump, "node ");
728 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
730 fprintf (vect_dump, "\n\tstmt %d ", i);
731 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
733 fprintf (vect_dump, "\n");
735 vect_print_slp_tree (SLP_TREE_LEFT (node));
736 vect_print_slp_tree (SLP_TREE_RIGHT (node));
740 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
741 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
742 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
743 stmts in NODE are to be marked. */
746 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* Set the SLP type on the selected stmt(s) of this node, then apply
   the same marking recursively to both children.  */
754 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
756 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
758 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
759 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
763 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
766 vect_mark_slp_stmts_relevant (slp_tree node)
770 stmt_vec_info stmt_info;
775 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
777 stmt_info = vinfo_for_stmt (stmt);
/* A stmt may be visited from several nodes; it must either be not
   yet relevant or already marked used-in-scope.  */
778 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
779 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
780 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
783 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
784 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
788 /* Check if the permutation required by the SLP INSTANCE is supported.
789 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
792 vect_supported_slp_permutation_p (slp_instance instance)
794 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
795 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
796 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
797 VEC (slp_tree, heap) *sorted_loads = NULL;
799 slp_tree *tmp_loads = NULL;
800 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
803 /* FORNOW: The only supported loads permutation is loads from the same
804 location in all the loads in the node, when the data-refs in
805 nodes of LOADS constitute an interleaving chain.
806 Sort the nodes according to the order of accesses in the chain. */
807 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation in strides of GROUP_SIZE, pairing each
   permutation index with its load node.  */
809 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
810 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
811 i += group_size, j++)
813 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
814 /* Check that the loads are all in the same interleaving chain. */
815 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
817 if (vect_print_dump_info (REPORT_DETAILS))
819 fprintf (vect_dump, "Build SLP failed: unsupported data "
821 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place each load node at its position in the interleaving chain.  */
828 tmp_loads[index] = load;
831 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
832 for (i = 0; i < group_size; i++)
833 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
/* Replace the instance's load vector with the chain-ordered one.  */
835 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
836 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Final check: ask the target whether the permuted load can actually
   be generated (analysis-only call, no code emitted here).  */
839 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
840 SLP_INSTANCE_UNROLLING_FACTOR (instance),
848 /* Rearrange the statements of NODE according to PERMUTATION. */
851 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
852 VEC (int, heap) *permutation)
855 VEC (gimple, heap) *tmp_stmts;
856 unsigned int index, i;
/* Rearrange the children first, then this node's own stmts.  */
861 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
862 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
864 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
865 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so stmts can be placed by index below.  */
867 for (i = 0; i < group_size; i++)
868 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
870 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
872 index = VEC_index (int, permutation, i);
873 VEC_replace (gimple, tmp_stmts, index, stmt);
/* Swap in the permuted stmt vector, releasing the old one.  */
876 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
877 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
881 /* Check if the required load permutation is supported.
882 LOAD_PERMUTATION contains a list of indices of the loads.
883 In SLP this permutation is relative to the order of strided stores that are
884 the base of the SLP instance. */
887 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
888 VEC (int, heap) *load_permutation)
890 int i = 0, j, prev = -1, next, k, number_of_groups;
891 bool supported, bad_permutation = false;
893 slp_tree node, other_complex_node;
894 gimple stmt, first = NULL, other_node_first;
895 unsigned complex_numbers = 0;
897 /* FORNOW: permutations are only supported in SLP. */
901 if (vect_print_dump_info (REPORT_SLP))
903 fprintf (vect_dump, "Load permutation ");
904 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
905 fprintf (vect_dump, "%d ", next);
908 /* In case of reduction every load permutation is allowed, since the order
909 of the reduction statements is not important (as opposed to the case of
910 strided stores). The only condition we need to check is that all the
911 load nodes are of the same size and have the same permutation (and then
912 rearrange all the nodes of the SLP instance according to this
915 /* Check that all the load nodes are of the same size. */
916 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
918 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
919 != (unsigned) group_size)
/* Count nodes whose first stmt is a REALPART/IMAGPART extraction —
   they indicate complex-number loads handled specially below.  */
922 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
923 if (is_gimple_assign (stmt)
924 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
925 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
929 /* Complex operands can be swapped as following:
930 real_c = real_b + real_a;
931 imag_c = imag_a + imag_b;
932 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
933 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
934 chains are mixed, they match the above pattern. */
937 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
939 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
/* A stmt from a different chain is only acceptable in the
   exactly-two-complex-nodes case checked here.  */
945 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
947 if (complex_numbers != 2)
955 other_complex_node = VEC_index (slp_tree,
956 SLP_INSTANCE_LOADS (slp_instn), k);
957 other_node_first = VEC_index (gimple,
958 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
960 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
969 /* We checked that this case ok, so there is no need to proceed with
970 permutation tests. */
971 if (complex_numbers == 2)
973 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
974 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
978 node = SLP_INSTANCE_TREE (slp_instn);
979 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
980 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
981 instance, not all the loads belong to the same node or interleaving
982 group. Hence, we need to divide them into groups according to
984 number_of_groups = VEC_length (int, load_permutation) / group_size;
986 /* Reduction (there are no data-refs in the root).
987 In reduction chain the order of the loads is important. */
988 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
989 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
991 int first_group_load_index;
993 /* Compare all the permutation sequences to the first one. */
994 for (i = 1; i < number_of_groups; i++)
997 for (j = i * group_size; j < i * group_size + group_size; j++)
999 next = VEC_index (int, load_permutation, j);
1000 first_group_load_index = VEC_index (int, load_permutation, k);
1002 if (next != first_group_load_index)
1004 bad_permutation = true;
1011 if (bad_permutation)
1015 if (!bad_permutation)
1017 /* Check that the loads in the first sequence are different and there
1018 are no gaps between them. */
1019 load_index = sbitmap_alloc (group_size);
1020 sbitmap_zero (load_index);
1021 for (k = 0; k < group_size; k++)
1023 first_group_load_index = VEC_index (int, load_permutation, k);
1024 if (TEST_BIT (load_index, first_group_load_index))
1026 bad_permutation = true;
1030 SET_BIT (load_index, first_group_load_index);
/* Every index 0..group_size-1 must appear exactly once.  */
1033 if (!bad_permutation)
1034 for (k = 0; k < group_size; k++)
1035 if (!TEST_BIT (load_index, k))
1037 bad_permutation = true;
1041 sbitmap_free (load_index);
1044 if (!bad_permutation)
1046 /* This permutation is valid for reduction. Since the order of the
1047 statements in the nodes is not important unless they are memory
1048 accesses, we can rearrange the statements in all the nodes
1049 according to the order of the loads. */
1050 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1052 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1057 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1058 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1059 well (unless it's reduction). */
1060 if (VEC_length (int, load_permutation)
1061 != (unsigned int) (group_size * group_size))
1065 load_index = sbitmap_alloc (group_size);
1066 sbitmap_zero (load_index);
/* Each GROUP_SIZE-long run of the permutation must repeat a single
   index; record each such index in LOAD_INDEX.  */
1067 for (j = 0; j < group_size; j++)
1069 for (i = j * group_size, k = 0;
1070 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1073 if (i != j * group_size && next != prev)
1082 if (TEST_BIT (load_index, prev))
1088 SET_BIT (load_index, prev);
/* And the recorded indices must cover the whole 0..group_size-1 range.  */
1091 for (j = 0; j < group_size; j++)
1092 if (!TEST_BIT (load_index, j))
1095 sbitmap_free (load_index);
1097 if (supported && i == group_size * group_size
1098 && vect_supported_slp_permutation_p (slp_instn))
1105 /* Find the first load in the loop that belongs to INSTANCE.
1106 When loads are in several SLP nodes, there can be a case in which the first
1107 load does not appear in the first SLP node to be transformed, causing
1108 incorrect order of statements. Since we generate all the loads together,
1109 they must be inserted before the first load of the SLP instance and not
1110 before the first load of the first node of the instance. */
1113 vect_find_first_load_in_slp_instance (slp_instance instance)
1117 gimple first_load = NULL, load;
/* Fold get_earlier_stmt over every load of every load node to find
   the earliest load stmt in the instance.  */
1119 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1120 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1121 first_load = get_earlier_stmt (load, first_load);
1127 /* Find the last store in SLP INSTANCE.  Scans the scalar stmts of the
   instance's root node (the stores) and returns the latest one.  */
1130 vect_find_last_store_in_slp_instance (slp_instance instance)
1134 gimple last_store = NULL, store;
1136 node = SLP_INSTANCE_TREE (instance);
1138 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1140 last_store = get_later_stmt (store, last_store);
1146 /* Analyze an SLP instance starting from a group of strided stores. Call
1147 vect_build_slp_tree to build a tree of packed stmts if possible.
1148 Return FALSE if it's impossible to SLP any stmt in the loop. */
1151 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1154 slp_instance new_instance;
1155 slp_tree node = XNEW (struct _slp_tree);
1156 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1157 unsigned int unrolling_factor = 1, nunits;
1158 tree vectype, scalar_type = NULL_TREE;
1160 unsigned int vectorization_factor = 0;
1161 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1162 unsigned int max_nunits = 0;
1163 VEC (int, heap) *load_permutation;
1164 VEC (slp_tree, heap) *loads;
1165 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* Determine VECTYPE and GROUP_SIZE from the kind of root STMT:
   strided store group (has a data-ref), reduction chain, or plain
   reduction (the latter two require LOOP_VINFO).  */
1167 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1171 scalar_type = TREE_TYPE (DR_REF (dr));
1172 vectype = get_vectype_for_scalar_type (scalar_type);
1176 gcc_assert (loop_vinfo);
1177 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1180 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1184 gcc_assert (loop_vinfo);
1185 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1186 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1191 if (vect_print_dump_info (REPORT_SLP))
1193 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1194 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1202 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1204 /* No multitypes in BB SLP. */
1205 vectorization_factor = nunits;
1207 /* Calculate the unrolling factor. */
1208 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* BB SLP has no loop to unroll, so any required unrolling is fatal.  */
1209 if (unrolling_factor != 1 && !loop_vinfo)
1211 if (vect_print_dump_info (REPORT_SLP))
1212 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1218 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1219 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1221 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1223 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1226 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1227 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1232 /* Collect reduction statements. */
1233 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1236 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
/* Initialize the fresh root node's remaining fields.  */
1239 SLP_TREE_VEC_STMTS (node) = NULL;
1240 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1241 SLP_TREE_LEFT (node) = NULL;
1242 SLP_TREE_RIGHT (node) = NULL;
1243 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1244 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1246 /* Calculate the number of vector stmts to create based on the unrolling
1247 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1248 GROUP_SIZE / NUNITS otherwise. */
1249 ncopies_for_cost = unrolling_factor * group_size / nunits;
1251 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1252 loads = VEC_alloc (slp_tree, heap, group_size);
1254 /* Build the tree for the SLP instance. */
1255 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1256 &inside_cost, &outside_cost, ncopies_for_cost,
1257 &max_nunits, &load_permutation, &loads,
1258 vectorization_factor))
1260 /* Create a new SLP instance. */
1261 new_instance = XNEW (struct _slp_instance);
1262 SLP_INSTANCE_TREE (new_instance) = node;
1263 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1264 /* Calculate the unrolling factor based on the smallest type in the
1266 if (max_nunits > nunits)
1267 unrolling_factor = least_common_multiple (max_nunits, group_size)
1270 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1271 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1272 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1273 SLP_INSTANCE_LOADS (new_instance) = loads;
1274 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1275 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If the tree contains load nodes, verify their permutation is one
   we can generate; otherwise drop the whole instance.  */
1276 if (VEC_length (slp_tree, loads))
1278 if (!vect_supported_load_permutation_p (new_instance, group_size,
1281 if (vect_print_dump_info (REPORT_SLP))
1283 fprintf (vect_dump, "Build SLP failed: unsupported load "
1285 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1288 vect_free_slp_instance (new_instance);
1292 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1293 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is not needed.  */
1296 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the loop or basic-block info, whichever
   analysis is running.  */
1299 VEC_safe_push (slp_instance, heap,
1300 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1303 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1306 if (vect_print_dump_info (REPORT_SLP))
1307 vect_print_slp_tree (node);
1312 /* Failed to SLP. */
1313 /* Free the allocated memory. */
1314 vect_free_slp_tree (node);
1315 VEC_free (int, heap, load_permutation);
1316 VEC_free (slp_tree, heap, loads);
1322 /* Check if there are stmts in the loop that can be vectorized using SLP.
1323 trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): this extract elides original source lines (numbering is
   non-contiguous), so the body below is an incomplete fragment.  */
1326 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1329 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1330 gimple first_element;
1333 if (vect_print_dump_info (REPORT_SLP))
1334 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Loop SLP: take the store groups and reduction (chain) roots recorded
   during loop analysis; basic-block SLP: take store groups from BB_VINFO.  */
1338 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1339 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1340 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1343 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1345 /* Find SLP sequences starting from groups of strided stores. */
1346 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1347 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1350 if (bb_vinfo && !ok)
1352 if (vect_print_dump_info (REPORT_SLP))
1353 fprintf (vect_dump, "Failed to SLP the basic block.");
1359 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1361 /* Find SLP sequences starting from reduction chains. */
1362 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1363 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1368 /* Don't try to vectorize SLP reductions if a reduction chain was
   detected.  */
1373 /* Find SLP sequences starting from groups of reductions. */
1374 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1375 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1376 VEC_index (gimple, reductions, 0)))
1383 /* For each possible SLP instance decide whether to SLP it and calculate overall
1384 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1385 least one instance. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1388 vect_make_slp_decision (loop_vec_info loop_vinfo)
1390 unsigned int i, unrolling_factor = 1;
1391 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1392 slp_instance instance;
1393 int decided_to_slp = 0;
1395 if (vect_print_dump_info (REPORT_SLP))
1396 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1398 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1400 /* FORNOW: SLP if you can. */
/* The overall unrolling factor is the maximum over all instances.  */
1401 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1402 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1404 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1405 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1406 loop-based vectorization. Such stmts will be marked as HYBRID. */
1407 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1411 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1413 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1414 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1415 decided_to_slp, unrolling_factor);
1417 return (decided_to_slp > 0);
1421 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1422 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1425 vect_detect_hybrid_slp_stmts (slp_tree node)
1429 imm_use_iterator imm_iter;
1431 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA result is used by a relevant (or vectorizable
   cycle) stmt outside SLP forces the node to hybrid — except when the use
   is a reduction PHI.  */
1436 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1437 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1438 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1439 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1440 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1441 && !STMT_SLP_TYPE (stmt_vinfo)
1442 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1443 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1444 && !(gimple_code (use_stmt) == GIMPLE_PHI
1445 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1446 == vect_reduction_def))
1447 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both children of the SLP tree.  */
1449 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1450 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1454 /* Find stmts that must be both vectorized and SLPed. */
/* Walks every SLP instance of the loop and marks hybrid stmts in its tree.
   NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1457 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1460 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1461 slp_instance instance;
1463 if (vect_print_dump_info (REPORT_SLP))
1464 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1466 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1467 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1471 /* Create and initialize a new bb_vec_info struct for BB, as well as
1472 stmt_vec_info structs for all the stmts in it. */
/* NOTE(review): extract elides source lines (e.g. the final return of RES),
   so the body below is an incomplete fragment.  */
1475 new_bb_vec_info (basic_block bb)
1477 bb_vec_info res = NULL;
1478 gimple_stmt_iterator gsi;
/* Zero-initialized so all vectors/fields start out empty.  */
1480 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1481 BB_VINFO_BB (res) = bb;
1483 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1485 gimple stmt = gsi_stmt (gsi);
/* Reset the uid and attach a fresh stmt_vec_info to every stmt in BB.  */
1486 gimple_set_uid (stmt, 0);
1487 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1490 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1491 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1498 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1499 stmts in the basic block. */
/* NOTE(review): extract elides source lines (e.g. the final free of BB_VINFO
   itself), so the body below is an incomplete fragment.  */
1502 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1505 gimple_stmt_iterator si;
1510 bb = BB_VINFO_BB (bb_vinfo);
1512 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1514 gimple stmt = gsi_stmt (si);
1515 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1518 /* Free stmt_vec_info. */
1519 free_stmt_vec_info (stmt);
/* Release the per-BB data-reference, dependence and SLP containers.  */
1522 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1523 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1524 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1525 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1531 /* Analyze statements contained in SLP tree node after recursively analyzing
1532 the subtree. Return TRUE if the operations are supported. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1535 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Postorder: both children must be analyzable before this node.  */
1544 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1545 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1548 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1551 gcc_assert (stmt_info);
1552 gcc_assert (PURE_SLP_STMT (stmt_info));
1554 if (!vect_analyze_stmt (stmt, &dummy, node))
1562 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1563 operations are supported. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1566 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1568 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1569 slp_instance instance;
/* No increment here: failing instances are removed in place, which shifts
   the next instance into index I.  */
1572 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1574 if (!vect_slp_analyze_node_operations (bb_vinfo,
1575 SLP_INSTANCE_TREE (instance)))
1577 vect_free_slp_instance (instance);
1578 VEC_ordered_remove (slp_instance, slp_instances, i);
1584 if (!VEC_length (slp_instance, slp_instances))
1590 /* Check if loads and stores are mixed in the basic block (in that
1591 case if we are not sure that the accesses differ, we can't vectorize the
1592 basic block). Also return FALSE in case that there is statement marked as
1593 not vectorizable. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1596 vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
1598 basic_block bb = BB_VINFO_BB (bb_vinfo);
1599 gimple_stmt_iterator si;
1600 bool detected_store = false;
1602 struct data_reference *dr;
1604 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1606 stmt = gsi_stmt (si);
1608 /* We can't allow not analyzed statements, since they may contain data
   references we cannot reason about.  */
1610 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
1613 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
/* A read following an earlier store in the same BB means mixed accesses.  */
1616 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1617 if (DR_IS_READ (dr) && detected_store)
1620 if (!DR_IS_READ (dr))
1621 detected_store = true;
1627 /* Check if vectorization of the basic block is profitable. */
/* Sums the vector inside/outside costs over all SLP instances and compares
   against the target's scalar cost of the pure-SLP stmts in the block.
   NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1630 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1632 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1633 slp_instance instance;
1635 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1636 unsigned int stmt_cost;
1638 gimple_stmt_iterator si;
1639 basic_block bb = BB_VINFO_BB (bb_vinfo);
1640 stmt_vec_info stmt_info = NULL;
1641 tree dummy_type = NULL;
1644 /* Calculate vector costs. */
1645 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1647 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1648 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1651 /* Calculate scalar cost. */
1652 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1654 stmt = gsi_stmt (si);
1655 stmt_info = vinfo_for_stmt (stmt);
/* Only pure-SLP, vectorizable stmts count toward the scalar cost.  */
1657 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1658 || !PURE_SLP_STMT (stmt_info))
/* Ask the target for per-stmt scalar costs: load, store, or generic stmt.  */
1661 if (STMT_VINFO_DATA_REF (stmt_info))
1663 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1664 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1665 (scalar_load, dummy_type, dummy);
1667 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1668 (scalar_store, dummy_type, dummy);
1671 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1672 (scalar_stmt, dummy_type, dummy);
1674 scalar_cost += stmt_cost;
1677 if (vect_print_dump_info (REPORT_COST))
1679 fprintf (vect_dump, "Cost model analysis: \n");
1680 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1682 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1684 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1687 /* Vectorization is profitable if its cost is less than the cost of the
   scalar version.  */
1689 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1695 /* Check if the basic block can be vectorized. */
/* Driver for basic-block SLP analysis: size check, data-ref analysis,
   dependences, alignment, access analysis, SLP tree building, operation
   analysis and the cost model — bailing out (and destroying BB_VINFO) at
   the first failed step.
   NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1698 vect_slp_analyze_bb (basic_block bb)
1700 bb_vec_info bb_vinfo;
1701 VEC (ddr_p, heap) *ddrs;
1702 VEC (slp_instance, heap) *slp_instances;
1703 slp_instance instance;
1705 gimple_stmt_iterator gsi;
1707 int max_vf = MAX_VECTORIZATION_FACTOR;
1708 bool data_dependence_in_bb = false;
1710 current_vector_size = 0;
1712 if (vect_print_dump_info (REPORT_DETAILS))
1713 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count real instructions (skip debug stmts, nops and labels) to enforce
   the SLP size limit below.  */
1715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 gimple stmt = gsi_stmt (gsi);
1718 if (!is_gimple_debug (stmt)
1719 && !gimple_nop_p (stmt)
1720 && gimple_code (stmt) != GIMPLE_LABEL)
1724 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1726 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1727 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1733 bb_vinfo = new_bb_vec_info (bb);
1737 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1740 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1743 destroy_bb_vec_info (bb_vinfo);
1747 ddrs = BB_VINFO_DDRS (bb_vinfo);
1748 if (!VEC_length (ddr_p, ddrs))
1750 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1751 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1754 destroy_bb_vec_info (bb_vinfo);
1758 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
1759 &data_dependence_in_bb)
1761 || (data_dependence_in_bb
1762 && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
1764 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1765 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1766 "in basic block.\n");
1768 destroy_bb_vec_info (bb_vinfo);
1772 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1774 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1775 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1778 destroy_bb_vec_info (bb_vinfo);
1782 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1784 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1785 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1788 destroy_bb_vec_info (bb_vinfo);
1792 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1794 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1795 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1798 destroy_bb_vec_info (bb_vinfo);
1802 /* Check the SLP opportunities in the basic block, analyze and build SLP
   trees.  */
1804 if (!vect_analyze_slp (NULL, bb_vinfo))
1806 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1807 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1808 "in basic block.\n");
1810 destroy_bb_vec_info (bb_vinfo);
1814 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1816 /* Mark all the statements that we want to vectorize as pure SLP and
   relevant.  */
1818 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1820 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1821 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1824 if (!vect_slp_analyze_operations (bb_vinfo))
1826 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1827 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1829 destroy_bb_vec_info (bb_vinfo);
1833 /* Cost model: check if the vectorization is worthwhile. */
1834 if (flag_vect_cost_model
1835 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1837 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1838 fprintf (vect_dump, "not vectorized: vectorization is not "
1841 destroy_bb_vec_info (bb_vinfo);
1845 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1852 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1853 the number of created vector stmts depends on the unrolling factor).
1854 However, the actual number of vector stmts for every SLP node depends on
1855 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1856 should be updated. In this function we assume that the inside costs
1857 calculated in vect_model_xxx_cost are linear in ncopies. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1860 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1862 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1863 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1864 slp_instance instance;
1866 if (vect_print_dump_info (REPORT_SLP))
1867 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1869 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1870 /* We assume that costs are linear in ncopies. */
1871 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1872 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1876 /* For constant and loop invariant defs of SLP_NODE this function returns
1877 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1878 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1879 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1880 REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
1884 vect_get_constant_vectors (tree op, slp_tree slp_node,
1885 VEC (tree, heap) **vec_oprnds,
1886 unsigned int op_num, unsigned int number_of_vectors,
1889 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1890 gimple stmt = VEC_index (gimple, stmts, 0);
1891 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1895 int j, number_of_places_left_in_vector;
1898 int group_size = VEC_length (gimple, stmts);
1899 unsigned int vec_num, i;
1900 int number_of_copies = 1;
1901 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1902 bool constant_p, is_store;
1903 tree neutral_op = NULL;
1904 enum tree_code code = gimple_assign_rhs_code (stmt);
/* For a reduction, compute the neutral element of the operation (0 for
   sum-like, 1 for product, -1 for bit-and) or fall back to the initial
   value from the loop-preheader PHI argument.  */
1908 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1909 && reduc_index != -1)
1911 op_num = reduc_index - 1;
1912 op = gimple_op (stmt, reduc_index);
1913 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1914 we need either neutral operands or the original operands. See
1915 get_initial_def_for_reduction() for details. */
1918 case WIDEN_SUM_EXPR:
1924 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1925 neutral_op = build_real (TREE_TYPE (op), dconst0);
1927 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1932 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1933 neutral_op = build_real (TREE_TYPE (op), dconst1);
1935 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1940 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1945 def_stmt = SSA_NAME_DEF_STMT (op);
1946 loop = (gimple_bb (stmt))->loop_father;
1947 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1948 loop_preheader_edge (loop));
1956 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1959 op = gimple_assign_rhs1 (stmt);
1966 if (CONSTANT_CLASS_P (op))
1971 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1972 gcc_assert (vector_type);
1973 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1975 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1976 created vectors. It is greater than 1 if unrolling is performed.
1978 For example, we have two scalar operands, s1 and s2 (e.g., group of
1979 strided accesses of size two), while NUNITS is four (i.e., four scalars
1980 of this type can be packed in a vector). The output vector will contain
1981 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1984 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1985 containing the operands.
1987 For example, NUNITS is four as before, and the group size is 8
1988 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1989 {s5, s6, s7, s8}. */
1991 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1993 number_of_places_left_in_vector = nunits;
1994 for (j = 0; j < number_of_copies; j++)
/* Iterate the group in reverse so the built vectors come out reversed;
   they are re-inverted after the loop.  */
1996 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1999 op = gimple_assign_rhs1 (stmt);
2001 op = gimple_op (stmt, op_num + 1);
2003 if (reduc_index != -1)
2005 loop = (gimple_bb (stmt))->loop_father;
2006 def_stmt = SSA_NAME_DEF_STMT (op);
2010 /* Get the def before the loop. In reduction chain we have only
2011 one initial value. */
2012 if ((j != (number_of_copies - 1)
2013 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2018 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2019 loop_preheader_edge (loop));
2022 /* Create 'vect_ = {op0,op1,...,opn}'. */
2023 t = tree_cons (NULL_TREE, op, t);
2025 number_of_places_left_in_vector--;
2027 if (number_of_places_left_in_vector == 0)
2029 number_of_places_left_in_vector = nunits;
/* All-constant elements become a VECTOR_CST, otherwise a CONSTRUCTOR.  */
2032 vec_cst = build_vector (vector_type, t);
2034 vec_cst = build_constructor_from_list (vector_type, t);
2035 VEC_quick_push (tree, voprnds,
2036 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2042 /* Since the vectors are created in the reverse order, we should invert
   them before pushing to the output.  */
2044 vec_num = VEC_length (tree, voprnds);
2045 for (j = vec_num - 1; j >= 0; j--)
2047 vop = VEC_index (tree, voprnds, j);
2048 VEC_quick_push (tree, *vec_oprnds, vop);
2051 VEC_free (tree, heap, voprnds);
2053 /* In case that VF is greater than the unrolling factor needed for the SLP
2054 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2055 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2056 to replicate the vectors. */
2057 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2059 tree neutral_vec = NULL;
2064 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2066 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2070 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2071 VEC_quick_push (tree, *vec_oprnds, vop);
2077 /* Get vectorized definitions from SLP_NODE that contains corresponding
2078 vectorized def-stmts. */
/* Pushes the LHS of each vector def-stmt of SLP_NODE onto *VEC_OPRNDS.
   NOTE(review): extract elides source lines; body is an incomplete fragment.  */
2081 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2084 gimple vec_def_stmt;
2087 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2089 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2091 gcc_assert (vec_def_stmt);
2092 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2093 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2098 /* Get vectorized definitions for SLP_NODE.
2099 If the scalar definitions are loop invariants or constants, collect them and
2100 call vect_get_constant_vectors() to create vector stmts.
2101 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2102 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2103 vect_get_slp_vect_defs() to retrieve them.
2104 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2105 the right node. This is used when the second operand must remain scalar. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
2108 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2109 VEC (tree,heap) **vec_oprnds0,
2110 VEC (tree,heap) **vec_oprnds1, int reduc_index)
2113 enum tree_code code;
2114 int number_of_vects;
2115 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2117 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2118 /* The number of vector defs is determined by the number of vector statements
2119 in the node from which we get those statements. */
2120 if (SLP_TREE_LEFT (slp_node))
2121 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2124 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2125 /* Number of vector stmts was calculated according to LHS in
2126 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2127 necessary. See vect_get_smallest_scalar_type () for details. */
2128 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2130 if (rhs_size_unit != lhs_size_unit)
2132 number_of_vects *= rhs_size_unit;
2133 number_of_vects /= lhs_size_unit;
2137 /* Allocate memory for vectorized defs. */
2138 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
2140 /* SLP_NODE corresponds either to a group of stores or to a group of
2141 unary/binary operations. We don't call this function for loads.
2142 For reduction defs we call vect_get_constant_vectors(), since we are
2143 looking for initial loop invariant values. */
2144 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
2145 /* The defs are already vectorized. */
2146 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2148 /* Build vectors from scalar defs. */
2149 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
2152 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2153 /* Since we don't call this function with loads, this is a group of
   stores.  */
2157 /* For reductions, we only need initial values. */
2158 if (reduc_index != -1)
/* Second operand: only for binary RHS codes and when the caller asked.  */
2161 code = gimple_assign_rhs_code (first_stmt);
2162 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
2165 /* The number of vector defs is determined by the number of vector statements
2166 in the node from which we get those statements. */
2167 if (SLP_TREE_RIGHT (slp_node))
2168 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2170 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2172 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2174 if (SLP_TREE_RIGHT (slp_node))
2175 /* The defs are already vectorized. */
2176 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2178 /* Build vectors from scalar defs. */
2179 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2184 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2185 building a vector of type MASK_TYPE from it) and two input vectors placed in
2186 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2187 shifting by STRIDE elements of DR_CHAIN for every copy.
2188 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2190 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2191 the created stmts must be inserted. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
2194 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2195 tree mask, int first_vec_indx, int second_vec_indx,
2196 gimple_stmt_iterator *gsi, slp_tree node,
2197 tree builtin_decl, tree vectype,
2198 VEC(tree,heap) *dr_chain,
2199 int ncopies, int vect_stmts_counter)
2202 gimple perm_stmt = NULL;
2203 stmt_vec_info next_stmt_info;
2205 tree first_vec, second_vec, data_ref;
2207 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2209 /* Initialize the vect stmts of NODE with NULLs so the generated stmts can
   be stored at the proper indices below.  */
2211 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2212 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2213 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2215 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2216 for (i = 0; i < ncopies; i++)
2218 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2219 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2221 /* Generate the permute statement. */
2222 perm_stmt = gimple_build_call (builtin_decl,
2223 3, first_vec, second_vec, mask);
2224 data_ref = make_ssa_name (perm_dest, perm_stmt);
2225 gimple_call_set_lhs (perm_stmt, data_ref);
2226 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2228 /* Store the vector statement in NODE. */
2229 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2230 stride * i + vect_stmts_counter, perm_stmt);
/* Advance to the input vectors of the next copy.  */
2232 first_vec_indx += stride;
2233 second_vec_indx += stride;
2236 /* Mark the scalar stmt as vectorized. */
2237 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2238 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2242 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2243 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2244 representation. Check that the mask is valid and return FALSE if not.
2245 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2246 the next vector, i.e., the current first vector is not needed. */
/* NOTE(review): extract elides source lines; body is an incomplete fragment.  */
2249 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2250 int mask_nunits, bool only_one_vec, int index,
2251 int *mask, int *current_mask_element,
2252 bool *need_next_vector, int *number_of_mask_fixes,
2253 bool *mask_fixed, bool *needs_first_vector)
2257 /* Convert to target specific representation. */
2258 *current_mask_element = first_mask_element + m;
2259 /* Adjust the value in case it's a mask for second and third vectors. */
2260 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2262 if (*current_mask_element < mask_nunits)
2263 *needs_first_vector = true;
2265 /* We have only one input vector to permute but the mask accesses values in
2266 the next vector as well. */
2267 if (only_one_vec && *current_mask_element >= mask_nunits)
2269 if (vect_print_dump_info (REPORT_DETAILS))
2271 fprintf (vect_dump, "permutation requires at least two vectors ");
2272 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2278 /* The mask requires the next vector. */
2279 if (*current_mask_element >= mask_nunits * 2)
2281 if (*needs_first_vector || *mask_fixed)
2283 /* We either need the first vector too or have already moved to the
2284 next vector. In both cases, this permutation needs three
   vectors — unsupported.  */
2286 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "permutation requires at "
2289 "least three vectors ");
2290 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2296 /* We move to the next vector, dropping the first one and working with
2297 the second and the third - we need to adjust the values of the mask
   accordingly.  */
2299 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2301 for (i = 0; i < index; i++)
2302 mask[i] -= mask_nunits * *number_of_mask_fixes;
2304 (*number_of_mask_fixes)++;
2308 *need_next_vector = *mask_fixed;
2310 /* This was the last element of this mask. Start a new one. */
2311 if (index == mask_nunits - 1)
2313 *number_of_mask_fixes = 1;
2314 *mask_fixed = false;
2315 *needs_first_vector = false;
2322 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2323 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2324 permute statements for SLP_NODE_INSTANCE. */
2326 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2327 gimple_stmt_iterator *gsi, int vf,
2328 slp_instance slp_node_instance, bool analyze_only)
2330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2331 tree mask_element_type = NULL_TREE, mask_type;
2332 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2334 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2335 gimple next_scalar_stmt;
2336 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2337 int first_mask_element;
2338 int index, unroll_factor, *mask, current_mask_element, ncopies;
2339 bool only_one_vec = false, need_next_vector = false;
/* NOTE(review): tail of vect_transform_slp_perm_load -- its signature and
   earlier locals are above this excerpt, and the listing below is gappy
   (several closing braces / early-return lines are missing from view).  */
2340 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2341 int number_of_mask_fixes = 1;
2342 bool mask_fixed = false;
2343 bool needs_first_vector = false;
/* Fail early if the target has no vector-permute builtin at all.  */
2345 if (!targetm.vectorize.builtin_vec_perm)
2347 if (vect_print_dump_info (REPORT_DETAILS))
2349 fprintf (vect_dump, "no builtin for vect permute for ");
2350 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Ask the target for the permute builtin and for the element type it
   uses for permutation masks on this vector type.  */
2356 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2357 &mask_element_type);
2358 if (!builtin_decl || !mask_element_type)
2360 if (vect_print_dump_info (REPORT_DETAILS))
2362 fprintf (vect_dump, "no builtin for vect permute for ");
2363 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2369 mask_type = get_vectype_for_scalar_type (mask_element_type);
2370 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
/* Scratch buffer for one mask, one int per mask element; freed by the
   cleanup code below this excerpt (not visible here).  */
2371 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2372 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* SCALE = mask elements per data element, e.g. bytes per element when the
   target's permute mask is byte-granular (the Altivec case mentioned in
   the big comment below).  */
2373 scale = mask_nunits / nunits;
2374 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2376 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2377 unrolling factor. */
2378 orig_vec_stmts_num = group_size *
2379 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2380 if (orig_vec_stmts_num == 1)
2381 only_one_vec = true;
2383 /* Number of copies is determined by the final vectorization factor
2384 relatively to SLP_NODE_INSTANCE unrolling factor. */
2385 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2387 /* Generate permutation masks for every NODE. Number of masks for each NODE
2388 is equal to GROUP_SIZE.
2389 E.g., we have a group of three nodes with three loads from the same
2390 location in each node, and the vector size is 4. I.e., we have a
2391 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2392 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2393 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2396 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2397 scpecific type, e.g., in bytes for Altivec.
2398 The last mask is illegal since we assume two operands for permute
2399 operation, and the mask element values can't be outside that range.
2400 Hence, the last mask must be converted into {2,5,5,5}.
2401 For the first two permutations we need the first and the second input
2402 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2403 we need the second and the third vectors: {b1,c1,a2,b2} and
/* For each load node of the instance, build its GROUP_SIZE masks and emit
   the permute statements.  */
2406 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2410 vect_stmts_counter = 0;
/* Track which two consecutive input vectors the current permute reads.  */
2412 first_vec_index = vec_index++;
2414 second_vec_index = first_vec_index;
2416 second_vec_index = vec_index++;
2418 for (j = 0; j < unroll_factor; j++)
2420 for (k = 0; k < group_size; k++)
/* Scalar position of this node's element in iteration J, scaled to
   mask-element granularity.  */
2422 first_mask_element = (i + j * group_size) * scale;
2423 for (m = 0; m < scale; m++)
/* NOTE(review): "¤t_mask_element" below is an HTML-entity mangling
   of "&current_mask_element" in the original source.  */
2425 if (!vect_get_mask_element (stmt, first_mask_element, m,
2426 mask_nunits, only_one_vec, index, mask,
2427 ¤t_mask_element, &need_next_vector,
2428 &number_of_mask_fixes, &mask_fixed,
2429 &needs_first_vector))
2432 mask[index++] = current_mask_element;
/* A full mask has been accumulated -- materialize it as a VECTOR_CST
   and emit one permute.  */
2435 if (index == mask_nunits)
2437 tree mask_vec = NULL;
/* Build the constant list back-to-front so element 0 ends up first.  */
2439 while (--index >= 0)
2441 tree t = build_int_cst (mask_element_type, mask[index]);
2442 mask_vec = tree_cons (NULL, t, mask_vec);
2444 mask_vec = build_vector (mask_type, mask_vec);
/* The target may reject this particular mask even though it has a
   generic permute builtin.  */
2447 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2450 if (vect_print_dump_info (REPORT_DETAILS))
2452 fprintf (vect_dump, "unsupported vect permute ");
2453 print_generic_expr (vect_dump, mask_vec, 0);
/* Slide the two-vector window forward when the mask consumed the
   first input vector.  */
2461 if (need_next_vector)
2463 first_vec_index = second_vec_index;
2464 second_vec_index = vec_index;
2467 next_scalar_stmt = VEC_index (gimple,
2468 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2470 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2471 mask_vec, first_vec_index, second_vec_index,
2472 gsi, node, builtin_decl, vectype, dr_chain,
2473 ncopies, vect_stmts_counter++);
2486 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): this listing is gappy -- the return type, braces, the
   NULL-node guard and the trailing "return is_store;" are not visible.  */
2489 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2490 unsigned int vectorization_factor)
2493 bool strided_store, is_store;
2494 gimple_stmt_iterator si;
2495 stmt_vec_info stmt_info;
2496 unsigned int vec_stmts_size, nunits, group_size;
2499 slp_tree loads_node;
/* Postorder walk: vectorize both children first so their vector stmts
   exist when this node is transformed.  */
2504 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2505 vectorization_factor);
2506 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2507 vectorization_factor);
/* The first scalar stmt of the node acts as the representative stmt.  */
2509 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2510 stmt_info = vinfo_for_stmt (stmt);
2512 /* VECTYPE is the type of the destination. */
2513 vectype = STMT_VINFO_VECTYPE (stmt_info);
2514 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2515 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2517 /* For each SLP instance calculate number of vector stmts to be created
2518 for the scalar stmts in each node of the SLP tree. Number of vector
2519 elements in one vector iteration is the number of scalar elements in
2520 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2522 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2524 /* In case of load permutation we have to allocate vectorized statements for
2525 all the nodes that participate in that permutation. */
2526 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2528 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
/* A load node may be shared; allocate its vector-stmt vector only once.  */
2530 if (!SLP_TREE_VEC_STMTS (loads_node))
2532 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2534 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2539 if (!SLP_TREE_VEC_STMTS (node))
2541 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2542 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2545 if (vect_print_dump_info (REPORT_DETAILS))
2547 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2548 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Choose the insertion point SI for the generated vector stmts.  */
2551 /* Loads should be inserted before the first load. */
2552 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2553 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2554 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2555 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
/* Pattern stmts are not in the IL; insert at the related original stmt.  */
2556 else if (is_pattern_stmt_p (stmt_info))
2557 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2559 si = gsi_for_stmt (stmt);
2561 /* Stores should be inserted just before the last store. */
2562 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2563 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2565 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2566 si = gsi_for_stmt (last_store);
2569 /* Mark the first element of the reduction chain as reduction to properly
2570 transform the node. In the analysis phase only the last element of the
2571 chain is marked as reduction. */
2572 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2573 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2575 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2576 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
/* Emit the vector code for the representative stmt of this node.  */
2579 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2584 /* Generate vector code for all SLP instances in the loop/basic block. */
/* Exactly one of LOOP_VINFO / BB_VINFO is expected to be non-NULL; the
   corresponding guard lines are missing from this gappy listing.  */
2587 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2589 VEC (slp_instance, heap) *slp_instances;
2590 slp_instance instance;
2592 bool is_store = false;
/* Loop case: take the instances and VF from the loop info.  */
2596 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2597 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Basic-block case.  */
2601 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
/* First pass: generate vector code for every instance tree.  */
2605 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2607 /* Schedule the tree of INSTANCE. */
2608 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2610 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2611 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2612 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: the original scalar stores are now dead -- remove them
   (scalar loads are left for DCE, only stmts with a data-ref are removed
   here).  */
2615 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2617 slp_tree root = SLP_INSTANCE_TREE (instance);
2620 gimple_stmt_iterator gsi;
2622 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2623 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2625 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2628 /* Free the attached stmt_vec_info and remove the stmt. */
2629 gsi = gsi_for_stmt (store);
2630 gsi_remove (&gsi, true);
2631 free_stmt_vec_info (store);
2639 /* Vectorize the basic block. */
/* NOTE(review): return type, braces and the closing "}" fall outside this
   gappy listing.  */
2642 vect_slp_transform_bb (basic_block bb)
2644 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2645 gimple_stmt_iterator si;
/* Analysis must already have attached a bb_vec_info to this block.  */
2647 gcc_assert (bb_vinfo);
2649 if (vect_print_dump_info (REPORT_DETAILS))
2650 fprintf (vect_dump, "SLPing BB\n");
/* Walk the BB; the first stmt that belongs to an SLP instance triggers
   scheduling of all the instances at once.  */
2652 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2654 gimple stmt = gsi_stmt (si);
2655 stmt_vec_info stmt_info;
2657 if (vect_print_dump_info (REPORT_DETAILS))
2659 fprintf (vect_dump, "------>SLPing statement: ");
2660 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2663 stmt_info = vinfo_for_stmt (stmt);
2664 gcc_assert (stmt_info);
2666 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2667 if (STMT_SLP_TYPE (stmt_info))
2669 vect_schedule_slp (NULL, bb_vinfo);
2674 mark_sym_for_renaming (gimple_vop (cfun));
2675 /* The memory tags and pointers in vectorized statements need to
2676 have their SSA forms updated. FIXME, why can't this be delayed
2677 until all the loops have been transformed? */
2678 update_ssa (TODO_update_ssa);
2680 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
/* Release the bb_vec_info and all per-stmt vectorization data.  */
2683 destroy_bb_vec_info (bb_vinfo);