1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
46 find_bb_location (basic_block bb)
49 gimple_stmt_iterator si;
/* Walk the statements of BB in order; the first statement carrying a
   known source location determines the location of the whole block.
   NOTE(review): STMT is presumably fetched from SI via gsi_stmt () on an
   elided line -- confirm against the complete source.  */
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
68 vect_free_slp_tree (slp_tree node)
/* Post-order traversal: release both children before NODE's own vectors.  */
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
/* The scalar-stmt vector always exists; the vectorized-stmt vector may be
   NULL (nodes are created with it cleared), hence the guard.  */
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
88 /* Free the memory allocated for the SLP instance. */
91 vect_free_slp_instance (slp_instance instance)
/* Free the stmt tree first, then the per-instance auxiliary vectors:
   the load permutation indices and the list of load nodes.  */
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
/* Parameters:
   LOOP_VINFO/BB_VINFO  - exactly one is non-NULL: loop SLP vs. basic-block SLP.
   SLP_NODE             - the node whose first scalar stmt provides stmt_info.
   DEF_STMTS0/1         - out: defining stmts of operand 0 / operand 1.
   FIRST_STMT_DT0/1, FIRST_STMT_DEF0/1_TYPE, FIRST_STMT_CONST_OPRND
                        - in/out: info recorded from the group's first stmt,
                          matched against by every subsequent stmt.
   NCOPIES_FOR_COST     - multiplier for the cost-model calls below.
   PATTERN0/1           - in/out: whether operand 0/1 comes from a recognized
                          pattern; all stmts of the node must agree.  */
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
117 unsigned int i, number_of_oprnds;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
/* LOOP stays NULL for basic-block SLP; pattern detection below is
   loop-only.  NOTE(review): the guard 'if (loop_vinfo)' is presumably on an
   elided line before this assignment.  */
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Examine each RHS operand of STMT (operand I is gimple op I+1).  */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
/* Fail when the operand has no analyzable def (unless it is a constant,
   which legitimately has no defining stmt).  */
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
156 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
157 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
/* First stmt of the group: record that this operand is in a pattern.
   Later stmts: fail if they disagree with the recorded pattern flags.  */
159 if (!*first_stmt_dt0)
163 if (i == 1 && !*first_stmt_dt1)
165 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
167 if (vect_print_dump_info (REPORT_DETAILS))
169 fprintf (vect_dump, "Build SLP failed: some of the stmts"
170 " are in a pattern, and others are not ");
171 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Replace the pattern stmt by the stmt the pattern recognizer related
   it to, and re-read the def type from it.  */
178 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
179 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
181 if (*dt == vect_unknown_def_type)
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the (pattern-related) defining stmt;
   only PHIs and assignments are supported here.  */
188 switch (gimple_code (def_stmt))
191 def = gimple_phi_result (def_stmt);
195 def = gimple_assign_lhs (def_stmt);
199 if (vect_print_dump_info (REPORT_DETAILS))
200 fprintf (vect_dump, "unsupported defining stmt: ");
205 if (!*first_stmt_dt0)
207 /* op0 of the first stmt of the group - store its info. */
208 *first_stmt_dt0 = dt[i];
210 *first_stmt_def0_type = TREE_TYPE (def);
212 *first_stmt_const_oprnd = oprnd;
214 /* Analyze costs (for the first stmt of the group only). */
215 if (rhs_class != GIMPLE_SINGLE_RHS)
216 /* Not memory operation (we don't call this functions for loads). */
217 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
220 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
226 if (!*first_stmt_dt1 && i == 1)
228 /* op1 of the first stmt of the group - store its info. */
229 *first_stmt_dt1 = dt[i];
231 *first_stmt_def1_type = TREE_TYPE (def);
234 /* We assume that the stmt contains only one constant
235 operand. We fail otherwise, to be on the safe side. */
236 if (*first_stmt_const_oprnd)
238 if (vect_print_dump_info (REPORT_SLP))
239 fprintf (vect_dump, "Build SLP failed: two constant "
243 *first_stmt_const_oprnd = oprnd;
248 /* Not first stmt of the group, check that the def-stmt/s match
249 the def-stmt/s of the first stmt. Allow different definition
250 types for reduction chains: the first stmt must be a
251 vect_reduction_def (a phi node), and the rest
252 vect_internal_def. */
254 && ((*first_stmt_dt0 != dt[i]
255 && !(*first_stmt_dt0 == vect_reduction_def
256 && dt[i] == vect_internal_def))
257 || (*first_stmt_def0_type && def
258 && !types_compatible_p (*first_stmt_def0_type,
261 && ((*first_stmt_dt1 != dt[i]
262 && !(*first_stmt_dt1 == vect_reduction_def
263 && dt[i] == vect_internal_def))
264 || (*first_stmt_def1_type && def
265 && !types_compatible_p (*first_stmt_def1_type,
268 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
271 if (vect_print_dump_info (REPORT_SLP))
272 fprintf (vect_dump, "Build SLP failed: different types ");
279 /* Check the types of the definitions. */
/* Constants/externals need no def stmt collected; internal and reduction
   defs are pushed onto the per-operand def-stmt vectors for the caller to
   recurse into.  Everything else is rejected for now.  */
282 case vect_constant_def:
283 case vect_external_def:
286 case vect_internal_def:
287 case vect_reduction_def:
289 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
291 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
295 /* FORNOW: Not supported. */
296 if (vect_print_dump_info (REPORT_SLP))
298 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
299 print_generic_expr (vect_dump, def, TDF_SLIM);
310 /* Recursively build an SLP tree starting from NODE.
311 Fail (and return FALSE) if def-stmts are not isomorphic, require data
312 permutation or are of unsupported types of operation. Otherwise, return
/* Parameters:
   NODE                 - in/out: the node whose scalar stmts are analyzed;
                          children are attached on success.
   GROUP_SIZE           - number of scalar stmts per node.
   INSIDE_COST/OUTSIDE_COST - accumulated instance costs (in/out).
   MAX_NUNITS           - out: largest number of vector elements seen, used
                          by the caller to recompute the unrolling factor.
   LOAD_PERMUTATION     - out: index of each load in its interleaving chain.
   LOADS                - out: all load nodes discovered in the tree.  */
316 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
317 slp_tree *node, unsigned int group_size,
318 int *inside_cost, int *outside_cost,
319 int ncopies_for_cost, unsigned int *max_nunits,
320 VEC (int, heap) **load_permutation,
321 VEC (slp_tree, heap) **loads,
322 unsigned int vectorization_factor)
324 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
325 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
327 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
328 gimple stmt = VEC_index (gimple, stmts, 0);
329 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
330 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
331 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
332 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
334 bool stop_recursion = false, need_same_oprnds = false;
335 tree vectype, scalar_type, first_op1 = NULL_TREE;
336 unsigned int ncopies;
339 enum machine_mode optab_op2_mode;
340 enum machine_mode vec_mode;
341 tree first_stmt_const_oprnd = NULL_TREE;
342 struct data_reference *first_dr;
343 bool pattern0 = false, pattern1 = false;
345 bool permutation = false;
346 unsigned int load_place;
347 gimple first_load, prev_first_load = NULL;
349 /* For every stmt in NODE find its def stmt/s. */
350 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
352 if (vect_print_dump_info (REPORT_SLP))
354 fprintf (vect_dump, "Build SLP for ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
358 /* Fail to vectorize statements marked as unvectorizable. */
359 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
361 if (vect_print_dump_info (REPORT_SLP))
364 "Build SLP failed: unvectorizable statement ");
365 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* A NULL lhs means STMT is neither an assignment nor a value-returning
   call, which SLP cannot handle.  */
371 lhs = gimple_get_lhs (stmt);
372 if (lhs == NULL_TREE)
374 if (vect_print_dump_info (REPORT_SLP))
377 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
378 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Determine the vector type from the smallest scalar type in STMT.  */
384 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
385 vectype = get_vectype_for_scalar_type (scalar_type);
388 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
391 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
/* NCOPIES > 1 means multiple vector stmts per scalar stmt (multiple
   types); supported in loop SLP only.  */
396 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
399 if (vect_print_dump_info (REPORT_SLP))
400 fprintf (vect_dump, "SLP with multiple types ");
402 /* FORNOW: multiple types are unsupported in BB SLP. */
407 /* In case of multiple types we need to detect the smallest type. */
408 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
409 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
411 if (is_gimple_call (stmt))
412 rhs_code = CALL_EXPR;
414 rhs_code = gimple_assign_rhs_code (stmt);
416 /* Check the operation. */
419 first_stmt_code = rhs_code;
421 /* Shift arguments should be equal in all the packed stmts for a
422 vector shift with scalar shift operand. */
423 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
424 || rhs_code == LROTATE_EXPR
425 || rhs_code == RROTATE_EXPR)
427 vec_mode = TYPE_MODE (vectype);
429 /* First see if we have a vector/vector shift. */
430 optab = optab_for_tree_code (rhs_code, vectype,
434 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
436 /* No vector/vector shift, try for a vector/scalar shift. */
437 optab = optab_for_tree_code (rhs_code, vectype,
442 if (vect_print_dump_info (REPORT_SLP))
443 fprintf (vect_dump, "Build SLP failed: no optab.")
446 icode = (int) optab_handler (optab, vec_mode);
447 if (icode == CODE_FOR_nothing)
449 if (vect_print_dump_info (REPORT_SLP))
450 fprintf (vect_dump, "Build SLP failed: "
451 "op not supported by target.");
/* A scalar (non-vector) second operand means all stmts in the group
   must use the identical shift amount; remember it for comparison.  */
454 optab_op2_mode = insn_data[icode].operand[2].mode;
455 if (!VECTOR_MODE_P (optab_op2_mode))
457 need_same_oprnds = true;
458 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: the operation must match the first stmt's, except
   REALPART/IMAGPART pairs (complex numbers) and differing memory
   reference codes within a strided access.  */
465 if (first_stmt_code != rhs_code
466 && (first_stmt_code != IMAGPART_EXPR
467 || rhs_code != REALPART_EXPR)
468 && (first_stmt_code != REALPART_EXPR
469 || rhs_code != IMAGPART_EXPR)
470 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
471 && (first_stmt_code == ARRAY_REF
472 || first_stmt_code == INDIRECT_REF
473 || first_stmt_code == COMPONENT_REF
474 || first_stmt_code == MEM_REF)))
476 if (vect_print_dump_info (REPORT_SLP))
479 "Build SLP failed: different operation in stmt ");
480 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
487 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
489 if (vect_print_dump_info (REPORT_SLP))
492 "Build SLP failed: different shift arguments in ");
493 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 /* Strided store or load. */
501 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* A reference on the lhs means this is a strided STORE: recurse into
   its defs like any computation.  Otherwise it is a strided LOAD.  */
503 if (REFERENCE_CLASS_P (lhs))
506 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
507 stmt, &def_stmts0, &def_stmts1,
510 &first_stmt_def0_type,
511 &first_stmt_def1_type,
512 &first_stmt_const_oprnd,
514 &pattern0, &pattern1))
520 /* FORNOW: Check that there is no gap between the loads. */
521 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
522 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
523 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
524 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
526 if (vect_print_dump_info (REPORT_SLP))
528 fprintf (vect_dump, "Build SLP failed: strided "
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
536 /* Check that the size of interleaved loads group is not
537 greater than the SLP group size. */
538 if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
540 if (vect_print_dump_info (REPORT_SLP))
542 fprintf (vect_dump, "Build SLP failed: the number of "
543 "interleaved loads is greater than"
544 " the SLP group size ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
551 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
554 /* Check that there are no loads from different interleaving
555 chains in the same node. The only exception is complex
557 if (prev_first_load != first_load
558 && rhs_code != REALPART_EXPR
559 && rhs_code != IMAGPART_EXPR)
561 if (vect_print_dump_info (REPORT_SLP))
563 fprintf (vect_dump, "Build SLP failed: different "
564 "interleaving chains in one node ");
565 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 prev_first_load = first_load;
/* Alignment and cost are checked/modeled once, on the chain head.  */
574 if (first_load == stmt)
576 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
577 if (vect_supportable_dr_alignment (first_dr, false)
578 == dr_unaligned_unsupported)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: unsupported "
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
590 /* Analyze costs (for the first stmt in the group). */
591 vect_model_load_cost (vinfo_for_stmt (stmt),
592 ncopies_for_cost, false, *node);
595 /* Store the place of this load in the interleaving chain. In
596 case that permutation is needed we later decide if a specific
597 permutation is supported. */
598 load_place = vect_get_place_in_interleaving_chain (stmt,
603 VEC_safe_push (int, heap, *load_permutation, load_place);
605 /* We stop the tree when we reach a group of loads. */
606 stop_recursion = true;
609 } /* Strided access. */
612 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
614 /* Not strided load. */
615 if (vect_print_dump_info (REPORT_SLP))
617 fprintf (vect_dump, "Build SLP failed: not strided load ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 /* FORNOW: Not strided loads are not supported. */
625 /* Not memory operation. */
626 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
627 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
629 if (vect_print_dump_info (REPORT_SLP))
631 fprintf (vect_dump, "Build SLP failed: operation");
632 fprintf (vect_dump, " unsupported ");
633 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
639 /* Find the def-stmts. */
640 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
641 &def_stmts0, &def_stmts1,
642 &first_stmt_dt0, &first_stmt_dt1,
643 &first_stmt_def0_type,
644 &first_stmt_def1_type,
645 &first_stmt_const_oprnd,
647 &pattern0, &pattern1))
652 /* Add the costs of the node to the overall instance costs. */
653 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
654 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
656 /* Strided loads were reached - stop the recursion. */
/* Record this load node; a permutation, if any, is accounted for with
   a vec_perm cost taken from the target hook.  */
661 VEC_safe_push (slp_tree, heap, *loads, *node);
663 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
668 /* We don't check here complex numbers chains, so we keep them in
669 LOADS for further check in vect_supported_load_permutation_p. */
670 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
671 VEC_safe_push (slp_tree, heap, *loads, *node);
677 /* Create SLP_TREE nodes for the definition node/s. */
/* Operand 0 defined inside the vectorized region: build the left child
   from DEF_STMTS0 and recurse; fail if the subtree fails.  */
678 if (first_stmt_dt0 == vect_internal_def)
680 slp_tree left_node = XNEW (struct _slp_tree);
681 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
682 SLP_TREE_VEC_STMTS (left_node) = NULL;
683 SLP_TREE_LEFT (left_node) = NULL;
684 SLP_TREE_RIGHT (left_node) = NULL;
685 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
686 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
687 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
688 inside_cost, outside_cost, ncopies_for_cost,
689 max_nunits, load_permutation, loads,
690 vectorization_factor))
693 SLP_TREE_LEFT (*node) = left_node;
/* Likewise for operand 1 as the right child.  */
696 if (first_stmt_dt1 == vect_internal_def)
698 slp_tree right_node = XNEW (struct _slp_tree);
699 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
700 SLP_TREE_VEC_STMTS (right_node) = NULL;
701 SLP_TREE_LEFT (right_node) = NULL;
702 SLP_TREE_RIGHT (right_node) = NULL;
703 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
704 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
705 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
706 inside_cost, outside_cost, ncopies_for_cost,
707 max_nunits, load_permutation, loads,
708 vectorization_factor))
711 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to the vectorizer dump file, one line
   per scalar stmt, then recurse into both children.  */
719 vect_print_slp_tree (slp_tree node)
727 fprintf (vect_dump, "node ");
728 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
730 fprintf (vect_dump, "\n\tstmt %d ", i);
731 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
733 fprintf (vect_dump, "\n");
735 vect_print_slp_tree (SLP_TREE_LEFT (node));
736 vect_print_slp_tree (SLP_TREE_RIGHT (node));
740 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
741 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
742 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
743 stmts in NODE are to be marked. */
746 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* NOTE(review): the J-specific filtering described above is presumably on
   an elided line inside this loop -- the visible code marks every stmt.  */
754 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
756 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
758 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
759 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
763 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
766 vect_mark_slp_stmts_relevant (slp_tree node)
770 stmt_vec_info stmt_info;
775 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
777 stmt_info = vinfo_for_stmt (stmt);
/* A stmt must be either not yet marked or already marked exactly as
   used-in-scope; anything else indicates an inconsistency.  */
778 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
779 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
780 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
783 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
784 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
788 /* Check if the permutation required by the SLP INSTANCE is supported.
789 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
792 vect_supported_slp_permutation_p (slp_instance instance)
794 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
795 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
796 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
797 VEC (slp_tree, heap) *sorted_loads = NULL;
799 slp_tree *tmp_loads = NULL;
800 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
803 /* FORNOW: The only supported loads permutation is loads from the same
804 location in all the loads in the node, when the data-refs in
805 nodes of LOADS constitute an interleaving chain.
806 Sort the nodes according to the order of accesses in the chain. */
/* TMP_LOADS is indexed by the load's position in the interleaving chain;
   each load node is placed at the slot given by its permutation index
   (I advances GROUP_SIZE permutation entries per load node).  */
807 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
809 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
810 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
811 i += group_size, j++)
813 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
814 /* Check that the loads are all in the same interleaving chain. */
815 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
817 if (vect_print_dump_info (REPORT_DETAILS))
819 fprintf (vect_dump, "Build SLP failed: unsupported data "
821 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
828 tmp_loads[index] = load;
/* Rebuild SLP_INSTANCE_LOADS in chain order from TMP_LOADS.  */
831 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
832 for (i = 0; i < group_size; i++)
833 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
835 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
836 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Final check: ask the target whether the permuted load can actually be
   code-generated (analysis-only call: NULL stmt/vec arguments).  */
839 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
840 SLP_INSTANCE_UNROLLING_FACTOR (instance),
848 /* Rearrange the statements of NODE according to PERMUTATION. */
851 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
852 VEC (int, heap) *permutation)
855 VEC (gimple, heap) *tmp_stmts;
856 unsigned int index, i;
/* Recurse first so the whole subtree is permuted consistently.  */
861 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
862 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
864 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
865 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace can scatter stmts to their
   permuted positions in any order.  */
867 for (i = 0; i < group_size; i++)
868 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
870 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
872 index = VEC_index (int, permutation, i);
873 VEC_replace (gimple, tmp_stmts, index, stmt);
876 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
877 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
881 /* Check if the required load permutation is supported.
882 LOAD_PERMUTATION contains a list of indices of the loads.
883 In SLP this permutation is relative to the order of strided stores that are
884 the base of the SLP instance. */
887 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
888 VEC (int, heap) *load_permutation)
890 int i = 0, j, prev = -1, next, k, number_of_groups;
891 bool supported, bad_permutation = false;
893 slp_tree node, other_complex_node;
894 gimple stmt, first = NULL, other_node_first;
895 unsigned complex_numbers = 0;
897 /* FORNOW: permutations are only supported in SLP. */
901 if (vect_print_dump_info (REPORT_SLP))
903 fprintf (vect_dump, "Load permutation ")
904 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
905 fprintf (vect_dump, "%d ", next);
908 /* In case of reduction every load permutation is allowed, since the order
909 of the reduction statements is not important (as opposed to the case of
910 strided stores). The only condition we need to check is that all the
911 load nodes are of the same size and have the same permutation (and then
912 rearrange all the nodes of the SLP instance according to this
915 /* Check that all the load nodes are of the same size. */
916 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
918 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
919 != (unsigned) group_size)
/* Count load nodes whose first stmt reads a complex-number part; they
   get the special mixed-chain treatment below.  */
922 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
923 if (is_gimple_assign (stmt)
924 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
925 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
929 /* Complex operands can be swapped as following:
930 real_c = real_b + real_a;
931 imag_c = imag_a + imag_b;
932 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
933 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
934 chains are mixed, they match the above pattern. */
937 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
939 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
/* A stmt from a different chain than the node's first stmt is only
   acceptable when exactly two complex nodes exist and the chains
   cross-match between them.  */
945 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
947 if (complex_numbers != 2)
955 other_complex_node = VEC_index (slp_tree,
956 SLP_INSTANCE_LOADS (slp_instn), k);
957 other_node_first = VEC_index (gimple,
958 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
960 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
969 /* We checked that this case ok, so there is no need to proceed with
970 permutation tests. */
971 if (complex_numbers == 2)
973 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
974 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
978 node = SLP_INSTANCE_TREE (slp_instn);
979 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
980 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
981 instance, not all the loads belong to the same node or interleaving
982 group. Hence, we need to divide them into groups according to
984 number_of_groups = VEC_length (int, load_permutation) / group_size;
986 /* Reduction (there are no data-refs in the root).
987 In reduction chain the order of the loads is important. */
988 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
989 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
991 int first_group_load_index;
993 /* Compare all the permutation sequences to the first one. */
994 for (i = 1; i < number_of_groups; i++)
997 for (j = i * group_size; j < i * group_size + group_size; j++)
999 next = VEC_index (int, load_permutation, j);
1000 first_group_load_index = VEC_index (int, load_permutation, k);
1002 if (next != first_group_load_index)
1004 bad_permutation = true;
1011 if (bad_permutation)
1015 if (!bad_permutation)
1017 /* Check that the loads in the first sequence are different and there
1018 are no gaps between them. */
1019 load_index = sbitmap_alloc (group_size);
1020 sbitmap_zero (load_index);
1021 for (k = 0; k < group_size; k++)
1023 first_group_load_index = VEC_index (int, load_permutation, k);
1024 if (TEST_BIT (load_index, first_group_load_index))
1026 bad_permutation = true;
1030 SET_BIT (load_index, first_group_load_index);
/* Every index 0..GROUP_SIZE-1 must be present exactly once.  */
1033 if (!bad_permutation)
1034 for (k = 0; k < group_size; k++)
1035 if (!TEST_BIT (load_index, k))
1037 bad_permutation = true;
1041 sbitmap_free (load_index);
1044 if (!bad_permutation)
1046 /* This permutation is valid for reduction. Since the order of the
1047 statements in the nodes is not important unless they are memory
1048 accesses, we can rearrange the statements in all the nodes
1049 according to the order of the loads. */
1050 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1052 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1057 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1058 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1059 well (unless it's reduction). */
1060 if (VEC_length (int, load_permutation)
1061 != (unsigned int) (group_size * group_size))
/* Verify the 0..01..1.. shape: each GROUP_SIZE-long run must repeat a
   single index, and over all runs each index appears exactly once.  */
1065 load_index = sbitmap_alloc (group_size);
1066 sbitmap_zero (load_index);
1067 for (j = 0; j < group_size; j++)
1069 for (i = j * group_size, k = 0;
1070 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1073 if (i != j * group_size && next != prev)
1082 if (TEST_BIT (load_index, prev))
1088 SET_BIT (load_index, prev);
1091 for (j = 0; j < group_size; j++)
1092 if (!TEST_BIT (load_index, j))
1095 sbitmap_free (load_index);
1097 if (supported && i == group_size * group_size
1098 && vect_supported_slp_permutation_p (slp_instn))
1105 /* Find the first load in the loop that belongs to INSTANCE.
1106 When loads are in several SLP nodes, there can be a case in which the first
1107 load does not appear in the first SLP node to be transformed, causing
1108 incorrect order of statements. Since we generate all the loads together,
1109 they must be inserted before the first load of the SLP instance and not
1110 before the first load of the first node of the instance. */
1113 vect_find_first_load_in_slp_instance (slp_instance instance)
1117 gimple first_load = NULL, load;
/* Scan every load of every load node, keeping the earliest stmt seen.  */
1119 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1120 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1121 first_load = get_earlier_stmt (load, first_load);
1127 /* Find the last store in SLP INSTANCE. */
1130 vect_find_last_store_in_slp_instance (slp_instance instance)
1134 gimple last_store = NULL, store;
/* The stores are the scalar stmts of the instance's root node; keep the
   latest one seen.  */
1136 node = SLP_INSTANCE_TREE (instance);
1138 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1140 last_store = get_later_stmt (store, last_store);
1146 /* Analyze an SLP instance starting from a group of strided stores. Call
1147 vect_build_slp_tree to build a tree of packed stmts if possible.
1148 Return FALSE if it's impossible to SLP any stmt in the loop. */
1151 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1154 slp_instance new_instance;
1155 slp_tree node = XNEW (struct _slp_tree);
1156 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1157 unsigned int unrolling_factor = 1, nunits;
1158 tree vectype, scalar_type = NULL_TREE;
1160 unsigned int vectorization_factor = 0;
1161 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1162 unsigned int max_nunits = 0;
1163 VEC (int, heap) *load_permutation;
1164 VEC (slp_tree, heap) *loads;
1165 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* Determine the vector type and group size: from the data-ref for a
   strided-store group, from the stmt's vectype otherwise (loop-only
   reduction cases).  */
1167 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1171 scalar_type = TREE_TYPE (DR_REF (dr));
1172 vectype = get_vectype_for_scalar_type (scalar_type);
1176 gcc_assert (loop_vinfo);
1177 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1180 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1184 gcc_assert (loop_vinfo);
1185 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1186 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1191 if (vect_print_dump_info (REPORT_SLP))
1193 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1194 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1202 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1204 /* No multitypes in BB SLP. */
1205 vectorization_factor = nunits;
1207 /* Calculate the unrolling factor. */
1208 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP has no loop to unroll, so any required unrolling is
   fatal there.  */
1209 if (unrolling_factor != 1 && !loop_vinfo)
1211 if (vect_print_dump_info (REPORT_SLP))
1212 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1218 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1219 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size)
1221 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1223 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1226 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1227 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1232 /* Collect reduction statements. */
1233 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1236 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1239 SLP_TREE_VEC_STMTS (node) = NULL;
1240 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1241 SLP_TREE_LEFT (node) = NULL;
1242 SLP_TREE_RIGHT (node) = NULL;
1243 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1244 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1246 /* Calculate the number of vector stmts to create based on the unrolling
1247 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1248 GROUP_SIZE / NUNITS otherwise. */
1249 ncopies_for_cost = unrolling_factor * group_size / nunits;
1251 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1252 loads = VEC_alloc (slp_tree, heap, group_size);
1254 /* Build the tree for the SLP instance. */
1255 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1256 &inside_cost, &outside_cost, ncopies_for_cost,
1257 &max_nunits, &load_permutation, &loads,
1258 vectorization_factor))
1260 /* Create a new SLP instance. */
1261 new_instance = XNEW (struct _slp_instance);
1262 SLP_INSTANCE_TREE (new_instance) = node;
1263 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1264 /* Calculate the unrolling factor based on the smallest type in the
1266 if (max_nunits > nunits)
1267 unrolling_factor = least_common_multiple (max_nunits, group_size)
1270 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1271 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1272 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1273 SLP_INSTANCE_LOADS (new_instance) = loads;
1274 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1275 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If any loads were collected, the load permutation must be validated;
   an unsupported permutation discards the whole instance.  */
1276 if (VEC_length (slp_tree, loads))
1278 if (!vect_supported_load_permutation_p (new_instance, group_size,
1281 if (vect_print_dump_info (REPORT_SLP))
1283 fprintf (vect_dump, "Build SLP failed: unsupported load "
1285 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1288 vect_free_slp_instance (new_instance);
1292 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1293 = vect_find_first_load_in_slp_instance (new_instance);
/* No loads: the permutation vector is unused, free it now.  */
1296 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance with the enclosing loop or basic block.  */
1299 VEC_safe_push (slp_instance, heap,
1300 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1303 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1306 if (vect_print_dump_info (REPORT_SLP))
1307 vect_print_slp_tree (node);
1312 /* Failed to SLP. */
1313 /* Free the allocated memory. */
1314 vect_free_slp_tree (node);
1315 VEC_free (int, heap, load_permutation);
1316 VEC_free (slp_tree, heap, loads);
1322 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1323 trees of packed scalar stmts if SLP is possible. */
1326 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
/* Try to build SLP instances from every candidate group: strided stores,
   reduction chains, and plain reductions.  Exactly one of LOOP_VINFO /
   BB_VINFO is expected to be non-NULL (loop vs. basic-block vectorization).
   NOTE(review): several original lines are elided in this excerpt
   (opening brace, declarations of I and OK, closing braces, the final
   return) -- confirm against the full source.  */
1329 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1330 gimple first_element;
1333 if (vect_print_dump_info (REPORT_SLP))
1334 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Loop case: take the candidate groups collected in LOOP_VINFO;
   basic-block case: only strided stores recorded in BB_VINFO exist.  */
1338 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1339 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1340 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1343 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1345 /* Find SLP sequences starting from groups of strided stores. */
1346 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1347 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
/* For a basic block, failure to build any instance from stores means the
   whole block cannot be SLPed.  */
1350 if (bb_vinfo && !ok)
1352 if (vect_print_dump_info (REPORT_SLP))
1353 fprintf (vect_dump, "Failed to SLP the basic block.");
1359 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1361 /* Find SLP sequences starting from reduction chains. */
1362 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1363 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1368 /* Don't try to vectorize SLP reductions if reduction chain was
1373 /* Find SLP sequences starting from groups of reductions. */
/* More than one reduction is needed to form a group; the instance is
   seeded from the first reduction stmt.  */
1374 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1375 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1376 VEC_index (gimple, reductions, 0)))
1383 /* For each possible SLP instance decide whether to SLP it and calculate overall
1384 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1385 least one instance. */
1388 vect_make_slp_decision (loop_vec_info loop_vinfo)
/* Decide, for each SLP instance found in LOOP_VINFO, whether to SLP it,
   and record the overall unrolling factor required.  Returns nonzero iff
   at least one instance was chosen.
   NOTE(review): the opening brace, the DECIDED_TO_SLP increment, and
   closing braces appear elided in this excerpt.  */
1390 unsigned int i, unrolling_factor = 1;
1391 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1392 slp_instance instance;
1393 int decided_to_slp = 0;
1395 if (vect_print_dump_info (REPORT_SLP))
1396 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1398 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1400 /* FORNOW: SLP if you can. */
/* The loop must be unrolled enough for the most demanding instance,
   so keep the maximum over all instances.  */
1401 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1402 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1404 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1405 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1406 loop-based vectorization. Such stmts will be marked as HYBRID. */
1407 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1411 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1413 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1414 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1415 decided_to_slp, unrolling_factor);
1417 return (decided_to_slp > 0);
1421 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1422 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1425 vect_detect_hybrid_slp_stmts (slp_tree node)
/* Recursively scan the SLP tree rooted at NODE.  A pure-SLP stmt whose
   SSA result is also consumed by a relevant non-SLP stmt must be
   vectorized both ways; mark such stmts HYBRID.  A use that is a
   reduction PHI does not force hybrid vectorization.
   NOTE(review): declarations of I/STMT/USE_STMT and a probable NULL-NODE
   early return appear elided in this excerpt.  */
1429 imm_use_iterator imm_iter;
1431 stmt_vec_info stmt_vinfo;
1436 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
/* Only stmts producing an SSA name can have out-of-SLP uses.  */
1437 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1438 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1439 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
/* The use must be a relevant (or reduction-cycle) stmt that is itself
   not part of any SLP instance, and not a reduction PHI.  */
1440 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1441 && !STMT_SLP_TYPE (stmt_vinfo)
1442 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1443 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1444 && !(gimple_code (use_stmt) == GIMPLE_PHI
1445 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1446 == vect_reduction_def))
1447 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both children of the SLP node.  */
1449 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1450 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1454 /* Find stmts that must be both vectorized and SLPed. */
1457 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
/* Entry point: run hybrid-SLP detection over every SLP instance recorded
   in LOOP_VINFO.  NOTE(review): the declaration of I appears elided in
   this excerpt.  */
1460 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1461 slp_instance instance;
1463 if (vect_print_dump_info (REPORT_SLP))
1464 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1466 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1467 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1471 /* Create and initialize a new bb_vec_info struct for BB, as well as
1472 stmt_vec_info structs for all the stmts in it. */
1475 new_bb_vec_info (basic_block bb)
/* Allocate and initialize a bb_vec_info for BB: create a stmt_vec_info
   (with uid reset to 0) for every stmt in the block, and pre-allocate
   the strided-store and SLP-instance vectors.
   NOTE(review): the `return res;` line appears elided in this excerpt.  */
1477 bb_vec_info res = NULL;
1478 gimple_stmt_iterator gsi;
/* xcalloc zero-fills, so all fields not set below start out NULL/0.  */
1480 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1481 BB_VINFO_BB (res) = bb;
1483 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1485 gimple stmt = gsi_stmt (gsi);
/* Reset the uid; presumably reused later by the vectorizer -- not
   visible from this excerpt.  */
1486 gimple_set_uid (stmt, 0);
1487 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Initial capacities (10 stores, 2 instances) are heuristic only; the
   vectors grow on demand.  */
1490 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1491 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1498 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1499 stmts in the basic block. */
1502 destroy_bb_vec_info (bb_vec_info bb_vinfo)
/* Release BB_VINFO and everything it owns: the per-stmt stmt_vec_info
   structs, the data references, dependence relations, and the
   strided-store / SLP-instance vectors.
   NOTE(review): the NULL-BB_VINFO guard, per-instance freeing loop, the
   final free of BB_VINFO itself, and vinfo reset appear elided in this
   excerpt.  */
1505 gimple_stmt_iterator si;
1510 bb = BB_VINFO_BB (bb_vinfo);
1512 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1514 gimple stmt = gsi_stmt (si);
1515 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1518 /* Free stmt_vec_info. */
1519 free_stmt_vec_info (stmt);
1522 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1523 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1524 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1525 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1531 /* Analyze statements contained in SLP tree node after recursively analyzing
1532 the subtree. Return TRUE if the operations are supported. */
1535 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Post-order check that every scalar stmt in the SLP subtree rooted at
   NODE is vectorizable.  Returns false as soon as a child subtree or a
   stmt fails analysis.
   NOTE(review): declarations (I, STMT, DUMMY), the NULL-NODE base case,
   and the return statements appear elided in this excerpt.  */
1544 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1545 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1548 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Stmts reaching here must already have been marked pure-SLP.  */
1551 gcc_assert (stmt_info);
1552 gcc_assert (PURE_SLP_STMT (stmt_info));
1554 if (!vect_analyze_stmt (stmt, &dummy, node))
1562 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1563 operations are supported. */
1566 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
/* Analyze all SLP instances of the basic block; instances whose
   operations are unsupported are freed and removed from the vector.
   Returns false when no instance survives.
   NOTE(review): the declaration of I, the I++ advance on the keep path,
   and return statements appear elided in this excerpt.  */
1568 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1569 slp_instance instance;
/* Note: no I++ in the for-header -- after an ordered remove the next
   element slides into slot I, so the index must only advance when the
   instance is kept (elided here).  */
1572 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1574 if (!vect_slp_analyze_node_operations (bb_vinfo,
1575 SLP_INSTANCE_TREE (instance)))
1577 vect_free_slp_instance (instance);
1578 VEC_ordered_remove (slp_instance, slp_instances, i);
1584 if (!VEC_length (slp_instance, slp_instances))
1590 /* Check if loads and stores are mixed in the basic block (in that
1591 case if we are not sure that the accesses differ, we can't vectorize the
1592 basic block). Also return FALSE in case that there is statement marked as
1593 not vectorizable. */
1596 vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
/* Conservative dependence screen for BB vectorization: reject the block
   if any stmt was left unanalyzed, or if a load appears after a store
   (a read following a write may alias it).  Returns false on rejection.
   NOTE(review): the declaration of STMT, the `continue`/`return` bodies
   of the conditions, and the final `return true;` appear elided in this
   excerpt.  */
1598 basic_block bb = BB_VINFO_BB (bb_vinfo);
1599 gimple_stmt_iterator si;
1600 bool detected_store = false;
1602 struct data_reference *dr;
1604 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1606 stmt = gsi_stmt (si);
1608 /* We can't allow not analyzed statements, since they may contain data
1610 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
/* Stmts without a data reference cannot create memory dependences.  */
1613 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1616 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* A read after any store in the block is the unsafe pattern.  */
1617 if (DR_IS_READ (dr) && detected_store)
1620 if (!DR_IS_READ (dr))
1621 detected_store = true;
1627 /* Check if vectorization of the basic block is profitable. */
1630 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
/* Cost-model check for basic-block vectorization: sum the vector costs
   over all SLP instances, sum the scalar cost of the pure-SLP stmts they
   replace, and approve only if the vector cost is strictly smaller.
   NOTE(review): declarations (I, STMT, DUMMY), some `continue`/`else`
   lines, and the return statements appear elided in this excerpt.  */
1632 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1633 slp_instance instance;
1635 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1636 unsigned int stmt_cost;
1638 gimple_stmt_iterator si;
1639 basic_block bb = BB_VINFO_BB (bb_vinfo);
1640 stmt_vec_info stmt_info = NULL;
1641 tree dummy_type = NULL;
1644 /* Calculate vector costs. */
1645 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1647 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1648 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1651 /* Calculate scalar cost. */
1652 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1654 stmt = gsi_stmt (si);
1655 stmt_info = vinfo_for_stmt (stmt);
/* Only pure-SLP stmts are replaced by vector code; everything else
   stays scalar and is excluded from the comparison.  */
1657 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1658 || !PURE_SLP_STMT (stmt_info))
/* Ask the target for the scalar cost of a load, store, or generic stmt
   via its vectorization-cost hook.  */
1661 if (STMT_VINFO_DATA_REF (stmt_info))
1663 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1664 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1665 (scalar_load, dummy_type, dummy);
1667 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1668 (scalar_store, dummy_type, dummy);
1671 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1672 (scalar_stmt, dummy_type, dummy);
1674 scalar_cost += stmt_cost;
1677 if (vect_print_dump_info (REPORT_COST))
1679 fprintf (vect_dump, "Cost model analysis: \n");
1680 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1682 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1684 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1687 /* Vectorization is profitable if its cost is less than the cost of scalar
/* Ties go to the scalar version (>=, not >).  */
1689 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1695 /* Check if the basic block can be vectorized. */
1698 vect_slp_analyze_bb_1 (basic_block bb)
/* Run the full analysis pipeline on BB for the current vector size:
   data refs -> dependences -> alignment -> accesses -> SLP discovery ->
   operation analysis -> cost model.  Each stage destroys the bb_vinfo
   and bails out on failure; on success the bb_vinfo is returned for the
   transformation phase.
   NOTE(review): declarations (I, MIN_VF), the NULL-check after
   new_bb_vec_info, `return NULL;` / `return bb_vinfo;` lines, and some
   string-literal continuations appear elided in this excerpt.  */
1700 bb_vec_info bb_vinfo;
1701 VEC (ddr_p, heap) *ddrs;
1702 VEC (slp_instance, heap) *slp_instances;
1703 slp_instance instance;
1706 int max_vf = MAX_VECTORIZATION_FACTOR;
1707 bool data_dependence_in_bb = false;
1709 bb_vinfo = new_bb_vec_info (bb);
1713 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1715 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1716 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1719 destroy_bb_vec_info (bb_vinfo);
/* With no dependence relations at all there is nothing worth
   vectorizing in this block.  */
1723 ddrs = BB_VINFO_DDRS (bb_vinfo);
1724 if (!VEC_length (ddr_p, ddrs))
1726 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1727 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1730 destroy_bb_vec_info (bb_vinfo);
/* Dependence analysis; if it reports in-BB dependences, fall back to
   the conservative load/store-mixing screen.  */
1734 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
1735 &data_dependence_in_bb)
1737 || (data_dependence_in_bb
1738 && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
1740 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1741 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1742 "in basic block.\n");
1744 destroy_bb_vec_info (bb_vinfo);
1748 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1750 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1751 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1754 destroy_bb_vec_info (bb_vinfo);
1758 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1760 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1761 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1764 destroy_bb_vec_info (bb_vinfo);
1768 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1770 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1771 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1774 destroy_bb_vec_info (bb_vinfo);
1778 /* Check the SLP opportunities in the basic block, analyze and build SLP
1780 if (!vect_analyze_slp (NULL, bb_vinfo))
1782 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1783 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1784 "in basic block.\n");
1786 destroy_bb_vec_info (bb_vinfo);
1790 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1792 /* Mark all the statements that we want to vectorize as pure SLP and
1794 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1796 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1797 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1800 if (!vect_slp_analyze_operations (bb_vinfo))
1802 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1803 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1805 destroy_bb_vec_info (bb_vinfo);
1809 /* Cost model: check if the vectorization is worthwhile. */
/* Cost check is skipped when -fno-vect-cost-model is in effect.  */
1810 if (flag_vect_cost_model
1811 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1813 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1814 fprintf (vect_dump, "not vectorized: vectorization is not "
1817 destroy_bb_vec_info (bb_vinfo);
1821 if (vect_print_dump_info (REPORT_DETAILS))
1822 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1829 vect_slp_analyze_bb (basic_block bb)
/* Driver: bound the block size by PARAM_SLP_MAX_INSNS_IN_BB, then call
   vect_slp_analyze_bb_1 repeatedly, retrying with each smaller vector
   size the target supports until analysis succeeds or sizes run out.
   NOTE(review): the INSNS counter declaration/increment, the success
   return of BB_VINFO, the `while (1)` loop header, and `return NULL;`
   lines appear elided in this excerpt.  */
1831 bb_vec_info bb_vinfo;
1833 gimple_stmt_iterator gsi;
1834 unsigned int vector_sizes;
1836 if (vect_print_dump_info (REPORT_DETAILS))
1837 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count the real instructions, skipping debug stmts, nops and labels.  */
1839 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1841 gimple stmt = gsi_stmt (gsi);
1842 if (!is_gimple_debug (stmt)
1843 && !gimple_nop_p (stmt)
1844 && gimple_code (stmt) != GIMPLE_LABEL)
1848 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1850 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1851 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1857 /* Autodetect first vector size we try. */
/* 0 means "let the target pick"; the bitmask lists the remaining
   candidate sizes.  */
1858 current_vector_size = 0;
1859 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1863 bb_vinfo = vect_slp_analyze_bb_1 (bb);
1867 destroy_bb_vec_info (bb_vinfo);
/* Drop the size just tried from the candidate mask.  */
1869 vector_sizes &= ~current_vector_size;
1870 if (vector_sizes == 0
1871 || current_vector_size == 0)
1874 /* Try the next biggest vector size. */
/* Highest set bit of the mask = largest remaining size.  */
1875 current_vector_size = 1 << floor_log2 (vector_sizes);
1876 if (vect_print_dump_info (REPORT_DETAILS))
1877 fprintf (vect_dump, "***** Re-trying analysis with "
1878 "vector size %d\n", current_vector_size);
1883 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1884 the number of created vector stmts depends on the unrolling factor).
1885 However, the actual number of vector stmts for every SLP node depends on
1886 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1887 should be updated. In this function we assume that the inside costs
1888 calculated in vect_model_xxx_cost are linear in ncopies. */
1891 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
/* Rescale each SLP instance's inside-loop cost from its own unrolling
   factor up to the final loop vectorization factor VF, assuming costs
   are linear in the number of copies.
   NOTE(review): presumably VF is a multiple of every instance's
   unrolling factor so the integer division is exact -- confirm against
   the full source.  */
1893 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1894 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1895 slp_instance instance;
1897 if (vect_print_dump_info (REPORT_SLP))
1898 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1900 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1901 /* We assume that costs are linear in ncopies. */
1902 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1903 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1907 /* For constant and loop invariant defs of SLP_NODE this function returns
1908 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1909 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1910 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1911 REDUC_INDEX is the index of the reduction operand in the statements, unless
1915 vect_get_constant_vectors (tree op, slp_tree slp_node,
1916 VEC (tree, heap) **vec_oprnds,
1917 unsigned int op_num, unsigned int number_of_vectors,
/* Build NUMBER_OF_VECTORS vector defs for the constant / invariant
   operand OP_NUM of the stmts in SLP_NODE, pushing them onto
   *VEC_OPRNDS.  For reductions (REDUC_INDEX != -1) the initial values
   come from the loop-preheader PHI args or from a neutral element of
   the reduction operation.
   NOTE(review): many original lines are elided in this excerpt
   (declarations, switch labels, several condition/brace lines); the
   comments below describe only the visible code.  */
1920 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1921 gimple stmt = VEC_index (gimple, stmts, 0);
1922 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1926 int j, number_of_places_left_in_vector;
1929 int group_size = VEC_length (gimple, stmts);
1930 unsigned int vec_num, i;
1931 int number_of_copies = 1;
1932 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1933 bool constant_p, is_store;
1934 tree neutral_op = NULL;
1935 enum tree_code code = gimple_assign_rhs_code (stmt);
/* Reduction case: pick the operand via REDUC_INDEX and compute the
   neutral element for the reduction code (0 for sum, 1 for product,
   -1 for AND, preheader value otherwise).  */
1939 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1940 && reduc_index != -1)
1942 op_num = reduc_index - 1;
1943 op = gimple_op (stmt, reduc_index);
1944 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1945 we need either neutral operands or the original operands. See
1946 get_initial_def_for_reduction() for details. */
1949 case WIDEN_SUM_EXPR:
1955 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1956 neutral_op = build_real (TREE_TYPE (op), dconst0)
1958 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1963 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1964 neutral_op = build_real (TREE_TYPE (op), dconst1);
1966 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1971 neutral_op = build_int_cst (TREE_TYPE (op), -1);
/* No neutral element known: take the initial value that flows in
   through the loop preheader edge of the reduction PHI.  */
1976 def_stmt = SSA_NAME_DEF_STMT (op);
1977 loop = (gimple_bb (stmt))->loop_father;
1978 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1979 loop_preheader_edge (loop));
/* Store case: the invariant operand is the RHS of the assignment.  */
1987 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1990 op = gimple_assign_rhs1 (stmt);
1997 if (CONSTANT_CLASS_P (op))
2002 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2003 gcc_assert (vector_type);
2004 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2006 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2007 created vectors. It is greater than 1 if unrolling is performed.
2009 For example, we have two scalar operands, s1 and s2 (e.g., group of
2010 strided accesses of size two), while NUNITS is four (i.e., four scalars
2011 of this type can be packed in a vector). The output vector will contain
2012 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2015 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2016 containing the operands.
2018 For example, NUNITS is four as before, and the group size is 8
2019 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2020 {s5, s6, s7, s8}. */
2022 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2024 number_of_places_left_in_vector = nunits;
2025 for (j = 0; j < number_of_copies; j++)
/* Iterate the group backwards: elements are consed onto T, which
   reverses them again, so each built vector ends up in order.  */
2027 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2030 op = gimple_assign_rhs1 (stmt);
2032 op = gimple_op (stmt, op_num + 1);
2034 if (reduc_index != -1)
2036 loop = (gimple_bb (stmt))->loop_father;
2037 def_stmt = SSA_NAME_DEF_STMT (op);
2041 /* Get the def before the loop. In reduction chain we have only
2042 one initial value. */
2043 if ((j != (number_of_copies - 1)
2044 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2049 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2050 loop_preheader_edge (loop));
2053 /* Create 'vect_ = {op0,op1,...,opn}'. */
2054 t = tree_cons (NULL_TREE, op, t);
2056 number_of_places_left_in_vector--;
/* A full vector's worth of scalars accumulated: materialize it, as a
   VECTOR_CST when all elements are constant, else a CONSTRUCTOR.  */
2058 if (number_of_places_left_in_vector == 0)
2060 number_of_places_left_in_vector = nunits;
2063 vec_cst = build_vector (vector_type, t);
2065 vec_cst = build_constructor_from_list (vector_type, t);
2066 VEC_quick_push (tree, voprnds,
2067 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2073 /* Since the vectors are created in the reverse order, we should invert
2075 vec_num = VEC_length (tree, voprnds);
2076 for (j = vec_num - 1; j >= 0; j--)
2078 vop = VEC_index (tree, voprnds, j);
2079 VEC_quick_push (tree, *vec_oprnds, vop);
2082 VEC_free (tree, heap, voprnds);
2084 /* In case that VF is greater than the unrolling factor needed for the SLP
2085 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2086 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2087 to replicate the vectors. */
2088 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2090 tree neutral_vec = NULL;
/* Reductions pad with a splat of the neutral element; otherwise the
   already-built vectors are recycled (visible below).  */
2095 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2097 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2101 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2102 VEC_quick_push (tree, *vec_oprnds, vop);
2108 /* Get vectorized definitions from SLP_NODE that contains corresponding
2109 vectorized def-stmts. */
2112 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
/* Collect the LHS of every already-created vector stmt of SLP_NODE into
   *VEC_OPRNDS.  The caller must have allocated *VEC_OPRNDS with enough
   capacity (quick_push does not grow the vector).
   NOTE(review): declarations of I and VEC_OPRND appear elided in this
   excerpt.  */
2115 gimple vec_def_stmt;
2118 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2120 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2122 gcc_assert (vec_def_stmt);
2123 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2124 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2129 /* Get vectorized definitions for SLP_NODE.
2130 If the scalar definitions are loop invariants or constants, collect them and
2131 call vect_get_constant_vectors() to create vector stmts.
2132 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2133 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2134 vect_get_slp_vect_defs() to retrieve them.
2135 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2136 the right node. This is used when the second operand must remain scalar. */
2139 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2140 VEC (tree,heap) **vec_oprnds0,
2141 VEC (tree,heap) **vec_oprnds1, int reduc_index)
/* Fill *VEC_OPRNDS0 (and, for binary ops, *VEC_OPRNDS1) with the vector
   defs of SLP_NODE's operands: taken from the already-vectorized
   LEFT/RIGHT child when one exists, otherwise built from the scalar
   constants/invariants OP0/OP1 via vect_get_constant_vectors.
   NOTE(review): several lines (is_store handling, `return;` statements,
   else-branch markers) appear elided in this excerpt.  */
2144 enum tree_code code;
2145 int number_of_vects;
2146 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2148 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2149 /* The number of vector defs is determined by the number of vector statements
2150 in the node from which we get those statements. */
2151 if (SLP_TREE_LEFT (slp_node))
2152 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2155 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2156 /* Number of vector stmts was calculated according to LHS in
2157 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2158 necessary. See vect_get_smallest_scalar_type () for details. */
2159 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
/* Scale the count by the LHS/RHS element-size ratio when they differ
   (e.g. a widening/narrowing operation).  */
2161 if (rhs_size_unit != lhs_size_unit)
2163 number_of_vects *= rhs_size_unit;
2164 number_of_vects /= lhs_size_unit;
2168 /* Allocate memory for vectorized defs. */
2169 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
2171 /* SLP_NODE corresponds either to a group of stores or to a group of
2172 unary/binary operations. We don't call this function for loads.
2173 For reduction defs we call vect_get_constant_vectors(), since we are
2174 looking for initial loop invariant values. */
2175 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
2176 /* The defs are already vectorized. */
2177 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2179 /* Build vectors from scalar defs. */
2180 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
2183 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2184 /* Since we don't call this function with loads, this is a group of
2188 /* For reductions, we only need initial values. */
2189 if (reduc_index != -1)
/* Second operand: only needed for binary RHS, and only when the caller
   actually asked for it.  */
2192 code = gimple_assign_rhs_code (first_stmt);
2193 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
2196 /* The number of vector defs is determined by the number of vector statements
2197 in the node from which we get those statements. */
2198 if (SLP_TREE_RIGHT (slp_node))
2199 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2201 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2203 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2205 if (SLP_TREE_RIGHT (slp_node))
2206 /* The defs are already vectorized. */
2207 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2209 /* Build vectors from scalar defs. */
2210 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2215 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2216 building a vector of type MASK_TYPE from it) and two input vectors placed in
2217 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2218 shifting by STRIDE elements of DR_CHAIN for every copy.
2219 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2221 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2222 the created stmts must be inserted. */
2225 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2226 tree mask, int first_vec_indx, int second_vec_indx,
2227 gimple_stmt_iterator *gsi, slp_tree node,
2228 tree builtin_decl, tree vectype,
2229 VEC(tree,heap) *dr_chain,
2230 int ncopies, int vect_stmts_counter)
/* Emit NCOPIES calls to the target's permute builtin, taking the two
   input vectors from DR_CHAIN at FIRST_VEC_INDX / SECOND_VEC_INDX and
   advancing both indices by STRIDE per copy.  Each result is stored into
   SLP_TREE_VEC_STMTS (node) at slot STRIDE * copy + VECT_STMTS_COUNTER.
   NOTE(review): declarations of I/STRIDE/PERM_DEST and some brace lines
   appear elided in this excerpt.  */
2233 gimple perm_stmt = NULL;
2234 stmt_vec_info next_stmt_info;
2236 tree first_vec, second_vec, data_ref;
2238 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2240 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vec-stmts vector with NULLs up to its final length so the
   VEC_replace below can write at arbitrary slots.  */
2242 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2243 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2244 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2246 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2247 for (i = 0; i < ncopies; i++)
2249 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2250 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2252 /* Generate the permute statement. */
2253 perm_stmt = gimple_build_call (builtin_decl,
2254 3, first_vec, second_vec, mask);
2255 data_ref = make_ssa_name (perm_dest, perm_stmt);
2256 gimple_call_set_lhs (perm_stmt, data_ref);
2257 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2259 /* Store the vector statement in NODE. */
2260 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2261 stride * i + vect_stmts_counter, perm_stmt);
2263 first_vec_indx += stride;
2264 second_vec_indx += stride;
2267 /* Mark the scalar stmt as vectorized. */
/* Note: records only the last permute stmt generated.  */
2268 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2269 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2273 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2274 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2275 representation. Check that the mask is valid and return FALSE if not.
2276 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2277 the next vector, i.e., the current first vector is not needed. */
2280 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2281 int mask_nunits, bool only_one_vec, int index,
2282 int *mask, int *current_mask_element,
2283 bool *need_next_vector, int *number_of_mask_fixes,
2284 bool *mask_fixed, bool *needs_first_vector)
/* Convert FIRST_MASK_ELEMENT + M into a target mask element for a
   two-input permute, rebasing the mask when it would reach into a third
   vector.  Returns false (after a dump message) when the permutation
   cannot be expressed with two input vectors; sets *NEED_NEXT_VECTOR
   when the first input vector is no longer needed.
   NOTE(review): the declaration of I, `return false;`/`return true;`
   lines, the *MASK_FIXED assignment, and several brace lines appear
   elided in this excerpt.  */
2288 /* Convert to target specific representation. */
2289 *current_mask_element = first_mask_element + m;
2290 /* Adjust the value in case it's a mask for second and third vectors. */
/* Each prior "fix" shifted the window by MASK_NUNITS; undo that bias.  */
2291 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2293 if (*current_mask_element < mask_nunits)
2294 *needs_first_vector = true;
2296 /* We have only one input vector to permute but the mask accesses values in
2297 the next vector as well. */
2298 if (only_one_vec && *current_mask_element >= mask_nunits)
2300 if (vect_print_dump_info (REPORT_DETAILS))
2302 fprintf (vect_dump, "permutation requires at least two vectors ");
2303 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2309 /* The mask requires the next vector. */
2310 if (*current_mask_element >= mask_nunits * 2)
2312 if (*needs_first_vector || *mask_fixed)
2314 /* We either need the first vector too or have already moved to the
2315 next vector. In both cases, this permutation needs three
2317 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump, "permutation requires at "
2320 "least three vectors ");
2321 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2327 /* We move to the next vector, dropping the first one and working with
2328 the second and the third - we need to adjust the values of the mask
/* Rebase the current element and all previously emitted elements of
   this mask onto the new (second, third) vector pair.  */
2330 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2332 for (i = 0; i < index; i++)
2333 mask[i] -= mask_nunits * *number_of_mask_fixes;
2335 (*number_of_mask_fixes)++;
2339 *need_next_vector = *mask_fixed;
2341 /* This was the last element of this mask. Start a new one. */
2342 if (index == mask_nunits - 1)
2344 *number_of_mask_fixes = 1;
2345 *mask_fixed = false;
2346 *needs_first_vector = false;
2353 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2354 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2355 permute statements for SLP_NODE_INSTANCE. */
2357 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2358 gimple_stmt_iterator *gsi, int vf,
2359 slp_instance slp_node_instance, bool analyze_only)
2361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2362 tree mask_element_type = NULL_TREE, mask_type;
2363 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2365 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2366 gimple next_scalar_stmt;
2367 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2368 int first_mask_element;
2369 int index, unroll_factor, *mask, current_mask_element, ncopies;
2370 bool only_one_vec = false, need_next_vector = false;
2371 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2372 int number_of_mask_fixes = 1;
2373 bool mask_fixed = false;
2374 bool needs_first_vector = false;
2376 if (!targetm.vectorize.builtin_vec_perm)
2378 if (vect_print_dump_info (REPORT_DETAILS))
2380 fprintf (vect_dump, "no builtin for vect permute for ");
2381 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2387 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2388 &mask_element_type);
2389 if (!builtin_decl || !mask_element_type)
2391 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "no builtin for vect permute for ");
2394 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2400 mask_type = get_vectype_for_scalar_type (mask_element_type);
2401 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2402 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2403 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2404 scale = mask_nunits / nunits;
2405 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2407 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2408 unrolling factor. */
2409 orig_vec_stmts_num = group_size *
2410 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2411 if (orig_vec_stmts_num == 1)
2412 only_one_vec = true;
2414 /* Number of copies is determined by the final vectorization factor
2415 relatively to SLP_NODE_INSTANCE unrolling factor. */
2416 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2418 /* Generate permutation masks for every NODE. Number of masks for each NODE
2419 is equal to GROUP_SIZE.
2420 E.g., we have a group of three nodes with three loads from the same
2421 location in each node, and the vector size is 4. I.e., we have a
2422 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2423 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2424 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2427 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2428 scpecific type, e.g., in bytes for Altivec.
2429 The last mask is illegal since we assume two operands for permute
2430 operation, and the mask element values can't be outside that range.
2431 Hence, the last mask must be converted into {2,5,5,5}.
2432 For the first two permutations we need the first and the second input
2433 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2434 we need the second and the third vectors: {b1,c1,a2,b2} and
2437 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2441 vect_stmts_counter = 0;
2443 first_vec_index = vec_index++;
2445 second_vec_index = first_vec_index;
2447 second_vec_index = vec_index++;
2449 for (j = 0; j < unroll_factor; j++)
2451 for (k = 0; k < group_size; k++)
2453 first_mask_element = (i + j * group_size) * scale;
2454 for (m = 0; m < scale; m++)
2456 if (!vect_get_mask_element (stmt, first_mask_element, m,
2457 mask_nunits, only_one_vec, index, mask,
2458 ¤t_mask_element, &need_next_vector,
2459 &number_of_mask_fixes, &mask_fixed,
2460 &needs_first_vector))
2463 mask[index++] = current_mask_element;
2466 if (index == mask_nunits)
2468 tree mask_vec = NULL;
2470 while (--index >= 0)
2472 tree t = build_int_cst (mask_element_type, mask[index]);
2473 mask_vec = tree_cons (NULL, t, mask_vec);
2475 mask_vec = build_vector (mask_type, mask_vec);
2478 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2481 if (vect_print_dump_info (REPORT_DETAILS))
2483 fprintf (vect_dump, "unsupported vect permute ");
2484 print_generic_expr (vect_dump, mask_vec, 0);
2492 if (need_next_vector)
2494 first_vec_index = second_vec_index;
2495 second_vec_index = vec_index;
2498 next_scalar_stmt = VEC_index (gimple,
2499 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2501 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2502 mask_vec, first_vec_index, second_vec_index,
2503 gsi, node, builtin_decl, vectype, dr_chain,
2504 ncopies, vect_stmts_counter++);
2517 /* Vectorize SLP instance tree in postorder. */
/* Vectorize one node of an SLP tree, recursing into its children first
   (postorder).  NOTE(review): interior lines (early returns, braces,
   the trailing return of IS_STORE) are absent from this excerpt; the
   comments below describe only the visible code.  */
2520 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2521 unsigned int vectorization_factor)
2524 bool strided_store, is_store;
2525 gimple_stmt_iterator si;
2526 stmt_vec_info stmt_info;
2527 unsigned int vec_stmts_size, nunits, group_size;
2530 slp_tree loads_node;
/* Postorder: vectorize both children before this node so their vector
   defs are available.  */
2535 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2536 vectorization_factor);
2537 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2538 vectorization_factor);
/* Use the first scalar stmt of the node as the representative for
   type and access-pattern queries.  */
2540 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2541 stmt_info = vinfo_for_stmt (stmt);
2543 /* VECTYPE is the type of the destination. */
2544 vectype = STMT_VINFO_VECTYPE (stmt_info);
2545 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2546 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2548 /* For each SLP instance calculate number of vector stmts to be created
2549 for the scalar stmts in each node of the SLP tree. Number of vector
2550 elements in one vector iteration is the number of scalar elements in
2551 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2553 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2555 /* In case of load permutation we have to allocate vectorized statements for
2556 all the nodes that participate in that permutation. */
2557 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2559 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2561 if (!SLP_TREE_VEC_STMTS (loads_node))
2563 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2565 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Lazily allocate the vector-stmt array for this node itself.  */
2570 if (!SLP_TREE_VEC_STMTS (node))
2572 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2573 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2576 if (vect_print_dump_info (REPORT_DETAILS))
2578 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2579 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2582 /* Loads should be inserted before the first load. */
2583 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2584 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2585 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2586 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2587 else if (is_pattern_stmt_p (stmt_info))
2588 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2590 si = gsi_for_stmt (stmt);
2592 /* Stores should be inserted just before the last store. */
2593 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2594 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2596 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2597 si = gsi_for_stmt (last_store);
2600 /* Mark the first element of the reduction chain as reduction to properly
2601 transform the node. In the analysis phase only the last element of the
2602 chain is marked as reduction. */
2603 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2604 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2606 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2607 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
/* Emit the vector statements for this node at the chosen position.  */
2610 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2615 /* Generate vector code for all SLP instances in the loop/basic block. */
/* Generate vector code for every SLP instance of either LOOP_VINFO or
   BB_VINFO (exactly one of the two is expected to be non-NULL, selecting
   where the instance vector and the vectorization factor come from).
   NOTE(review): several interior lines (the else branch setting VF for
   the BB case, braces, the final return) are absent from this excerpt.  */
2618 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2620 VEC (slp_instance, heap) *slp_instances;
2621 slp_instance instance;
2623 bool is_store = false;
/* Loop SLP: instances and VF come from the loop_vec_info.  */
2627 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2628 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Basic-block SLP: instances come from the bb_vec_info.  */
2632 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
/* First pass: vectorize the tree of each instance.  */
2636 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2638 /* Schedule the tree of INSTANCE. */
2639 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2641 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2642 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2643 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: the original scalar stores at the instance roots have
   been replaced by vector stores, so remove them.  */
2646 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2648 slp_tree root = SLP_INSTANCE_TREE (instance);
2651 gimple_stmt_iterator gsi;
2653 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2654 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
/* Skip stmts without a data reference (they are not the stores we
   are looking for).  */
2656 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2659 /* Free the attached stmt_vec_info and remove the stmt. */
2660 gsi = gsi_for_stmt (store);
2661 gsi_remove (&gsi, true);
2662 free_stmt_vec_info (store);
2670 /* Vectorize the basic block. */
/* Apply basic-block SLP vectorization to BB: scan its statements and,
   at the first statement marked as belonging to an SLP instance,
   schedule all instances of the block; then update SSA form and free
   the per-BB vectorization info.  NOTE(review): some interior lines
   (braces and, presumably, a break after scheduling — TODO confirm)
   are absent from this excerpt.  */
2673 vect_slp_transform_bb (basic_block bb)
2675 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2676 gimple_stmt_iterator si;
2678 gcc_assert (bb_vinfo);
2680 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "SLPing BB\n");
2683 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2685 gimple stmt = gsi_stmt (si);
2686 stmt_vec_info stmt_info;
2688 if (vect_print_dump_info (REPORT_DETAILS))
2690 fprintf (vect_dump, "------>SLPing statement: ");
2691 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2694 stmt_info = vinfo_for_stmt (stmt);
2695 gcc_assert (stmt_info);
2697 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2698 if (STMT_SLP_TYPE (stmt_info))
2700 vect_schedule_slp (NULL, bb_vinfo);
/* Vectorized code touches memory, so the virtual operand must be
   renamed before the SSA update below.  */
2705 mark_sym_for_renaming (gimple_vop (cfun));
2706 /* The memory tags and pointers in vectorized statements need to
2707 have their SSA forms updated. FIXME, why can't this be delayed
2708 until all the loops have been transformed? */
2709 update_ssa (TODO_update_ssa);
2711 if (vect_print_dump_info (REPORT_DETAILS))
2712 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2714 destroy_bb_vec_info (bb_vinfo);