1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
/* NOTE(review): fragmentary excerpt -- embedded original line numbers skip,
   so the return-type line, the binding of STMT from SI, and the fallback
   return are elided from this view.  */
47 find_bb_location (basic_block bb)
50 gimple_stmt_iterator si;
/* Walk the statements of BB and return the first known location found.  */
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): fragmentary excerpt -- declarations of the iteration
   variables and the final free of NODE itself are elided from this view.  */
69 vect_free_slp_tree (slp_tree node)
/* Free each child subtree first (post-order), then this node's vectors.  */
77 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78 vect_free_slp_tree ((slp_tree)child);
80 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
/* VEC_STMTS is only allocated once the node has been transformed.  */
82 if (SLP_TREE_VEC_STMTS (node))
83 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
89 /* Free the memory allocated for the SLP instance. */
/* Releases the whole SLP tree plus the instance's load-permutation and
   load-node vectors.  NOTE(review): the freeing of INSTANCE itself is not
   visible in this excerpt.  */
92 vect_free_slp_instance (slp_instance instance)
94 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
95 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
96 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
100 /* Create an SLP node for SCALAR_STMTS. */
/* NOTE(review): fragmentary excerpt -- the declaration of NOPS, the
   else-branch returning NULL for non-call/non-assign stmts, and the final
   return of NODE are elided from this view.  */
103 vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
105 slp_tree node = XNEW (struct _slp_tree);
/* The first scalar stmt determines the operand count for the children.  */
106 gimple stmt = VEC_index (gimple, scalar_stmts, 0);
109 if (is_gimple_call (stmt))
110 nops = gimple_call_num_args (stmt);
111 else if (is_gimple_assign (stmt))
/* gimple_num_ops counts the lhs too; subtract it to get rhs operands.  */
112 nops = gimple_num_ops (stmt) - 1;
/* Initialize the node: take ownership of SCALAR_STMTS, no vectorized
   stmts yet, room for NOPS children, zero cost counters.  */
116 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
117 SLP_TREE_VEC_STMTS (node) = NULL;
118 SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
119 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
120 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
126 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  NOTE(review): fragmentary excerpt -- the declaration of I and
   the final return of OPRNDS_INFO are elided from this view.  */
128 static VEC (slp_oprnd_info, heap) *
129 vect_create_oprnd_info (int nops, int group_size)
132 slp_oprnd_info oprnd_info;
133 VEC (slp_oprnd_info, heap) *oprnds_info;
135 oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
/* One info record per operand, each starting in the "uninitialized" state
   so the first stmt of the group can establish the reference values.  */
136 for (i = 0; i < nops; i++)
138 oprnd_info = XNEW (struct _slp_oprnd_info);
139 oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
140 oprnd_info->first_dt = vect_uninitialized_def;
141 oprnd_info->first_def_type = NULL_TREE;
142 oprnd_info->first_const_oprnd = NULL_TREE;
143 oprnd_info->first_pattern = false;
144 VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
151 /* Free operands info.  Free def-stmts if FREE_DEF_STMTS is true.
152 (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
153 succeeds.  In the latter case we don't need the operands info that we used to
154 check isomorphism of the stmts, but we still need the def-stmts - they are
155 used as scalar stmts in SLP nodes. */
/* NOTE(review): fragmentary excerpt -- the FREE_DEF_STMTS parameter line,
   the guard on it, and the per-record free are elided from this view.  */
157 vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
161 slp_oprnd_info oprnd_info;
164 FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
165 VEC_free (gimple, heap, oprnd_info->def_stmts);
167 VEC_free (slp_oprnd_info, heap, *oprnds_info);
171 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
172 they are of a valid type and that they match the defs of the first stmt of
173 the SLP group (stored in OPRNDS_INFO). */
/* NOTE(review): fragmentary excerpt -- embedded original line numbers skip,
   so several statements, braces and else-arms are elided from this view.
   FIX below: OPRND1_INFO was fetched from slot 0 instead of slot 1.  */
176 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
177 slp_tree slp_node, gimple stmt,
178 int ncopies_for_cost, bool first,
179 VEC (slp_oprnd_info, heap) **oprnds_info)
182 unsigned int i, number_of_oprnds;
183 tree def, def_op0 = NULL_TREE;
185 enum vect_def_type dt = vect_uninitialized_def;
186 enum vect_def_type dt_op0 = vect_uninitialized_def;
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 tree lhs = gimple_get_lhs (stmt);
189 struct loop *loop = NULL;
190 enum tree_code rhs_code;
191 bool different_types = false;
192 bool pattern = false;
193 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
196 loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Calls use their argument count; assigns count rhs operands only.  */
198 if (is_gimple_call (stmt))
199 number_of_oprnds = gimple_call_num_args (stmt);
201 number_of_oprnds = gimple_num_ops (stmt) - 1;
203 for (i = 0; i < number_of_oprnds; i++)
205 oprnd = gimple_op (stmt, i + 1);
206 oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
/* Every operand must have an analyzable def (or be a constant).  */
208 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
210 || (!def_stmt && dt != vect_constant_def))
212 if (vect_print_dump_info (REPORT_SLP))
214 fprintf (vect_dump, "Build SLP failed: can't find def for ");
215 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
221 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
222 from the pattern. Check that all the stmts of the node are in the
224 if (loop && def_stmt && gimple_bb (def_stmt)
225 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
226 && vinfo_for_stmt (def_stmt)
227 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
228 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
229 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
/* Pattern membership must be uniform across the group.  */
232 if (!first && !oprnd_info->first_pattern)
234 if (vect_print_dump_info (REPORT_DETAILS))
236 fprintf (vect_dump, "Build SLP failed: some of the stmts"
237 " are in a pattern, and others are not ");
238 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Redirect to the pattern stmt that replaces DEF_STMT.  */
244 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
245 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
247 if (dt == vect_unknown_def_type
248 || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
250 if (vect_print_dump_info (REPORT_DETAILS))
251 fprintf (vect_dump, "Unsupported pattern.");
/* Re-derive DEF from the pattern stmt's result.  */
255 switch (gimple_code (def_stmt))
258 def = gimple_phi_result (def_stmt);
262 def = gimple_assign_lhs (def_stmt);
266 if (vect_print_dump_info (REPORT_DETAILS))
267 fprintf (vect_dump, "unsupported defining stmt: ");
/* First stmt of the group: record the reference def-type/def-type
   tree so later stmts can be checked against it.  */
274 oprnd_info->first_dt = dt;
275 oprnd_info->first_pattern = pattern;
278 oprnd_info->first_def_type = TREE_TYPE (def);
279 oprnd_info->first_const_oprnd = NULL_TREE;
283 oprnd_info->first_def_type = NULL_TREE;
284 oprnd_info->first_const_oprnd = oprnd;
291 /* Analyze costs (for the first stmt of the group only). */
292 if (REFERENCE_CLASS_P (lhs))
294 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
297 /* Not memory operation (we don't call this function for
299 vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
305 /* Not first stmt of the group, check that the def-stmt/s match
306 the def-stmt/s of the first stmt. Allow different definition
307 types for reduction chains: the first stmt must be a
308 vect_reduction_def (a phi node), and the rest
309 vect_internal_def. */
310 if (((oprnd_info->first_dt != dt
311 && !(oprnd_info->first_dt == vect_reduction_def
312 && dt == vect_internal_def))
313 || (oprnd_info->first_def_type != NULL_TREE
315 && !types_compatible_p (oprnd_info->first_def_type,
318 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
322 if (number_of_oprnds != 2)
324 if (vect_print_dump_info (REPORT_SLP))
325 fprintf (vect_dump, "Build SLP failed: different types ");
330 /* Try to swap operands in case of binary operation. */
332 different_types = true;
335 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* Swapping is only legal for a commutative binary op whose operands
   cross-match the first stmt's recorded def-types.  */
336 if (is_gimple_assign (stmt)
337 && (rhs_code = gimple_assign_rhs_code (stmt))
338 && TREE_CODE_CLASS (rhs_code) == tcc_binary
339 && commutative_tree_code (rhs_code)
340 && oprnd0_info->first_dt == dt
341 && oprnd_info->first_dt == dt_op0
343 && !(oprnd0_info->first_def_type
344 && !types_compatible_p (oprnd0_info->first_def_type,
346 && !(oprnd_info->first_def_type
347 && !types_compatible_p (oprnd_info->first_def_type,
348 TREE_TYPE (def_op0))))
350 if (vect_print_dump_info (REPORT_SLP))
352 fprintf (vect_dump, "Swapping operands of ");
353 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
356 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
357 gimple_assign_rhs2_ptr (stmt));
361 if (vect_print_dump_info (REPORT_SLP))
362 fprintf (vect_dump, "Build SLP failed: different types ");
370 /* Check the types of the definitions. */
373 case vect_constant_def:
374 case vect_external_def:
375 case vect_reduction_def:
378 case vect_internal_def:
381 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* FIX: fetch operand-info slot 1, not slot 0 -- with both pointing at
   slot 0 the cross-push below put both def-stmts into operand 0's
   vector, corrupting the swapped-operand bookkeeping.  */
382 oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
/* For swapped operands, push this def-stmt into the *other*
   operand's def-stmt list.  */
384 VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
386 VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
389 VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
394 /* FORNOW: Not supported. */
395 if (vect_print_dump_info (REPORT_SLP))
397 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
398 print_generic_expr (vect_dump, def, TDF_SLIM);
409 /* Recursively build an SLP tree starting from NODE.
410 Fail (and return FALSE) if def-stmts are not isomorphic, require data
411 permutation or are of unsupported types of operation. Otherwise, return
/* NOTE(review): fragmentary excerpt -- embedded original line numbers skip
   throughout, so many statements, braces and else-arms are elided from this
   view.  Comments below describe only what is visible.  */
415 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
416 slp_tree *node, unsigned int group_size,
417 int *inside_cost, int *outside_cost,
418 int ncopies_for_cost, unsigned int *max_nunits,
419 VEC (int, heap) **load_permutation,
420 VEC (slp_tree, heap) **loads,
421 unsigned int vectorization_factor, bool *loads_permuted)
424 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
425 gimple stmt = VEC_index (gimple, stmts, 0);
426 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
428 bool stop_recursion = false, need_same_oprnds = false;
429 tree vectype, scalar_type, first_op1 = NULL_TREE;
430 unsigned int ncopies;
433 enum machine_mode optab_op2_mode;
434 enum machine_mode vec_mode;
435 struct data_reference *first_dr;
437 bool permutation = false;
438 unsigned int load_place;
439 gimple first_load, prev_first_load = NULL;
440 VEC (slp_oprnd_info, heap) *oprnds_info;
442 slp_oprnd_info oprnd_info;
/* Operand count is taken from the first stmt of the node.  */
444 if (is_gimple_call (stmt))
445 nops = gimple_call_num_args (stmt);
447 nops = gimple_num_ops (stmt) - 1;
449 oprnds_info = vect_create_oprnd_info (nops, group_size);
451 /* For every stmt in NODE find its def stmt/s. */
452 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
454 if (vect_print_dump_info (REPORT_SLP))
456 fprintf (vect_dump, "Build SLP for ");
457 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
460 /* Fail to vectorize statements marked as unvectorizable. */
461 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
463 if (vect_print_dump_info (REPORT_SLP))
466 "Build SLP failed: unvectorizable statement ");
467 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
470 vect_free_oprnd_info (&oprnds_info, true);
474 lhs = gimple_get_lhs (stmt);
475 if (lhs == NULL_TREE)
477 if (vect_print_dump_info (REPORT_SLP))
480 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
481 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
484 vect_free_oprnd_info (&oprnds_info, true);
488 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
489 vectype = get_vectype_for_scalar_type (scalar_type);
492 if (vect_print_dump_info (REPORT_SLP))
494 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
495 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
498 vect_free_oprnd_info (&oprnds_info, true);
502 /* In case of multiple types we need to detect the smallest type. */
503 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
505 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
507 vectorization_factor = *max_nunits;
510 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
512 if (is_gimple_call (stmt))
513 rhs_code = CALL_EXPR;
515 rhs_code = gimple_assign_rhs_code (stmt);
517 /* Check the operation. */
/* First stmt: record the reference operation code.  */
520 first_stmt_code = rhs_code;
522 /* Shift arguments should be equal in all the packed stmts for a
523 vector shift with scalar shift operand. */
524 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
525 || rhs_code == LROTATE_EXPR
526 || rhs_code == RROTATE_EXPR)
528 vec_mode = TYPE_MODE (vectype);
530 /* First see if we have a vector/vector shift. */
531 optab = optab_for_tree_code (rhs_code, vectype,
535 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
537 /* No vector/vector shift, try for a vector/scalar shift. */
538 optab = optab_for_tree_code (rhs_code, vectype,
543 if (vect_print_dump_info (REPORT_SLP))
544 fprintf (vect_dump, "Build SLP failed: no optab.");
545 vect_free_oprnd_info (&oprnds_info, true);
548 icode = (int) optab_handler (optab, vec_mode);
549 if (icode == CODE_FOR_nothing)
551 if (vect_print_dump_info (REPORT_SLP))
552 fprintf (vect_dump, "Build SLP failed: "
553 "op not supported by target.");
554 vect_free_oprnd_info (&oprnds_info, true);
557 optab_op2_mode = insn_data[icode].operand[2].mode;
/* Scalar shift operand: all stmts must share the same shift amount.  */
558 if (!VECTOR_MODE_P (optab_op2_mode))
560 need_same_oprnds = true;
561 first_op1 = gimple_assign_rhs2 (stmt);
565 else if (rhs_code == WIDEN_LSHIFT_EXPR)
567 need_same_oprnds = true;
568 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts: must match the first stmt's operation, modulo the
   REALPART/IMAGPART pairing and strided-access reference forms.  */
573 if (first_stmt_code != rhs_code
574 && (first_stmt_code != IMAGPART_EXPR
575 || rhs_code != REALPART_EXPR)
576 && (first_stmt_code != REALPART_EXPR
577 || rhs_code != IMAGPART_EXPR)
578 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
579 && (first_stmt_code == ARRAY_REF
580 || first_stmt_code == INDIRECT_REF
581 || first_stmt_code == COMPONENT_REF
582 || first_stmt_code == MEM_REF)))
584 if (vect_print_dump_info (REPORT_SLP))
587 "Build SLP failed: different operation in stmt ");
588 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
591 vect_free_oprnd_info (&oprnds_info, true);
596 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
598 if (vect_print_dump_info (REPORT_SLP))
601 "Build SLP failed: different shift arguments in ");
602 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
605 vect_free_oprnd_info (&oprnds_info, true);
610 /* Strided store or load. */
611 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* REFERENCE_CLASS_P lhs means a store; recurse into its defs.  */
613 if (REFERENCE_CLASS_P (lhs))
616 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
617 stmt, ncopies_for_cost,
618 (i == 0), &oprnds_info))
620 vect_free_oprnd_info (&oprnds_info, true);
627 /* FORNOW: Check that there is no gap between the loads. */
628 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
629 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
630 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
631 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
633 if (vect_print_dump_info (REPORT_SLP))
635 fprintf (vect_dump, "Build SLP failed: strided "
637 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
640 vect_free_oprnd_info (&oprnds_info, true);
644 /* Check that the size of interleaved loads group is not
645 greater than the SLP group size. */
647 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
649 if (vect_print_dump_info (REPORT_SLP))
651 fprintf (vect_dump, "Build SLP failed: the number of "
652 "interleaved loads is greater than"
653 " the SLP group size ");
654 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
657 vect_free_oprnd_info (&oprnds_info, true);
661 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
664 /* Check that there are no loads from different interleaving
665 chains in the same node. The only exception is complex
667 if (prev_first_load != first_load
668 && rhs_code != REALPART_EXPR
669 && rhs_code != IMAGPART_EXPR)
671 if (vect_print_dump_info (REPORT_SLP))
673 fprintf (vect_dump, "Build SLP failed: different "
674 "interleaving chains in one node ");
675 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
678 vect_free_oprnd_info (&oprnds_info, true);
683 prev_first_load = first_load;
/* Alignment only needs checking once, at the chain head.  */
685 if (first_load == stmt)
687 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
688 if (vect_supportable_dr_alignment (first_dr, false)
689 == dr_unaligned_unsupported)
691 if (vect_print_dump_info (REPORT_SLP))
693 fprintf (vect_dump, "Build SLP failed: unsupported "
695 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
698 vect_free_oprnd_info (&oprnds_info, true);
702 /* Analyze costs (for the first stmt in the group). */
703 vect_model_load_cost (vinfo_for_stmt (stmt),
704 ncopies_for_cost, false, *node);
707 /* Store the place of this load in the interleaving chain. In
708 case that permutation is needed we later decide if a specific
709 permutation is supported. */
710 load_place = vect_get_place_in_interleaving_chain (stmt,
715 VEC_safe_push (int, heap, *load_permutation, load_place);
717 /* We stop the tree when we reach a group of loads. */
718 stop_recursion = true;
721 } /* Strided access. */
/* Non-strided memory references cannot be SLPed.  */
724 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
726 /* Not strided load. */
727 if (vect_print_dump_info (REPORT_SLP))
729 fprintf (vect_dump, "Build SLP failed: not strided load ");
730 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
733 /* FORNOW: Not strided loads are not supported. */
734 vect_free_oprnd_info (&oprnds_info, true);
738 /* Not memory operation. */
739 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
740 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
742 if (vect_print_dump_info (REPORT_SLP))
744 fprintf (vect_dump, "Build SLP failed: operation");
745 fprintf (vect_dump, " unsupported ");
746 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
749 vect_free_oprnd_info (&oprnds_info, true);
753 /* Find the def-stmts. */
754 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
755 ncopies_for_cost, (i == 0),
758 vect_free_oprnd_info (&oprnds_info, true);
764 /* Add the costs of the node to the overall instance costs. */
765 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
766 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
768 /* Strided loads were reached - stop the recursion. */
771 VEC_safe_push (slp_tree, heap, *loads, *node);
775 *loads_permuted = true;
/* Account for the permutation cost via the target hook.  */
777 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
782 /* We don't check here complex numbers chains, so we set
783 LOADS_PERMUTED for further check in
784 vect_supported_load_permutation_p. */
785 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
786 *loads_permuted = true;
792 /* Create SLP_TREE nodes for the definition node/s. */
793 FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
/* Only internal defs become child nodes; constants/externals do not.  */
797 if (oprnd_info->first_dt != vect_internal_def)
800 child = vect_create_new_slp_node (oprnd_info->def_stmts);
802 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
803 inside_cost, outside_cost, ncopies_for_cost,
804 max_nunits, load_permutation, loads,
805 vectorization_factor, loads_permuted))
808 vect_free_oprnd_info (&oprnds_info, true);
812 VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
/* Success: keep the def-stmts (now owned by the children).  */
815 vect_free_oprnd_info (&oprnds_info, false);
/* Dump the SLP tree rooted at NODE to VECT_DUMP, one line per scalar stmt,
   then recurse into the children.  NOTE(review): fragmentary excerpt -- the
   return type, NULL-node guard and variable declarations are elided.  */
821 vect_print_slp_tree (slp_tree node)
830 fprintf (vect_dump, "node ");
831 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
833 fprintf (vect_dump, "\n\tstmt %d ", i);
834 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
836 fprintf (vect_dump, "\n");
838 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
839 vect_print_slp_tree ((slp_tree) child);
843 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
844 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
845 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
846 stmts in NODE are to be marked. */
/* NOTE(review): fragmentary excerpt -- the guard that applies J in the
   HYBRID case is elided from this view.  */
849 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
858 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
860 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
862 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
863 vect_mark_slp_stmts ((slp_tree) child, mark, j)
867 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* NOTE(review): fragmentary excerpt -- NULL-node guard and declarations of
   I/STMT/CHILD are elided from this view.  */
870 vect_mark_slp_stmts_relevant (slp_tree node)
874 stmt_vec_info stmt_info;
880 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
882 stmt_info = vinfo_for_stmt (stmt);
/* A stmt may only be marked once; re-marking must agree.  */
883 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
884 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
885 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
888 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
889 vect_mark_slp_stmts_relevant ((slp_tree) child);
893 /* Check if the permutation required by the SLP INSTANCE is supported.
894 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* NOTE(review): fragmentary excerpt -- the for-loop header's first clause,
   the free of TMP_LOADS, and the final return are elided from this view.  */
897 vect_supported_slp_permutation_p (slp_instance instance)
899 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
900 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
901 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
902 VEC (slp_tree, heap) *sorted_loads = NULL;
904 slp_tree *tmp_loads = NULL;
905 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
908 /* FORNOW: The only supported loads permutation is loads from the same
909 location in all the loads in the node, when the data-refs in
910 nodes of LOADS constitute an interleaving chain.
911 Sort the nodes according to the order of accesses in the chain. */
912 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk permutation indices group_size apart, pairing each with a node.  */
914 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
915 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
916 i += group_size, j++)
918 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
919 /* Check that the loads are all in the same interleaving chain. */
920 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
922 if (vect_print_dump_info (REPORT_DETAILS))
924 fprintf (vect_dump, "Build SLP failed: unsupported data "
926 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
/* Place the node at its position in the interleaving order.  */
933 tmp_loads[index] = load;
936 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
937 for (i = 0; i < group_size; i++)
938 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
940 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
941 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Final feasibility check of the permutation against the target.  */
944 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
945 SLP_INSTANCE_UNROLLING_FACTOR (instance),
953 /* Rearrange the statements of NODE according to PERMUTATION. */
/* Recursively permutes the scalar stmts of NODE and all its children so
   that stmt I moves to position PERMUTATION[I].  NOTE(review): fragmentary
   excerpt -- declarations of STMT/CHILD are elided from this view.  */
956 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
957 VEC (int, heap) *permutation)
960 VEC (gimple, heap) *tmp_stmts;
961 unsigned int index, i;
967 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
968 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
970 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
971 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace can write at arbitrary indices.  */
973 for (i = 0; i < group_size; i++)
974 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
976 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
978 index = VEC_index (int, permutation, i);
979 VEC_replace (gimple, tmp_stmts, index, stmt);
982 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
983 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
987 /* Check if the required load permutation is supported.
988 LOAD_PERMUTATION contains a list of indices of the loads.
989 In SLP this permutation is relative to the order of strided stores that are
990 the base of the SLP instance. */
/* NOTE(review): fragmentary excerpt -- embedded original line numbers skip
   throughout, so many statements, braces and else-arms are elided from this
   view.  Comments below describe only what is visible.  */
993 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
994 VEC (int, heap) *load_permutation)
996 int i = 0, j, prev = -1, next, k, number_of_groups;
997 bool supported, bad_permutation = false;
999 slp_tree node, other_complex_node;
1000 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1001 unsigned complex_numbers = 0;
1002 struct data_reference *dr;
1003 bb_vec_info bb_vinfo;
1005 /* FORNOW: permutations are only supported in SLP. */
1009 if (vect_print_dump_info (REPORT_SLP))
1011 fprintf (vect_dump, "Load permutation ");
1012 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1013 fprintf (vect_dump, "%d ", next);
1016 /* In case of reduction every load permutation is allowed, since the order
1017 of the reduction statements is not important (as opposed to the case of
1018 strided stores). The only condition we need to check is that all the
1019 load nodes are of the same size and have the same permutation (and then
1020 rearrange all the nodes of the SLP instance according to this
1023 /* Check that all the load nodes are of the same size. */
1024 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1026 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1027 != (unsigned) group_size)
1030 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
/* Count nodes whose first stmt is a real/imag extraction.  */
1031 if (is_gimple_assign (stmt)
1032 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1033 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1037 /* Complex operands can be swapped as following:
1038 real_c = real_b + real_a;
1039 imag_c = imag_a + imag_b;
1040 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1041 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1042 chains are mixed, they match the above pattern. */
1043 if (complex_numbers)
1045 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1047 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1053 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
/* Mixed chains are only tolerated for exactly two complex nodes.  */
1055 if (complex_numbers != 2)
1063 other_complex_node = VEC_index (slp_tree,
1064 SLP_INSTANCE_LOADS (slp_instn), k);
1065 other_node_first = VEC_index (gimple,
1066 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1068 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1069 != other_node_first)
1077 /* We checked that this case ok, so there is no need to proceed with
1078 permutation tests. */
1079 if (complex_numbers == 2)
1081 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1082 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1086 node = SLP_INSTANCE_TREE (slp_instn);
1087 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1088 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1089 instance, not all the loads belong to the same node or interleaving
1090 group. Hence, we need to divide them into groups according to
1092 number_of_groups = VEC_length (int, load_permutation) / group_size;
1094 /* Reduction (there are no data-refs in the root).
1095 In reduction chain the order of the loads is important. */
1096 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1097 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1099 int first_group_load_index;
1101 /* Compare all the permutation sequences to the first one. */
1102 for (i = 1; i < number_of_groups; i++)
1105 for (j = i * group_size; j < i * group_size + group_size; j++)
1107 next = VEC_index (int, load_permutation, j);
1108 first_group_load_index = VEC_index (int, load_permutation, k);
1110 if (next != first_group_load_index)
1112 bad_permutation = true;
1119 if (bad_permutation)
1123 if (!bad_permutation)
1125 /* Check that the loads in the first sequence are different and there
1126 are no gaps between them. */
1127 load_index = sbitmap_alloc (group_size);
1128 sbitmap_zero (load_index);
1129 for (k = 0; k < group_size; k++)
1131 first_group_load_index = VEC_index (int, load_permutation, k);
/* A repeated index means a duplicated load in the sequence.  */
1132 if (TEST_BIT (load_index, first_group_load_index))
1134 bad_permutation = true;
1138 SET_BIT (load_index, first_group_load_index);
/* An unset bit means a gap: some index in [0, group_size) is missing.  */
1141 if (!bad_permutation)
1142 for (k = 0; k < group_size; k++)
1143 if (!TEST_BIT (load_index, k))
1145 bad_permutation = true;
1149 sbitmap_free (load_index);
1152 if (!bad_permutation)
1154 /* This permutation is valid for reduction. Since the order of the
1155 statements in the nodes is not important unless they are memory
1156 accesses, we can rearrange the statements in all the nodes
1157 according to the order of the loads. */
1158 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1160 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1165 /* In basic block vectorization we allow any subchain of an interleaving
1167 FORNOW: not supported in loop SLP because of realignment complications. */
1168 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1169 bad_permutation = false;
1170 /* Check that for every node in the instance the loads form a subchain. */
1173 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1177 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1180 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1182 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1184 bad_permutation = true;
/* Within a node, consecutive loads must be chain-adjacent.  */
1188 if (j != 0 && next_load != load)
1190 bad_permutation = true;
1194 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1197 if (bad_permutation)
1201 /* Check that the alignment of the first load in every subchain, i.e.,
1202 the first statement in every load node, is supported. */
1203 if (!bad_permutation)
1205 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1207 first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1209 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1211 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1212 if (vect_supportable_dr_alignment (dr, false)
1213 == dr_unaligned_unsupported)
1215 if (vect_print_dump_info (REPORT_SLP))
1217 fprintf (vect_dump, "unsupported unaligned load ");
1218 print_gimple_stmt (vect_dump, first_load, 0,
1221 bad_permutation = true;
1227 if (!bad_permutation)
1229 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1235 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1236 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1237 well (unless it's reduction). */
1238 if (VEC_length (int, load_permutation)
1239 != (unsigned int) (group_size * group_size))
1243 load_index = sbitmap_alloc (group_size);
1244 sbitmap_zero (load_index);
1245 for (j = 0; j < group_size; j++)
1247 for (i = j * group_size, k = 0;
1248 VEC_iterate (int, load_permutation, i, next) && k < group_size;
/* Each run of GROUP_SIZE entries must repeat the same index.  */
1251 if (i != j * group_size && next != prev)
1260 if (TEST_BIT (load_index, prev))
1266 SET_BIT (load_index, prev);
1269 for (j = 0; j < group_size; j++)
1270 if (!TEST_BIT (load_index, j))
1273 sbitmap_free (load_index);
1275 if (supported && i == group_size * group_size
1276 && vect_supported_slp_permutation_p (slp_instn))
1283 /* Find the first load in the loop that belongs to INSTANCE.
1284 When loads are in several SLP nodes, there can be a case in which the first
1285 load does not appear in the first SLP node to be transformed, causing
1286 incorrect order of statements. Since we generate all the loads together,
1287 they must be inserted before the first load of the SLP instance and not
1288 before the first load of the first node of the instance. */
/* NOTE(review): fragmentary excerpt -- declarations of I/J/LOAD_NODE and
   the final return of FIRST_LOAD are elided from this view.  */
1291 vect_find_first_load_in_slp_instance (slp_instance instance)
1295 gimple first_load = NULL, load;
/* Scan every load of every load node, keeping the earliest stmt.  */
1297 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1298 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1299 first_load = get_earlier_stmt (load, first_load);
1305 /* Find the last store in SLP INSTANCE. */
/* NOTE(review): fragmentary excerpt -- the for-loop header wrapping the
   VEC_iterate clause and the final return are elided from this view.  */
1308 vect_find_last_store_in_slp_instance (slp_instance instance)
1312 gimple last_store = NULL, store;
/* The stores are the scalar stmts of the instance's root node.  */
1314 node = SLP_INSTANCE_TREE (instance);
1316 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1318 last_store = get_later_stmt (store, last_store);
1324 /* Analyze an SLP instance starting from a group of strided stores. Call
1325 vect_build_slp_tree to build a tree of packed stmts if possible.
1326 Return FALSE if it's impossible to SLP any stmt in the loop. */
1329 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1332 slp_instance new_instance;
1334 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1335 unsigned int unrolling_factor = 1, nunits;
1336 tree vectype, scalar_type = NULL_TREE;
1338 unsigned int vectorization_factor = 0;
1339 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1340 unsigned int max_nunits = 0;
1341 VEC (int, heap) *load_permutation;
1342 VEC (slp_tree, heap) *loads;
1343 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1344 bool loads_permuted = false;
1345 VEC (gimple, heap) *scalar_stmts;
/* Determine the vector type and group size.  Three cases: STMT heads a
   strided-store group (use the data-ref's scalar type), it is part of a
   reduction chain, or it is a plain reduction (group size is then the
   number of reductions in the loop).  */
1347 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1351 scalar_type = TREE_TYPE (DR_REF (dr));
1352 vectype = get_vectype_for_scalar_type (scalar_type);
1356 gcc_assert (loop_vinfo);
1357 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1360 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1364 gcc_assert (loop_vinfo);
1365 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1366 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1371 if (vect_print_dump_info (REPORT_SLP))
1373 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1374 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1380 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1382 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1384 vectorization_factor = nunits;
1386 /* Calculate the unrolling factor. */
1387 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP cannot unroll: there is no loop to unroll.  */
1388 if (unrolling_factor != 1 && !loop_vinfo)
1390 if (vect_print_dump_info (REPORT_SLP))
1391 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1397 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1398 scalar_stmts = VEC_alloc (gimple, heap, group_size)_;
1400 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1402 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1405 VEC_safe_push (gimple, heap, scalar_stmts, next);
1406 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1411 /* Collect reduction statements. */
1412 VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1413 for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1414 VEC_safe_push (gimple, heap, scalar_stmts, next);
1417 node = vect_create_new_slp_node (scalar_stmts);
1419 /* Calculate the number of vector stmts to create based on the unrolling
1420 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1421 GROUP_SIZE / NUNITS otherwise. */
1422 ncopies_for_cost = unrolling_factor * group_size / nunits;
1424 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1425 loads = VEC_alloc (slp_tree, heap, group_size);
1427 /* Build the tree for the SLP instance. */
1428 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1429 &inside_cost, &outside_cost, ncopies_for_cost,
1430 &max_nunits, &load_permutation, &loads,
1431 vectorization_factor, &loads_permuted))
1433 /* Calculate the unrolling factor based on the smallest type. */
1434 if (max_nunits > nunits)
1435 unrolling_factor = least_common_multiple (max_nunits, group_size)
/* Re-check: the tree build may have raised the unrolling factor, which
   is still not allowed for basic-block SLP.  */
1438 if (unrolling_factor != 1 && !loop_vinfo)
1440 if (vect_print_dump_info (REPORT_SLP))
1441 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1446 /* Create a new SLP instance. */
1447 new_instance = XNEW (struct _slp_instance);
1448 SLP_INSTANCE_TREE (new_instance) = node;
1449 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1450 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1451 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1452 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1453 SLP_INSTANCE_LOADS (new_instance) = loads;
1454 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1455 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* Permuted loads must be checked for target support; on failure the
   whole instance is discarded.  */
1459 if (!vect_supported_load_permutation_p (new_instance, group_size,
1462 if (vect_print_dump_info (REPORT_SLP))
1464 fprintf (vect_dump, "Build SLP failed: unsupported load "
1466 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1469 vect_free_slp_instance (new_instance);
1473 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1474 = vect_find_first_load_in_slp_instance (new_instance);
/* NOTE(review): in the non-permuted path the permutation vector is
   freed here — presumably SLP_INSTANCE_LOAD_PERMUTATION is also
   NULLed in the elided line; confirm against the full file.  */
1477 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the accepted instance on the loop or basic-block info.  */
1480 VEC_safe_push (slp_instance, heap,
1481 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1484 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1487 if (vect_print_dump_info (REPORT_SLP))
1488 vect_print_slp_tree (node);
1493 /* Failed to SLP. */
1494 /* Free the allocated memory. */
1495 vect_free_slp_tree (node);
1496 VEC_free (int, heap, load_permutation);
1497 VEC_free (slp_tree, heap, loads);
1503 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1504 trees of packed scalar stmts if SLP is possible. */
1507 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1510 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1511 gimple first_element;
1514 if (vect_print_dump_info (REPORT_SLP))
1515 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Exactly one of LOOP_VINFO / BB_VINFO is set; pick the candidate
   groups from whichever context we are analyzing.  */
1519 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1520 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1521 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1524 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1526 /* Find SLP sequences starting from groups of strided stores. */
1527 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1528 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
/* For basic-block vectorization every store group must SLP; a single
   failure aborts the whole block.  */
1531 if (bb_vinfo && !ok)
1533 if (vect_print_dump_info (REPORT_SLP))
1534 fprintf (vect_dump, "Failed to SLP the basic block.");
1540 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1542 /* Find SLP sequences starting from reduction chains. */
1543 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1544 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1549 /* Don't try to vectorize SLP reductions if reduction chain was
1554 /* Find SLP sequences starting from groups of reductions. */
1555 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1556 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1557 VEC_index (gimple, reductions, 0)))
1564 /* For each possible SLP instance decide whether to SLP it and calculate overall
1565 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1566 least one instance. */
1569 vect_make_slp_decision (loop_vec_info loop_vinfo)
1571 unsigned int i, unrolling_factor = 1;
1572 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1573 slp_instance instance;
1574 int decided_to_slp = 0;
1576 if (vect_print_dump_info (REPORT_SLP))
1577 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1579 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1581 /* FORNOW: SLP if you can. */
/* The loop's overall SLP unrolling factor is the maximum over all
   accepted instances.  */
1582 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1583 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1585 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1586 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1587 loop-based vectorization. Such stmts will be marked as HYBRID. */
1588 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1592 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1594 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1595 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1596 decided_to_slp, unrolling_factor);
1598 return (decided_to_slp > 0);
1602 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1603 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1606 vect_detect_hybrid_slp_stmts (slp_tree node)
1610 imm_use_iterator imm_iter;
1612 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA result is used by a relevant non-SLP stmt
   (other than the reduction PHI of its own cycle) must also be
   vectorized loop-wise — mark the node's stmts HYBRID from index I.  */
1618 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1619 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1620 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1621 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1622 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1623 && !STMT_SLP_TYPE (stmt_vinfo)
1624 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1625 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1626 && !(gimple_code (use_stmt) == GIMPLE_PHI
1627 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1628 == vect_reduction_def))
1629 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into all children of the SLP tree.  */
1631 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1632 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1636 /* Find stmts that must be both vectorized and SLPed. */
1639 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1642 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1643 slp_instance instance;
1645 if (vect_print_dump_info (REPORT_SLP))
1646 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Run the hybrid detection over every SLP instance of the loop.  */
1648 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1649 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1653 /* Create and initialize a new bb_vec_info struct for BB, as well as
1654 stmt_vec_info structs for all the stmts in it. */
1657 new_bb_vec_info (basic_block bb)
1659 bb_vec_info res = NULL;
1660 gimple_stmt_iterator gsi;
/* xcalloc zero-initializes the whole struct, so unset fields are NULL.  */
1662 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1663 BB_VINFO_BB (res) = bb;
/* Attach a fresh stmt_vec_info to every statement in the block.  */
1665 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1667 gimple stmt = gsi_stmt (gsi);
1668 gimple_set_uid (stmt, 0);
1669 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Initial capacities (10 stores, 2 instances) are heuristic only;
   the vectors grow on demand.  */
1672 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1673 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1680 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1681 stmts in the basic block. */
1684 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1687 gimple_stmt_iterator si;
1692 bb = BB_VINFO_BB (bb_vinfo);
/* Release per-statement info first, then the aggregate vectors owned
   by the bb_vinfo itself.  */
1694 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1696 gimple stmt = gsi_stmt (si);
1697 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1700 /* Free stmt_vec_info. */
1701 free_stmt_vec_info (stmt);
1704 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1705 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1706 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1707 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1713 /* Analyze statements contained in SLP tree node after recursively analyzing
1714 the subtree. Return TRUE if the operations are supported. */
1717 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Post-order: children must be analyzable before this node is.  */
1727 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1728 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1731 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1734 gcc_assert (stmt_info);
/* Only pure-SLP stmts reach here in the BB vectorizer.  */
1735 gcc_assert (PURE_SLP_STMT (stmt_info));
1737 if (!vect_analyze_stmt (stmt, &dummy, node))
1745 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1746 operations are supported. */
1749 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1751 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1752 slp_instance instance;
/* Note: I is only advanced when the instance is kept; on failure the
   instance is removed in place and the next one shifts into slot I.  */
1755 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1757 if (!vect_slp_analyze_node_operations (bb_vinfo,
1758 SLP_INSTANCE_TREE (instance)))
1760 vect_free_slp_instance (instance);
1761 VEC_ordered_remove (slp_instance, slp_instances, i);
/* Fail only if no instance at all survived.  */
1767 if (!VEC_length (slp_instance, slp_instances))
1773 /* Check if vectorization of the basic block is profitable. */
1776 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1778 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1779 slp_instance instance;
1781 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1782 unsigned int stmt_cost;
1784 gimple_stmt_iterator si;
1785 basic_block bb = BB_VINFO_BB (bb_vinfo);
1786 stmt_vec_info stmt_info = NULL;
1787 tree dummy_type = NULL;
1790 /* Calculate vector costs. */
1791 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1793 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1794 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1797 /* Calculate scalar cost. */
1798 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1800 stmt = gsi_stmt (si);
1801 stmt_info = vinfo_for_stmt (stmt);
/* Only statements that will actually be replaced by vector code
   (vectorizable, pure SLP) contribute to the scalar side.  */
1803 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1804 || !PURE_SLP_STMT (stmt_info))
/* Cost each stmt via the target hook: load, store, or generic stmt.  */
1807 if (STMT_VINFO_DATA_REF (stmt_info))
1809 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1810 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1811 (scalar_load, dummy_type, dummy);
1813 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1814 (scalar_store, dummy_type, dummy);
1817 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1818 (scalar_stmt, dummy_type, dummy);
1820 scalar_cost += stmt_cost;
1823 if (vect_print_dump_info (REPORT_COST))
1825 fprintf (vect_dump, "Cost model analysis: \n");
1826 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1828 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1830 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1833 /* Vectorization is profitable if its cost is less than the cost of scalar
1835 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1841 /* Check if the basic block can be vectorized. */
1844 vect_slp_analyze_bb_1 (basic_block bb)
1846 bb_vec_info bb_vinfo;
1847 VEC (ddr_p, heap) *ddrs;
1848 VEC (slp_instance, heap) *slp_instances;
1849 slp_instance instance;
1852 int max_vf = MAX_VECTORIZATION_FACTOR;
1854 bb_vinfo = new_bb_vec_info (bb);
/* The analysis is a chain of gating checks; every failing check
   destroys BB_VINFO and bails out (the returns are elided here).  */
1858 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1860 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1861 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1864 destroy_bb_vec_info (bb_vinfo);
1868 ddrs = BB_VINFO_DDRS (bb_vinfo);
1869 if (!VEC_length (ddr_p, ddrs))
1871 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1872 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1875 destroy_bb_vec_info (bb_vinfo);
1879 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1882 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1883 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1884 "in basic block.\n");
1886 destroy_bb_vec_info (bb_vinfo);
1890 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1892 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1893 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1896 destroy_bb_vec_info (bb_vinfo);
1900 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1902 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1903 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1906 destroy_bb_vec_info (bb_vinfo);
1910 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1912 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1913 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1916 destroy_bb_vec_info (bb_vinfo);
1920 /* Check the SLP opportunities in the basic block, analyze and build SLP
1922 if (!vect_analyze_slp (NULL, bb_vinfo))
1924 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1925 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1926 "in basic block.\n");
1928 destroy_bb_vec_info (bb_vinfo);
1932 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1934 /* Mark all the statements that we want to vectorize as pure SLP and
1936 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1938 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1939 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1942 if (!vect_slp_analyze_operations (bb_vinfo))
1944 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1945 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1947 destroy_bb_vec_info (bb_vinfo);
1951 /* Cost model: check if the vectorization is worthwhile. */
1952 if (flag_vect_cost_model
1953 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1955 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1956 fprintf (vect_dump, "not vectorized: vectorization is not "
1959 destroy_bb_vec_info (bb_vinfo);
1963 if (vect_print_dump_info (REPORT_DETAILS))
1964 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
/* Entry point for basic-block SLP analysis: reject oversized blocks,
   then retry vect_slp_analyze_bb_1 over the target's supported vector
   sizes, largest first, until one succeeds or none are left.  */
1971 vect_slp_analyze_bb (basic_block bb)
1973 bb_vec_info bb_vinfo;
1975 gimple_stmt_iterator gsi;
1976 unsigned int vector_sizes;
1978 if (vect_print_dump_info (REPORT_DETAILS))
1979 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count real instructions only — debug stmts, nops and labels are
   excluded from the size limit.  */
1981 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1983 gimple stmt = gsi_stmt (gsi);
1984 if (!is_gimple_debug (stmt)
1985 && !gimple_nop_p (stmt)
1986 && gimple_code (stmt) != GIMPLE_LABEL)
1990 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1992 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1993 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1999 /* Autodetect first vector size we try. */
2000 current_vector_size = 0;
2001 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2005 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2009 destroy_bb_vec_info (bb_vinfo);
/* Drop the size just tried from the bitmask of candidate sizes.  */
2011 vector_sizes &= ~current_vector_size;
2012 if (vector_sizes == 0
2013 || current_vector_size == 0)
2016 /* Try the next biggest vector size. */
2017 current_vector_size = 1 << floor_log2 (vector_sizes);
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "***** Re-trying analysis with "
2020 "vector size %d\n", current_vector_size);
2025 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2026 the number of created vector stmts depends on the unrolling factor).
2027 However, the actual number of vector stmts for every SLP node depends on
2028 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2029 should be updated. In this function we assume that the inside costs
2030 calculated in vect_model_xxx_cost are linear in ncopies. */
2033 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2035 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2036 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2037 slp_instance instance;
2039 if (vect_print_dump_info (REPORT_SLP))
2040 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside cost by VF / unrolling-factor (integer
   division; VF is a multiple of the instance unrolling factor).  */
2042 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2043 /* We assume that costs are linear in ncopies. */
2044 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2045 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2049 /* For constant and loop invariant defs of SLP_NODE this function returns
2050 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2051 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2052 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2053 REDUC_INDEX is the index of the reduction operand in the statements, unless
2057 vect_get_constant_vectors (tree op, slp_tree slp_node,
2058 VEC (tree, heap) **vec_oprnds,
2059 unsigned int op_num, unsigned int number_of_vectors,
2062 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2063 gimple stmt = VEC_index (gimple, stmts, 0);
2064 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2068 int j, number_of_places_left_in_vector;
2071 int group_size = VEC_length (gimple, stmts);
2072 unsigned int vec_num, i;
2073 int number_of_copies = 1;
2074 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2075 bool constant_p, is_store;
2076 tree neutral_op = NULL;
2077 enum tree_code code = gimple_assign_rhs_code (stmt);
/* For reductions, pick the neutral element of the reduction operation
   (0 for sums, 1 for products, -1 for bit-and, ...) used to pad extra
   vector copies; see get_initial_def_for_reduction ().  */
2081 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2082 && reduc_index != -1)
2084 op_num = reduc_index - 1;
2085 op = gimple_op (stmt, reduc_index);
2086 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2087 we need either neutral operands or the original operands. See
2088 get_initial_def_for_reduction() for details. */
2091 case WIDEN_SUM_EXPR:
2097 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2098 neutral_op = build_real (TREE_TYPE (op), dconst0);
2100 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2105 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2106 neutral_op = build_real (TREE_TYPE (op), dconst1);
2108 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2113 neutral_op = build_int_cst (TREE_TYPE (op), -1);
/* Default case: no known neutral element — fall back to the initial
   value from the loop-preheader edge of the reduction PHI.  */
2118 def_stmt = SSA_NAME_DEF_STMT (op);
2119 loop = (gimple_bb (stmt))->loop_father;
2120 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2121 loop_preheader_edge (loop));
2129 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2132 op = gimple_assign_rhs1 (stmt);
2139 if (CONSTANT_CLASS_P (op))
2144 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2145 gcc_assert (vector_type);
2146 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2148 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2149 created vectors. It is greater than 1 if unrolling is performed.
2151 For example, we have two scalar operands, s1 and s2 (e.g., group of
2152 strided accesses of size two), while NUNITS is four (i.e., four scalars
2153 of this type can be packed in a vector). The output vector will contain
2154 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2157 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2158 containing the operands.
2160 For example, NUNITS is four as before, and the group size is 8
2161 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2162 {s5, s6, s7, s8}. */
2164 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
/* Fill vectors back-to-front: scalars are visited in reverse group
   order because tree_cons prepends, and the finished vectors are
   reversed again below.  */
2166 number_of_places_left_in_vector = nunits;
2167 for (j = 0; j < number_of_copies; j++)
2169 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2172 op = gimple_assign_rhs1 (stmt);
2174 op = gimple_op (stmt, op_num + 1);
2176 if (reduc_index != -1)
2178 loop = (gimple_bb (stmt))->loop_father;
2179 def_stmt = SSA_NAME_DEF_STMT (op);
2183 /* Get the def before the loop. In reduction chain we have only
2184 one initial value. */
2185 if ((j != (number_of_copies - 1)
2186 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2191 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2192 loop_preheader_edge (loop));
2195 /* Create 'vect_ = {op0,op1,...,opn}'. */
2196 t = tree_cons (NULL_TREE, op, t);
2198 number_of_places_left_in_vector--;
/* A vector is full: materialize it (build_vector for all-constant
   elements, a CONSTRUCTOR otherwise) and stash the initialized def.  */
2200 if (number_of_places_left_in_vector == 0)
2202 number_of_places_left_in_vector = nunits;
2205 vec_cst = build_vector (vector_type, t);
2207 vec_cst = build_constructor_from_list (vector_type, t);
2208 VEC_quick_push (tree, voprnds,
2209 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2215 /* Since the vectors are created in the reverse order, we should invert
2217 vec_num = VEC_length (tree, voprnds);
2218 for (j = vec_num - 1; j >= 0; j--)
2220 vop = VEC_index (tree, voprnds, j);
2221 VEC_quick_push (tree, *vec_oprnds, vop);
2224 VEC_free (tree, heap, voprnds);
2226 /* In case that VF is greater than the unrolling factor needed for the SLP
2227 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2228 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2229 to replicate the vectors. */
2230 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2232 tree neutral_vec = NULL;
/* Pad with a splat of the neutral element when one exists; otherwise
   recycle the already-built vectors (elided branch).  */
2237 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2239 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2243 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2244 VEC_quick_push (tree, *vec_oprnds, vop);
2250 /* Get vectorized definitions from SLP_NODE that contains corresponding
2251 vectorized def-stmts. */
2254 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2257 gimple vec_def_stmt;
2260 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
/* Collect the LHS of each already-generated vector stmt of the node.  */
2262 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2264 gcc_assert (vec_def_stmt);
2265 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2266 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2271 /* Get vectorized definitions for SLP_NODE.
2272 If the scalar definitions are loop invariants or constants, collect them and
2273 call vect_get_constant_vectors() to create vector stmts.
2274 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2275 must be stored in the corresponding child of SLP_NODE, and we call
2276 vect_get_slp_vect_defs () to retrieve them. */
2279 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2280 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2282 gimple first_stmt, first_def;
2283 int number_of_vects = 0, i;
2284 unsigned int child_index = 0;
2285 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2286 slp_tree child = NULL;
2287 VEC (tree, heap) *vec_defs;
2288 tree oprnd, def_lhs;
2289 bool vectorized_defs;
2291 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2292 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2294 /* For each operand we check if it has vectorized definitions in a child
2295 node or we need to create them (for invariants and constants). We
2296 check if the LHS of the first stmt of the next child matches OPRND.
2297 If it does, we found the correct child. Otherwise, we call
2298 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2299 to check this child node for the next operand. */
2300 vectorized_defs = false;
2301 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2303 child = (slp_tree) VEC_index (slp_void_p,
2304 SLP_TREE_CHILDREN (slp_node),
2306 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2308 /* In the end of a pattern sequence we have a use of the original stmt,
2309 so we need to compare OPRND with the original def. */
2310 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2311 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2312 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2313 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
/* The child's defining stmt may be a call or an assignment; fetch its
   LHS accordingly for the OPRND comparison.  */
2315 if (is_gimple_call (first_def))
2316 def_lhs = gimple_call_lhs (first_def);
2318 def_lhs = gimple_assign_lhs (first_def);
2320 if (operand_equal_p (oprnd, def_lhs, 0))
2322 /* The number of vector defs is determined by the number of
2323 vector statements in the node from which we get those
2325 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2326 vectorized_defs = true;
2331 if (!vectorized_defs)
2335 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2336 /* Number of vector stmts was calculated according to LHS in
2337 vect_schedule_slp_instance (), fix it by replacing LHS with
2338 RHS, if necessary. See vect_get_smallest_scalar_type () for
2340 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2342 if (rhs_size_unit != lhs_size_unit)
2344 number_of_vects *= rhs_size_unit;
2345 number_of_vects /= lhs_size_unit;
2350 /* Allocate memory for vectorized defs. */
2351 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2353 /* For reduction defs we call vect_get_constant_vectors (), since we are
2354 looking for initial loop invariant values. */
2355 if (vectorized_defs && reduc_index == -1)
2356 /* The defs are already vectorized. */
2357 vect_get_slp_vect_defs (child, &vec_defs);
2359 /* Build vectors from scalar defs. */
2360 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2361 number_of_vects, reduc_index);
/* The per-operand vector of defs is pushed as an opaque element.  */
2363 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2365 /* For reductions, we only need initial values. */
2366 if (reduc_index != -1)
2372 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2373 building a vector of type MASK_TYPE from it) and two input vectors placed in
2374 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2375 shifting by STRIDE elements of DR_CHAIN for every copy.
2376 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2378 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2379 the created stmts must be inserted. */
2382 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2383 tree mask, int first_vec_indx, int second_vec_indx,
2384 gimple_stmt_iterator *gsi, slp_tree node,
2385 tree vectype, VEC(tree,heap) *dr_chain,
2386 int ncopies, int vect_stmts_counter)
2389 gimple perm_stmt = NULL;
2390 stmt_vec_info next_stmt_info;
2392 tree first_vec, second_vec, data_ref;
2394 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2396 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad SLP_TREE_VEC_STMTS with NULLs up to its final size so that
   VEC_replace below can store stmts at arbitrary slots.  */
2398 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2399 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2400 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2402 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2403 for (i = 0; i < ncopies; i++)
2405 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2406 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2408 /* Generate the permute statement. */
2409 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2410 first_vec, second_vec, mask);
2411 data_ref = make_ssa_name (perm_dest, perm_stmt);
2412 gimple_set_lhs (perm_stmt, data_ref);
2413 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2415 /* Store the vector statement in NODE. */
2416 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2417 stride * i + vect_stmts_counter, perm_stmt);
/* Each subsequent copy reads the next pair of input vectors, STRIDE
   elements further along DR_CHAIN.  */
2419 first_vec_indx += stride;
2420 second_vec_indx += stride;
2423 /* Mark the scalar stmt as vectorized. */
2424 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2425 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2429 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2430 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2431 representation. Check that the mask is valid and return FALSE if not.
2432 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2433 the next vector, i.e., the current first vector is not needed. */
2436 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2437 int mask_nunits, bool only_one_vec, int index,
2438 unsigned char *mask, int *current_mask_element,
2439 bool *need_next_vector, int *number_of_mask_fixes,
2440 bool *mask_fixed, bool *needs_first_vector)
2444 /* Convert to target specific representation. */
2445 *current_mask_element = first_mask_element + m;
2446 /* Adjust the value in case it's a mask for second and third vectors. */
2447 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
/* An index below MASK_NUNITS selects from the first input vector.  */
2449 if (*current_mask_element < mask_nunits)
2450 *needs_first_vector = true;
2452 /* We have only one input vector to permute but the mask accesses values in
2453 the next vector as well. */
2454 if (only_one_vec && *current_mask_element >= mask_nunits)
2456 if (vect_print_dump_info (REPORT_DETAILS))
2458 fprintf (vect_dump, "permutation requires at least two vectors ");
2459 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2465 /* The mask requires the next vector. */
2466 if (*current_mask_element >= mask_nunits * 2)
2468 if (*needs_first_vector || *mask_fixed)
2470 /* We either need the first vector too or have already moved to the
2471 next vector. In both cases, this permutation needs three
/* Two-input VEC_PERM_EXPR cannot express a three-vector permute.  */
2473 if (vect_print_dump_info (REPORT_DETAILS))
2475 fprintf (vect_dump, "permutation requires at "
2476 "least three vectors ");
2477 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2483 /* We move to the next vector, dropping the first one and working with
2484 the second and the third - we need to adjust the values of the mask
2486 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
/* Re-base all mask elements already emitted for this mask.  */
2488 for (i = 0; i < index; i++)
2489 mask[i] -= mask_nunits * *number_of_mask_fixes;
2491 (*number_of_mask_fixes)++;
2495 *need_next_vector = *mask_fixed;
2497 /* This was the last element of this mask. Start a new one. */
2498 if (index == mask_nunits - 1)
2500 *number_of_mask_fixes = 1;
2501 *mask_fixed = false;
2502 *needs_first_vector = false;
2509 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2510 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2511 permute statements for SLP_NODE_INSTANCE. */
2513 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2514 gimple_stmt_iterator *gsi, int vf,
2515 slp_instance slp_node_instance, bool analyze_only)
2517 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2518 tree mask_element_type = NULL_TREE, mask_type;
2519 int i, j, k, nunits, vec_index = 0, scalar_index;
2521 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2522 gimple next_scalar_stmt;
2523 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2524 int first_mask_element;
2525 int index, unroll_factor, current_mask_element, ncopies;
2526 unsigned char *mask;
2527 bool only_one_vec = false, need_next_vector = false;
2528 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2529 int number_of_mask_fixes = 1;
2530 bool mask_fixed = false;
2531 bool needs_first_vector = false;
2532 enum machine_mode mode;
2534 mode = TYPE_MODE (vectype);
2536 if (!can_vec_perm_p (mode, false, NULL))
2538 if (vect_print_dump_info (REPORT_DETAILS))
2540 fprintf (vect_dump, "no vect permute for ");
2541 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2546 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2547 same size as the vector element being permuted. */
2549 = lang_hooks.types.type_for_size
2550 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2551 mask_type = get_vectype_for_scalar_type (mask_element_type);
2552 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2553 mask = XALLOCAVEC (unsigned char, nunits);
2554 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2556 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2557 unrolling factor. */
2558 orig_vec_stmts_num = group_size *
2559 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2560 if (orig_vec_stmts_num == 1)
2561 only_one_vec = true;
2563 /* Number of copies is determined by the final vectorization factor
2564 relatively to SLP_NODE_INSTANCE unrolling factor. */
2565 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2567 /* Generate permutation masks for every NODE. Number of masks for each NODE
2568 is equal to GROUP_SIZE.
2569 E.g., we have a group of three nodes with three loads from the same
2570 location in each node, and the vector size is 4. I.e., we have a
2571 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2572 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2573 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2576 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2577 The last mask is illegal since we assume two operands for permute
2578 operation, and the mask element values can't be outside that range.
2579 Hence, the last mask must be converted into {2,5,5,5}.
2580 For the first two permutations we need the first and the second input
2581 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2582 we need the second and the third vectors: {b1,c1,a2,b2} and
2585 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2589 vect_stmts_counter = 0;
2591 first_vec_index = vec_index++;
2593 second_vec_index = first_vec_index;
2595 second_vec_index = vec_index++;
2597 for (j = 0; j < unroll_factor; j++)
2599 for (k = 0; k < group_size; k++)
2601 first_mask_element = i + j * group_size;
2602 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2603 nunits, only_one_vec, index,
2604 mask, ¤t_mask_element,
2606 &number_of_mask_fixes, &mask_fixed,
2607 &needs_first_vector))
2609 mask[index++] = current_mask_element;
2611 if (index == nunits)
2613 tree mask_vec = NULL;
2615 if (!can_vec_perm_p (mode, false, mask))
2617 if (vect_print_dump_info (REPORT_DETAILS))
2619 fprintf (vect_dump, "unsupported vect permute { ");
2620 for (i = 0; i < nunits; ++i)
2621 fprintf (vect_dump, "%d ", mask[i]);
2622 fprintf (vect_dump, "}\n");
2627 while (--index >= 0)
2629 tree t = build_int_cst (mask_element_type, mask[index]);
2630 mask_vec = tree_cons (NULL, t, mask_vec);
2632 mask_vec = build_vector (mask_type, mask_vec);
2637 if (need_next_vector)
2639 first_vec_index = second_vec_index;
2640 second_vec_index = vec_index;
2643 next_scalar_stmt = VEC_index (gimple,
2644 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2646 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2647 mask_vec, first_vec_index, second_vec_index,
2648 gsi, node, vectype, dr_chain,
2649 ncopies, vect_stmts_counter++);
2661 /* Vectorize SLP instance tree in postorder. */
2664 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2665 unsigned int vectorization_factor)
2668 bool strided_store, is_store;
2669 gimple_stmt_iterator si;
2670 stmt_vec_info stmt_info;
2671 unsigned int vec_stmts_size, nunits, group_size;
2674 slp_tree loads_node;
2680 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2681 vect_schedule_slp_instance ((slp_tree) child, instance,
2682 vectorization_factor);
2684 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2685 stmt_info = vinfo_for_stmt (stmt);
2687 /* VECTYPE is the type of the destination. */
2688 vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2690 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2692 /* For each SLP instance calculate number of vector stmts to be created
2693 for the scalar stmts in each node of the SLP tree. Number of vector
2694 elements in one vector iteration is the number of scalar elements in
2695 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2697 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2699 /* In case of load permutation we have to allocate vectorized statements for
2700 all the nodes that participate in that permutation. */
2701 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2703 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2705 if (!SLP_TREE_VEC_STMTS (loads_node))
2707 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2709 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2714 if (!SLP_TREE_VEC_STMTS (node))
2716 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2717 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2720 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2723 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2726 /* Loads should be inserted before the first load. */
2727 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2728 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2729 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2730 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2731 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2732 else if (is_pattern_stmt_p (stmt_info))
2733 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2735 si = gsi_for_stmt (stmt);
2737 /* Stores should be inserted just before the last store. */
2738 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2739 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2741 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2742 si = gsi_for_stmt (last_store);
2745 /* Mark the first element of the reduction chain as reduction to properly
2746 transform the node. In the analysis phase only the last element of the
2747 chain is marked as reduction. */
2748 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2749 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2751 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2752 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2755 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2760 /* Generate vector code for all SLP instances in the loop/basic block. */
2763 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2765 VEC (slp_instance, heap) *slp_instances;
2766 slp_instance instance;
2768 bool is_store = false;
2772 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2773 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2777 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2781 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2783 /* Schedule the tree of INSTANCE. */
2784 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2786 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2787 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2788 fprintf (vect_dump, "vectorizing stmts using SLP.");
2791 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2793 slp_tree root = SLP_INSTANCE_TREE (instance);
2796 gimple_stmt_iterator gsi;
2798 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2799 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2801 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2804 /* Free the attached stmt_vec_info and remove the stmt. */
2805 gsi = gsi_for_stmt (store);
2806 gsi_remove (&gsi, true);
2807 free_stmt_vec_info (store);
2815 /* Vectorize the basic block. */
2818 vect_slp_transform_bb (basic_block bb)
2820 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2821 gimple_stmt_iterator si;
2823 gcc_assert (bb_vinfo);
2825 if (vect_print_dump_info (REPORT_DETAILS))
2826 fprintf (vect_dump, "SLPing BB\n");
2828 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2830 gimple stmt = gsi_stmt (si);
2831 stmt_vec_info stmt_info;
2833 if (vect_print_dump_info (REPORT_DETAILS))
2835 fprintf (vect_dump, "------>SLPing statement: ");
2836 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2839 stmt_info = vinfo_for_stmt (stmt);
2840 gcc_assert (stmt_info);
2842 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2843 if (STMT_SLP_TYPE (stmt_info))
2845 vect_schedule_slp (NULL, bb_vinfo);
2850 mark_sym_for_renaming (gimple_vop (cfun));
2851 /* The memory tags and pointers in vectorized statements need to
2852 have their SSA forms updated. FIXME, why can't this be delayed
2853 until all the loops have been transformed? */
2854 update_ssa (TODO_update_ssa);
2856 if (vect_print_dump_info (REPORT_DETAILS))
2857 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2859 destroy_bb_vec_info (bb_vinfo);