1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
/* NOTE(review): this is a numbered listing with interior lines elided
   (embedded line numbers jump, e.g. 44 -> 47); the return type, the
   declaration of 'stmt', braces and the fallback return are not
   visible here -- verify against the full source.  */
47 find_bb_location (basic_block bb)
50 gimple_stmt_iterator si;
/* Scan BB's statements and return the location of the first one that
   carries a known source location.  */
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): elided listing -- the function header line, braces and
   any final free of NODE itself are not visible; verify upstream.  */
69 vect_free_slp_tree (slp_tree node)
/* Recurse into the children first, then release this node's vectors.  */
77 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78 vect_free_slp_tree ((slp_tree) child);
80 VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
81 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
/* SLP_TREE_VEC_STMTS is NULL until vector statements are created (see
   vect_create_new_slp_node), hence the guard.  */
83 if (SLP_TREE_VEC_STMTS (node))
84 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
90 /* Free the memory allocated for the SLP instance. */
93 vect_free_slp_instance (slp_instance instance)
/* Free the SLP tree, then the load-permutation and load-node vectors
   owned by INSTANCE.  NOTE(review): the free of INSTANCE itself is not
   visible in this elided listing -- confirm against the full source.  */
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
96 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
97 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
101 /* Create an SLP node for SCALAR_STMTS. */
/* NOTE(review): elided listing -- the return type, the body of the
   COND_EXPR branch and the final 'return node;' are not visible.  */
104 vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
107 gimple stmt = VEC_index (gimple, scalar_stmts, 0);
/* Operand count: call arguments for calls; for assignments
   gimple_num_ops counts the lhs too, hence the -1.  */
110 if (is_gimple_call (stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (is_gimple_assign (stmt))
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
/* Allocate the node; it takes ownership of SCALAR_STMTS, starts with
   no vectorized statements and zero cost counters.  */
121 node = XNEW (struct _slp_tree);
122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
123 SLP_TREE_VEC_STMTS (node) = NULL;
124 SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
125 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
126 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
132 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134 static VEC (slp_oprnd_info, heap) *
135 vect_create_oprnd_info (int nops, int group_size)
138 slp_oprnd_info oprnd_info;
139 VEC (slp_oprnd_info, heap) *oprnds_info;
/* One slp_oprnd_info per operand, each with room for GROUP_SIZE
   def-stmts; all "first_*" fields start in the uninitialized state and
   are filled in by vect_get_and_check_slp_defs for the group's first
   statement.  */
141 oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
142 for (i = 0; i < nops; i++)
144 oprnd_info = XNEW (struct _slp_oprnd_info);
145 oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
146 oprnd_info->first_dt = vect_uninitialized_def;
147 oprnd_info->first_def_type = NULL_TREE;
148 oprnd_info->first_const_oprnd = NULL_TREE;
149 oprnd_info->first_pattern = false;
150 VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
157 /* Free operands info. */
160 vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
163 slp_oprnd_info oprnd_info;
/* Free each entry's def-stmts vector and the entry itself, then the
   outer vector.  NOTE(review): upstream also NULLs *oprnds_info after
   the final VEC_free; that line is not visible in this listing.  */
165 FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
167 VEC_free (gimple, heap, oprnd_info->def_stmts);
168 XDELETE (oprnd_info);
171 VEC_free (slp_oprnd_info, heap, *oprnds_info);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
/* NOTE(review): this is a numbered listing with many interior lines
   elided (line numbers jump throughout) -- return type, braces, several
   declarations ('oprnd', 'def_stmt', 'op_idx') and various statements
   are missing.  Do not restructure from this fragment alone.  */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
181 slp_tree slp_node, gimple stmt,
182 int ncopies_for_cost, bool first,
183 VEC (slp_oprnd_info, heap) **oprnds_info)
186 unsigned int i, number_of_oprnds;
187 tree def, def_op0 = NULL_TREE;
189 enum vect_def_type dt = vect_uninitialized_def;
190 enum vect_def_type dt_op0 = vect_uninitialized_def;
191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
192 tree lhs = gimple_get_lhs (stmt);
193 struct loop *loop = NULL;
194 enum tree_code rhs_code;
195 bool different_types = false;
196 bool pattern = false;
197 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
199 tree compare_rhs = NULL_TREE;
202 loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Operand count mirrors vect_create_new_slp_node: call args for calls,
   gimple_num_ops - 1 for assignments.  */
204 if (is_gimple_call (stmt))
206 number_of_oprnds = gimple_call_num_args (stmt);
209 else if (is_gimple_assign (stmt))
211 number_of_oprnds = gimple_num_ops (stmt) - 1;
212 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
/* For each operand: find its defining statement and classify it.  */
218 for (i = 0; i < number_of_oprnds; i++)
223 compare_rhs = NULL_TREE;
226 oprnd = gimple_op (stmt, op_idx++);
228 oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
/* A COND_EXPR condition is a comparison tree: peel off its second
   operand (handled on a later iteration via compare_rhs) and analyze
   the first.  */
230 if (COMPARISON_CLASS_P (oprnd))
232 compare_rhs = TREE_OPERAND (oprnd, 1);
233 oprnd = TREE_OPERAND (oprnd, 0);
236 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
238 || (!def_stmt && dt != vect_constant_def))
240 if (vect_print_dump_info (REPORT_SLP))
242 fprintf (vect_dump, "Build SLP failed: can't find def for ");
243 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern. Check that all the stmts of the node are in the
252 if (loop && def_stmt && gimple_bb (def_stmt)
253 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
254 && vinfo_for_stmt (def_stmt)
255 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
256 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
257 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
/* Either all stmts of the node use pattern defs, or none do.  */
260 if (!first && !oprnd_info->first_pattern)
262 if (vect_print_dump_info (REPORT_DETAILS))
264 fprintf (vect_dump, "Build SLP failed: some of the stmts"
265 " are in a pattern, and others are not ");
266 print_generic_expr (vect_dump, oprnd, TDF_SLIM)
272 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
273 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
275 if (dt == vect_unknown_def_type)
277 if (vect_print_dump_info (REPORT_DETAILS))
278 fprintf (vect_dump, "Unsupported pattern.");
/* Extract the defined value from the (pattern) def stmt.  */
282 switch (gimple_code (def_stmt))
285 def = gimple_phi_result (def_stmt);
289 def = gimple_assign_lhs (def_stmt);
293 if (vect_print_dump_info (REPORT_DETAILS))
294 fprintf (vect_dump, "unsupported defining stmt: ");
/* First stmt of the group: record def kind/type for later matching.  */
301 oprnd_info->first_dt = dt;
302 oprnd_info->first_pattern = pattern;
305 oprnd_info->first_def_type = TREE_TYPE (def);
306 oprnd_info->first_const_oprnd = NULL_TREE;
310 oprnd_info->first_def_type = NULL_TREE;
311 oprnd_info->first_const_oprnd = oprnd;
318 /* Analyze costs (for the first stmt of the group only). */
319 if (REFERENCE_CLASS_P (lhs))
321 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
325 enum vect_def_type dts[2];
327 dts[1] = vect_uninitialized_def;
328 /* Not memory operation (we don't call this function for
330 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
337 /* Not first stmt of the group, check that the def-stmt/s match
338 the def-stmt/s of the first stmt. Allow different definition
339 types for reduction chains: the first stmt must be a
340 vect_reduction_def (a phi node), and the rest
341 vect_internal_def. */
342 if (((oprnd_info->first_dt != dt
343 && !(oprnd_info->first_dt == vect_reduction_def
344 && dt == vect_internal_def))
345 || (oprnd_info->first_def_type != NULL_TREE
347 && !types_compatible_p (oprnd_info->first_def_type,
350 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
354 if (number_of_oprnds != 2)
356 if (vect_print_dump_info (REPORT_SLP))
357 fprintf (vect_dump, "Build SLP failed: different types ");
362 /* Try to swap operands in case of binary operation. */
364 different_types = true;
367 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* Operands can only be swapped for a commutative binary op whose
   def kinds/types match crosswise with the first stmt's operands.  */
368 if (is_gimple_assign (stmt)
369 && (rhs_code = gimple_assign_rhs_code (stmt))
370 && TREE_CODE_CLASS (rhs_code) == tcc_binary
371 && commutative_tree_code (rhs_code)
372 && oprnd0_info->first_dt == dt
373 && oprnd_info->first_dt == dt_op0
375 && !(oprnd0_info->first_def_type
376 && !types_compatible_p (oprnd0_info->first_def_type,
378 && !(oprnd_info->first_def_type
379 && !types_compatible_p (oprnd_info->first_def_type,
380 TREE_TYPE (def_op0))))
382 if (vect_print_dump_info (REPORT_SLP))
384 fprintf (vect_dump, "Swapping operands of ");
385 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
388 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
389 gimple_assign_rhs2_ptr (stmt));
393 if (vect_print_dump_info (REPORT_SLP))
394 fprintf (vect_dump, "Build SLP failed: different types ");
402 /* Check the types of the definitions. */
405 case vect_constant_def:
406 case vect_external_def:
407 case vect_reduction_def:
410 case vect_internal_def:
413 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* NOTE(review): oprnd1_info is fetched with index 0, same as
   oprnd0_info -- upstream GCC uses index 1 here (the two swapped
   operand slots).  Likely a transcription or real bug; verify
   against the full tree-vect-slp.c before relying on this path.  */
414 oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
416 VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
418 VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
421 VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
426 /* FORNOW: Not supported. */
427 if (vect_print_dump_info (REPORT_SLP))
429 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
430 print_generic_expr (vect_dump, def, TDF_SLIM);
441 /* Recursively build an SLP tree starting from NODE.
442 Fail (and return FALSE) if def-stmts are not isomorphic, require data
443 permutation or are of unsupported types of operation. Otherwise, return
/* NOTE(review): numbered listing with many interior lines elided (the
   embedded line numbers jump throughout) -- the return type, braces,
   several declarations ('nops', 'i', 'lhs', 'cond', 'optab', 'icode',
   'dummy', 'child') and whole statements are missing.  The structure
   below is a fragment; do not restructure from it.  */
447 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
448 slp_tree *node, unsigned int group_size,
449 int *inside_cost, int *outside_cost,
450 int ncopies_for_cost, unsigned int *max_nunits,
451 VEC (int, heap) **load_permutation,
452 VEC (slp_tree, heap) **loads,
453 unsigned int vectorization_factor, bool *loads_permuted)
456 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
457 gimple stmt = VEC_index (gimple, stmts, 0);
458 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
459 enum tree_code first_cond_code = ERROR_MARK;
461 bool stop_recursion = false, need_same_oprnds = false;
462 tree vectype, scalar_type, first_op1 = NULL_TREE;
463 unsigned int ncopies;
466 enum machine_mode optab_op2_mode;
467 enum machine_mode vec_mode;
468 struct data_reference *first_dr;
470 bool permutation = false;
471 unsigned int load_place;
472 gimple first_load, prev_first_load = NULL;
473 VEC (slp_oprnd_info, heap) *oprnds_info;
475 slp_oprnd_info oprnd_info;
/* Operand count taken from the group's first stmt (same computation as
   vect_create_new_slp_node).  */
478 if (is_gimple_call (stmt))
479 nops = gimple_call_num_args (stmt);
480 else if (is_gimple_assign (stmt))
482 nops = gimple_num_ops (stmt) - 1;
483 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
489 oprnds_info = vect_create_oprnd_info (nops, group_size);
491 /* For every stmt in NODE find its def stmt/s. */
492 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
494 if (vect_print_dump_info (REPORT_SLP))
496 fprintf (vect_dump, "Build SLP for ");
497 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 /* Fail to vectorize statements marked as unvectorizable. */
501 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
503 if (vect_print_dump_info (REPORT_SLP))
506 "Build SLP failed: unvectorizable statement ");
507 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
510 vect_free_oprnd_info (&oprnds_info);
514 lhs = gimple_get_lhs (stmt);
515 if (lhs == NULL_TREE)
517 if (vect_print_dump_info (REPORT_SLP))
520 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
521 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
524 vect_free_oprnd_info (&oprnds_info);
/* COND_EXPR is only handled when its condition is a bare comparison.  */
528 if (is_gimple_assign (stmt)
529 && gimple_assign_rhs_code (stmt) == COND_EXPR
530 && (cond = gimple_assign_rhs1 (stmt))
531 && !COMPARISON_CLASS_P (cond))
533 if (vect_print_dump_info (REPORT_SLP))
536 "Build SLP failed: condition is not comparison ");
537 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
540 vect_free_oprnd_info (&oprnds_info);
544 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
545 vectype = get_vectype_for_scalar_type (scalar_type);
548 if (vect_print_dump_info (REPORT_SLP))
550 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
551 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
554 vect_free_oprnd_info (&oprnds_info);
558 /* In case of multiple types we need to detect the smallest type. */
559 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
561 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
563 vectorization_factor = *max_nunits;
566 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
/* Calls are only vectorizable when plain: no internal/tail/noreturn/
   throwing calls and no static chain.  */
568 if (is_gimple_call (stmt))
570 rhs_code = CALL_EXPR;
571 if (gimple_call_internal_p (stmt)
572 || gimple_call_tail_p (stmt)
573 || gimple_call_noreturn_p (stmt)
574 || !gimple_call_nothrow_p (stmt)
575 || gimple_call_chain (stmt))
577 if (vect_print_dump_info (REPORT_SLP))
580 "Build SLP failed: unsupported call type ");
581 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
584 vect_free_oprnd_info (&oprnds_info);
589 rhs_code = gimple_assign_rhs_code (stmt);
591 /* Check the operation. */
594 first_stmt_code = rhs_code;
596 /* Shift arguments should be equal in all the packed stmts for a
597 vector shift with scalar shift operand. */
598 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
599 || rhs_code == LROTATE_EXPR
600 || rhs_code == RROTATE_EXPR)
602 vec_mode = TYPE_MODE (vectype);
604 /* First see if we have a vector/vector shift. */
605 optab = optab_for_tree_code (rhs_code, vectype,
609 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
611 /* No vector/vector shift, try for a vector/scalar shift. */
612 optab = optab_for_tree_code (rhs_code, vectype,
617 if (vect_print_dump_info (REPORT_SLP))
618 fprintf (vect_dump, "Build SLP failed: no optab.");
619 vect_free_oprnd_info (&oprnds_info);
622 icode = (int) optab_handler (optab, vec_mode);
623 if (icode == CODE_FOR_nothing)
625 if (vect_print_dump_info (REPORT_SLP))
626 fprintf (vect_dump, "Build SLP failed: "
627 "op not supported by target.");
628 vect_free_oprnd_info (&oprnds_info);
631 optab_op2_mode = insn_data[icode].operand[2].mode;
/* A scalar (non-vector) shift-amount operand must be identical in
   every statement of the group.  */
632 if (!VECTOR_MODE_P (optab_op2_mode))
634 need_same_oprnds = true;
635 first_op1 = gimple_assign_rhs2 (stmt);
639 else if (rhs_code == WIDEN_LSHIFT_EXPR)
641 need_same_oprnds = true;
642 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts must use the same operation as the first one.
   Exceptions: REALPART/IMAGPART pairs, and mixed ref codes within a
   strided access.  */
647 if (first_stmt_code != rhs_code
648 && (first_stmt_code != IMAGPART_EXPR
649 || rhs_code != REALPART_EXPR)
650 && (first_stmt_code != REALPART_EXPR
651 || rhs_code != IMAGPART_EXPR)
652 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
653 && (first_stmt_code == ARRAY_REF
654 || first_stmt_code == INDIRECT_REF
655 || first_stmt_code == COMPONENT_REF
656 || first_stmt_code == MEM_REF)))
658 if (vect_print_dump_info (REPORT_SLP))
661 "Build SLP failed: different operation in stmt ");
662 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
665 vect_free_oprnd_info (&oprnds_info);
670 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
672 if (vect_print_dump_info (REPORT_SLP))
675 "Build SLP failed: different shift arguments in ");
676 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
679 vect_free_oprnd_info (&oprnds_info);
/* Calls must target the same callee with the same fntype and arity.  */
683 if (rhs_code == CALL_EXPR)
685 gimple first_stmt = VEC_index (gimple, stmts, 0);
686 if (gimple_call_num_args (stmt) != nops
687 || !operand_equal_p (gimple_call_fn (first_stmt),
688 gimple_call_fn (stmt), 0)
689 || gimple_call_fntype (first_stmt)
690 != gimple_call_fntype (stmt))
692 if (vect_print_dump_info (REPORT_SLP))
695 "Build SLP failed: different calls in ");
696 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
699 vect_free_oprnd_info (&oprnds_info);
705 /* Strided store or load. */
706 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
708 if (REFERENCE_CLASS_P (lhs))
711 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
712 stmt, ncopies_for_cost,
713 (i == 0), &oprnds_info))
715 vect_free_oprnd_info (&oprnds_info);
722 /* FORNOW: Check that there is no gap between the loads. */
723 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
724 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
725 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
726 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
728 if (vect_print_dump_info (REPORT_SLP))
730 fprintf (vect_dump, "Build SLP failed: strided "
732 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
735 vect_free_oprnd_info (&oprnds_info);
739 /* Check that the size of interleaved loads group is not
740 greater than the SLP group size. */
742 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
744 if (vect_print_dump_info (REPORT_SLP))
746 fprintf (vect_dump, "Build SLP failed: the number of "
747 "interleaved loads is greater than"
748 " the SLP group size ");
749 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
752 vect_free_oprnd_info (&oprnds_info);
756 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
759 /* Check that there are no loads from different interleaving
760 chains in the same node. The only exception is complex
762 if (prev_first_load != first_load
763 && rhs_code != REALPART_EXPR
764 && rhs_code != IMAGPART_EXPR)
766 if (vect_print_dump_info (REPORT_SLP))
768 fprintf (vect_dump, "Build SLP failed: different "
769 "interleaving chains in one node ");
770 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
773 vect_free_oprnd_info (&oprnds_info);
778 prev_first_load = first_load;
/* Alignment is checked once, on the chain's first load.  */
780 if (first_load == stmt)
782 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
783 if (vect_supportable_dr_alignment (first_dr, false)
784 == dr_unaligned_unsupported)
786 if (vect_print_dump_info (REPORT_SLP))
788 fprintf (vect_dump, "Build SLP failed: unsupported "
790 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
793 vect_free_oprnd_info (&oprnds_info);
797 /* Analyze costs (for the first stmt in the group). */
798 vect_model_load_cost (vinfo_for_stmt (stmt),
799 ncopies_for_cost, false, *node);
802 /* Store the place of this load in the interleaving chain. In
803 case that permutation is needed we later decide if a specific
804 permutation is supported. */
805 load_place = vect_get_place_in_interleaving_chain (stmt,
810 VEC_safe_push (int, heap, *load_permutation, load_place);
812 /* We stop the tree when we reach a group of loads. */
813 stop_recursion = true;
816 } /* Strided access. */
819 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
821 /* Not strided load. */
822 if (vect_print_dump_info (REPORT_SLP))
824 fprintf (vect_dump, "Build SLP failed: not strided load ");
825 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
828 /* FORNOW: Not strided loads are not supported. */
829 vect_free_oprnd_info (&oprnds_info);
833 /* Not memory operation. */
834 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
835 && TREE_CODE_CLASS (rhs_code) != tcc_unary
836 && rhs_code != COND_EXPR
837 && rhs_code != CALL_EXPR)
839 if (vect_print_dump_info (REPORT_SLP))
841 fprintf (vect_dump, "Build SLP failed: operation");
842 fprintf (vect_dump, " unsupported ");
843 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
846 vect_free_oprnd_info (&oprnds_info);
/* All COND_EXPR conditions in the group must use the same tree code.  */
850 if (rhs_code == COND_EXPR)
852 tree cond_expr = gimple_assign_rhs1 (stmt);
855 first_cond_code = TREE_CODE (cond_expr);
856 else if (first_cond_code != TREE_CODE (cond_expr))
858 if (vect_print_dump_info (REPORT_SLP))
860 fprintf (vect_dump, "Build SLP failed: different"
862 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
865 vect_free_oprnd_info (&oprnds_info);
870 /* Find the def-stmts. */
871 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
872 ncopies_for_cost, (i == 0),
875 vect_free_oprnd_info (&oprnds_info);
881 /* Add the costs of the node to the overall instance costs. */
882 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
883 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
885 /* Strided loads were reached - stop the recursion. */
888 VEC_safe_push (slp_tree, heap, *loads, *node);
892 *loads_permuted = true;
894 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
899 /* We don't check here complex numbers chains, so we set
900 LOADS_PERMUTED for further check in
901 vect_supported_load_permutation_p. */
902 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
903 *loads_permuted = true;
906 vect_free_oprnd_info (&oprnds_info);
910 /* Create SLP_TREE nodes for the definition node/s. */
911 FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
915 if (oprnd_info->first_dt != vect_internal_def)
918 child = vect_create_new_slp_node (oprnd_info->def_stmts);
920 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
921 inside_cost, outside_cost, ncopies_for_cost,
922 max_nunits, load_permutation, loads,
923 vectorization_factor, loads_permuted))
/* The child (and the def_stmts vector it now owns) is freed on
   failure; clear our alias first so it is not freed twice.  */
926 oprnd_info->def_stmts = NULL;
927 vect_free_slp_tree (child);
928 vect_free_oprnd_info (&oprnds_info);
932 oprnd_info->def_stmts = NULL;
933 VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
936 vect_free_oprnd_info (&oprnds_info);
/* Dump the SLP tree rooted at NODE to vect_dump: the node's scalar
   statements first, then each child recursively.  NOTE(review): elided
   listing -- return type, braces and the NULL-node guard are not
   visible here.  */
942 vect_print_slp_tree (slp_tree node)
951 fprintf (vect_dump, "node ");
952 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
954 fprintf (vect_dump, "\n\tstmt %d ", i);
955 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
957 fprintf (vect_dump, "\n");
959 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
960 vect_print_slp_tree ((slp_tree) child);
964 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
965 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
966 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
967 stmts in NODE are to be marked. */
/* NOTE(review): elided listing -- the "j < 0 || i == j" selection test
   that the header comment describes is not visible in this fragment.  */
970 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
979 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
981 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
983 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
984 vect_mark_slp_stmts ((slp_tree) child, mark, j);
988 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
991 vect_mark_slp_stmts_relevant (slp_tree node)
995 stmt_vec_info stmt_info;
/* Each stmt must be either not yet relevant or already marked
   vect_used_in_scope; then mark it and recurse into the children.  */
1001 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1003 stmt_info = vinfo_for_stmt (stmt);
1004 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1005 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1006 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1009 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1010 vect_mark_slp_stmts_relevant ((slp_tree) child);
1014 /* Check if the permutation required by the SLP INSTANCE is supported.
1015 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* NOTE(review): elided listing -- the loop header's 'for (i = 0, j = 0;'
   opening, braces, and the 'free (tmp_loads)' that should balance the
   xmalloc below are not visible; verify against the full source.  */
1018 vect_supported_slp_permutation_p (slp_instance instance)
1020 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
1021 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1022 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1023 VEC (slp_tree, heap) *sorted_loads = NULL;
1025 slp_tree *tmp_loads = NULL;
1026 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1029 /* FORNOW: The only supported loads permutation is loads from the same
1030 location in all the loads in the node, when the data-refs in
1031 nodes of LOADS constitute an interleaving chain.
1032 Sort the nodes according to the order of accesses in the chain. */
1033 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Sample the permutation once per load node (stride group_size) and
   bucket each node by its access index within the chain.  */
1035 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
1036 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
1037 i += group_size, j++)
1039 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
1040 /* Check that the loads are all in the same interleaving chain. */
1041 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1043 if (vect_print_dump_info (REPORT_DETAILS))
1045 fprintf (vect_dump, "Build SLP failed: unsupported data "
1047 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
1054 tmp_loads[index] = load;
/* Replace the instance's load list with the chain-ordered one.  */
1057 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
1058 for (i = 0; i < group_size; i++)
1059 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
1061 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
1062 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1065 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
1066 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1074 /* Rearrange the statements of NODE according to PERMUTATION. */
/* NOTE(review): elided listing -- the function header line, braces and
   the 'gimple stmt; slp_void_p child;' declarations are not visible.  */
1077 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1078 VEC (int, heap) *permutation)
1081 VEC (gimple, heap) *tmp_stmts;
1082 unsigned int index, i;
/* Rearrange the children first, then this node's own scalar stmts.  */
1088 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1089 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1091 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1092 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace can scatter-write by index.  */
1094 for (i = 0; i < group_size; i++)
1095 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1097 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1099 index = VEC_index (int, permutation, i);
1100 VEC_replace (gimple, tmp_stmts, index, stmt);
1103 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1104 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1108 /* Check if the required load permutation is supported.
1109 LOAD_PERMUTATION contains a list of indices of the loads.
1110 In SLP this permutation is relative to the order of strided stores that are
1111 the base of the SLP instance. */
/* NOTE(review): numbered listing with many interior lines elided --
   return type, braces, several early-return paths, the 'load_index'
   and 'supported' initializations and sundry statements are missing.
   Treat the structure below as a fragment.  */
1114 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1115 VEC (int, heap) *load_permutation)
1117 int i = 0, j, prev = -1, next, k, number_of_groups;
1118 bool supported, bad_permutation = false;
1120 slp_tree node, other_complex_node;
1121 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1122 unsigned complex_numbers = 0;
1123 struct data_reference *dr;
1124 bb_vec_info bb_vinfo;
1126 /* FORNOW: permutations are only supported in SLP. */
1130 if (vect_print_dump_info (REPORT_SLP))
1132 fprintf (vect_dump, "Load permutation ");
1133 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1134 fprintf (vect_dump, "%d ", next);
1137 /* In case of reduction every load permutation is allowed, since the order
1138 of the reduction statements is not important (as opposed to the case of
1139 strided stores). The only condition we need to check is that all the
1140 load nodes are of the same size and have the same permutation (and then
1141 rearrange all the nodes of the SLP instance according to this
1144 /* Check that all the load nodes are of the same size. */
1145 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1147 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1148 != (unsigned) group_size)
1151 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1152 if (is_gimple_assign (stmt)
1153 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1154 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1158 /* Complex operands can be swapped as following:
1159 real_c = real_b + real_a;
1160 imag_c = imag_a + imag_b;
1161 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1162 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1163 chains are mixed, they match the above pattern. */
1164 if (complex_numbers)
1166 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1168 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1174 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1176 if (complex_numbers != 2)
1184 other_complex_node = VEC_index (slp_tree,
1185 SLP_INSTANCE_LOADS (slp_instn), k);
1186 other_node_first = VEC_index (gimple,
1187 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1189 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1190 != other_node_first)
1198 /* We checked that this case is ok, so there is no need to proceed with
1199 permutation tests. */
1200 if (complex_numbers == 2
1201 && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
1203 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1204 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1208 node = SLP_INSTANCE_TREE (slp_instn);
1209 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1210 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1211 instance, not all the loads belong to the same node or interleaving
1212 group. Hence, we need to divide them into groups according to
1214 number_of_groups = VEC_length (int, load_permutation) / group_size;
1216 /* Reduction (there are no data-refs in the root).
1217 In reduction chain the order of the loads is important. */
1218 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1219 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1221 int first_group_load_index;
1223 /* Compare all the permutation sequences to the first one. */
1224 for (i = 1; i < number_of_groups; i++)
1227 for (j = i * group_size; j < i * group_size + group_size; j++)
1229 next = VEC_index (int, load_permutation, j);
1230 first_group_load_index = VEC_index (int, load_permutation, k);
1232 if (next != first_group_load_index)
1234 bad_permutation = true;
1241 if (bad_permutation)
1245 if (!bad_permutation)
1247 /* Check that the loads in the first sequence are different and there
1248 are no gaps between them. */
1249 load_index = sbitmap_alloc (group_size);
1250 sbitmap_zero (load_index);
1251 for (k = 0; k < group_size; k++)
1253 first_group_load_index = VEC_index (int, load_permutation, k);
1254 if (TEST_BIT (load_index, first_group_load_index))
1256 bad_permutation = true;
1260 SET_BIT (load_index, first_group_load_index);
1263 if (!bad_permutation)
1264 for (k = 0; k < group_size; k++)
1265 if (!TEST_BIT (load_index, k))
1267 bad_permutation = true;
1271 sbitmap_free (load_index);
1274 if (!bad_permutation)
1276 /* This permutation is valid for reduction. Since the order of the
1277 statements in the nodes is not important unless they are memory
1278 accesses, we can rearrange the statements in all the nodes
1279 according to the order of the loads. */
1280 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1282 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1287 /* In basic block vectorization we allow any subchain of an interleaving
1289 FORNOW: not supported in loop SLP because of realignment complications. */
1290 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1291 bad_permutation = false;
1292 /* Check that for every node in the instance the loads form a subchain. */
1295 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1299 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1302 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1304 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1306 bad_permutation = true;
1310 if (j != 0 && next_load != load)
1312 bad_permutation = true;
1316 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1319 if (bad_permutation)
1323 /* Check that the alignment of the first load in every subchain, i.e.,
1324 the first statement in every load node, is supported. */
1325 if (!bad_permutation)
1327 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1329 first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1331 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1333 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1334 if (vect_supportable_dr_alignment (dr, false)
1335 == dr_unaligned_unsupported)
1337 if (vect_print_dump_info (REPORT_SLP))
1339 fprintf (vect_dump, "unsupported unaligned load ");
1340 print_gimple_stmt (vect_dump, first_load, 0,
1343 bad_permutation = true;
1349 if (!bad_permutation)
1351 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1357 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1358 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1359 well (unless it's reduction). */
1360 if (VEC_length (int, load_permutation)
1361 != (unsigned int) (group_size * group_size))
1365 load_index = sbitmap_alloc (group_size);
1366 sbitmap_zero (load_index);
1367 for (j = 0; j < group_size; j++)
1369 for (i = j * group_size, k = 0;
1370 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1373 if (i != j * group_size && next != prev)
1382 if (TEST_BIT (load_index, prev))
1388 SET_BIT (load_index, prev);
1391 for (j = 0; j < group_size; j++)
1392 if (!TEST_BIT (load_index, j))
1395 sbitmap_free (load_index);
1397 if (supported && i == group_size * group_size
1398 && vect_supported_slp_permutation_p (slp_instn))
1405 /* Find the first load in the loop that belongs to INSTANCE.
1406 When loads are in several SLP nodes, there can be a case in which the first
1407 load does not appear in the first SLP node to be transformed, causing
1408 incorrect order of statements. Since we generate all the loads together,
1409 they must be inserted before the first load of the SLP instance and not
1410 before the first load of the first node of the instance. */
1413 vect_find_first_load_in_slp_instance (slp_instance instance)
1417 gimple first_load = NULL, load;
1419 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1420 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1421 first_load = get_earlier_stmt (load, first_load);
1427 /* Find the last store in SLP INSTANCE. */
1430 vect_find_last_store_in_slp_instance (slp_instance instance)
1434 gimple last_store = NULL, store;
1436 node = SLP_INSTANCE_TREE (instance);
1438 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1440 last_store = get_later_stmt (store, last_store);
1446 /* Analyze an SLP instance starting from a group of strided stores. Call
1447 vect_build_slp_tree to build a tree of packed stmts if possible.
1448 Return FALSE if it's impossible to SLP any stmt in the loop. */
1451 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1454 slp_instance new_instance;
1456 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1457 unsigned int unrolling_factor = 1, nunits;
1458 tree vectype, scalar_type = NULL_TREE;
1460 unsigned int vectorization_factor = 0;
1461 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1462 unsigned int max_nunits = 0;
1463 VEC (int, heap) *load_permutation;
1464 VEC (slp_tree, heap) *loads;
1465 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1466 bool loads_permuted = false;
1467 VEC (gimple, heap) *scalar_stmts;
1469 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1473 scalar_type = TREE_TYPE (DR_REF (dr));
1474 vectype = get_vectype_for_scalar_type (scalar_type);
1478 gcc_assert (loop_vinfo);
1479 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1482 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1486 gcc_assert (loop_vinfo);
1487 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1488 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1493 if (vect_print_dump_info (REPORT_SLP))
1495 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1496 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1502 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1504 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1506 vectorization_factor = nunits;
1508 /* Calculate the unrolling factor. */
1509 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1510 if (unrolling_factor != 1 && !loop_vinfo)
1512 if (vect_print_dump_info (REPORT_SLP))
1513 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1519 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1520 scalar_stmts = VEC_alloc (gimple, heap, group_size);
1522 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1524 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1527 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1528 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1529 VEC_safe_push (gimple, heap, scalar_stmts,
1530 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1532 VEC_safe_push (gimple, heap, scalar_stmts, next);
1533 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1538 /* Collect reduction statements. */
1539 VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1540 for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1541 VEC_safe_push (gimple, heap, scalar_stmts, next);
1544 node = vect_create_new_slp_node (scalar_stmts);
1546 /* Calculate the number of vector stmts to create based on the unrolling
1547 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1548 GROUP_SIZE / NUNITS otherwise). */
1549 ncopies_for_cost = unrolling_factor * group_size / nunits;
1551 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1552 loads = VEC_alloc (slp_tree, heap, group_size);
1554 /* Build the tree for the SLP instance. */
1555 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1556 &inside_cost, &outside_cost, ncopies_for_cost,
1557 &max_nunits, &load_permutation, &loads,
1558 vectorization_factor, &loads_permuted))
1560 /* Calculate the unrolling factor based on the smallest type. */
1561 if (max_nunits > nunits)
1562 unrolling_factor = least_common_multiple (max_nunits, group_size)
1565 if (unrolling_factor != 1 && !loop_vinfo)
1567 if (vect_print_dump_info (REPORT_SLP))
1568 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1573 /* Create a new SLP instance. */
1574 new_instance = XNEW (struct _slp_instance);
1575 SLP_INSTANCE_TREE (new_instance) = node;
1576 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1577 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1578 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1579 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1580 SLP_INSTANCE_LOADS (new_instance) = loads;
1581 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1582 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1586 if (!vect_supported_load_permutation_p (new_instance, group_size,
1589 if (vect_print_dump_info (REPORT_SLP))
1591 fprintf (vect_dump, "Build SLP failed: unsupported load "
1593 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1596 vect_free_slp_instance (new_instance);
1600 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1601 = vect_find_first_load_in_slp_instance (new_instance);
1604 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1607 VEC_safe_push (slp_instance, heap,
1608 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1611 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1614 if (vect_print_dump_info (REPORT_SLP))
1615 vect_print_slp_tree (node);
1620 /* Failed to SLP. */
1621 /* Free the allocated memory. */
1622 vect_free_slp_tree (node);
1623 VEC_free (int, heap, load_permutation);
1624 VEC_free (slp_tree, heap, loads);
1630 /* Check if there are stmts in the loop that can be vectorized using SLP. Build
1631 SLP trees of packed scalar stmts if SLP is possible. */
1634 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1637 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1638 gimple first_element;
1641 if (vect_print_dump_info (REPORT_SLP))
1642 fprintf (vect_dump, "=== vect_analyze_slp ===");
1646 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1647 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1648 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1651 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1653 /* Find SLP sequences starting from groups of strided stores. */
1654 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1655 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1658 if (bb_vinfo && !ok)
1660 if (vect_print_dump_info (REPORT_SLP))
1661 fprintf (vect_dump, "Failed to SLP the basic block.");
1667 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1669 /* Find SLP sequences starting from reduction chains. */
1670 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1671 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1676 /* Don't try to vectorize SLP reductions if reduction chain was
1681 /* Find SLP sequences starting from groups of reductions. */
1682 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1683 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1684 VEC_index (gimple, reductions, 0)))
1691 /* For each possible SLP instance decide whether to SLP it and calculate overall
1692 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1693 least one instance. */
1696 vect_make_slp_decision (loop_vec_info loop_vinfo)
1698 unsigned int i, unrolling_factor = 1;
1699 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1700 slp_instance instance;
1701 int decided_to_slp = 0;
1703 if (vect_print_dump_info (REPORT_SLP))
1704 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1706 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1708 /* FORNOW: SLP if you can. */
1709 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1710 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1712 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1713 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1714 loop-based vectorization. Such stmts will be marked as HYBRID. */
1715 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1719 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1721 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1722 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1723 decided_to_slp, unrolling_factor);
1725 return (decided_to_slp > 0);
1729 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1730 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1733 vect_detect_hybrid_slp_stmts (slp_tree node)
1736 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
1737 gimple stmt = VEC_index (gimple, stmts, 0);
1738 imm_use_iterator imm_iter;
1740 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1743 struct loop *loop = NULL;
1744 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1745 basic_block bb = NULL;
1751 loop = LOOP_VINFO_LOOP (loop_vinfo);
1753 bb = BB_VINFO_BB (bb_vinfo);
1755 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1756 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1757 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1758 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1759 if (gimple_bb (use_stmt)
1760 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1761 || bb == gimple_bb (use_stmt))
1762 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1763 && !STMT_SLP_TYPE (stmt_vinfo)
1764 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1765 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1766 && !(gimple_code (use_stmt) == GIMPLE_PHI
1767 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1768 == vect_reduction_def))
1769 vect_mark_slp_stmts (node, hybrid, i);
1771 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1772 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1776 /* Find stmts that must be both vectorized and SLPed. */
1779 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1782 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1783 slp_instance instance;
1785 if (vect_print_dump_info (REPORT_SLP))
1786 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1788 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1789 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1793 /* Create and initialize a new bb_vec_info struct for BB, as well as
1794 stmt_vec_info structs for all the stmts in it. */
1797 new_bb_vec_info (basic_block bb)
1799 bb_vec_info res = NULL;
1800 gimple_stmt_iterator gsi;
1802 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1803 BB_VINFO_BB (res) = bb;
1805 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1807 gimple stmt = gsi_stmt (gsi);
1808 gimple_set_uid (stmt, 0);
1809 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1812 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1813 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1820 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1821 stmts in the basic block. */
1824 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1827 gimple_stmt_iterator si;
1832 bb = BB_VINFO_BB (bb_vinfo);
1834 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1836 gimple stmt = gsi_stmt (si);
1837 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1840 /* Free stmt_vec_info. */
1841 free_stmt_vec_info (stmt);
1844 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1845 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1846 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1847 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1853 /* Analyze statements contained in SLP tree node after recursively analyzing
1854 the subtree. Return TRUE if the operations are supported. */
1857 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1867 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1868 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1871 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1874 gcc_assert (stmt_info);
1875 gcc_assert (PURE_SLP_STMT (stmt_info));
1877 if (!vect_analyze_stmt (stmt, &dummy, node))
1885 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1886 operations are supported. */
1889 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1891 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1892 slp_instance instance;
1895 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1897 if (!vect_slp_analyze_node_operations (bb_vinfo,
1898 SLP_INSTANCE_TREE (instance)))
1900 vect_free_slp_instance (instance);
1901 VEC_ordered_remove (slp_instance, slp_instances, i);
1907 if (!VEC_length (slp_instance, slp_instances))
1913 /* Check if vectorization of the basic block is profitable. */
1916 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1918 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1919 slp_instance instance;
1921 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1922 unsigned int stmt_cost;
1924 gimple_stmt_iterator si;
1925 basic_block bb = BB_VINFO_BB (bb_vinfo);
1926 stmt_vec_info stmt_info = NULL;
1927 tree dummy_type = NULL;
1930 /* Calculate vector costs. */
1931 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1933 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1934 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1937 /* Calculate scalar cost. */
1938 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1940 stmt = gsi_stmt (si);
1941 stmt_info = vinfo_for_stmt (stmt);
1943 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1944 || !PURE_SLP_STMT (stmt_info))
1947 if (STMT_VINFO_DATA_REF (stmt_info))
1949 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1950 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1951 (scalar_load, dummy_type, dummy);
1953 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1954 (scalar_store, dummy_type, dummy);
1957 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1958 (scalar_stmt, dummy_type, dummy);
1960 scalar_cost += stmt_cost;
1963 if (vect_print_dump_info (REPORT_COST))
1965 fprintf (vect_dump, "Cost model analysis: \n");
1966 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1968 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1970 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1973 /* Vectorization is profitable if its cost is less than the cost of scalar
1975 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1981 /* Check if the basic block can be vectorized. */
1984 vect_slp_analyze_bb_1 (basic_block bb)
1986 bb_vec_info bb_vinfo;
1987 VEC (ddr_p, heap) *ddrs;
1988 VEC (slp_instance, heap) *slp_instances;
1989 slp_instance instance;
1992 int max_vf = MAX_VECTORIZATION_FACTOR;
1994 bb_vinfo = new_bb_vec_info (bb);
1998 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2000 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2001 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
2004 destroy_bb_vec_info (bb_vinfo);
2008 ddrs = BB_VINFO_DDRS (bb_vinfo);
2009 if (!VEC_length (ddr_p, ddrs))
2011 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2012 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
2015 destroy_bb_vec_info (bb_vinfo);
2019 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2022 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2023 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2024 "in basic block.\n");
2026 destroy_bb_vec_info (bb_vinfo);
2030 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2032 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2033 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2036 destroy_bb_vec_info (bb_vinfo);
2040 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2042 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2043 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2046 destroy_bb_vec_info (bb_vinfo);
2050 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2052 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2053 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2056 destroy_bb_vec_info (bb_vinfo);
2060 /* Check the SLP opportunities in the basic block, analyze and build SLP
2062 if (!vect_analyze_slp (NULL, bb_vinfo))
2064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2065 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2066 "in basic block.\n");
2068 destroy_bb_vec_info (bb_vinfo);
2072 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2074 /* Mark all the statements that we want to vectorize as pure SLP and
2076 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2078 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2079 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2082 if (!vect_slp_analyze_operations (bb_vinfo))
2084 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2085 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2087 destroy_bb_vec_info (bb_vinfo);
2091 /* Cost model: check if the vectorization is worthwhile. */
2092 if (flag_vect_cost_model
2093 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2095 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2096 fprintf (vect_dump, "not vectorized: vectorization is not "
2099 destroy_bb_vec_info (bb_vinfo);
2103 if (vect_print_dump_info (REPORT_DETAILS))
2104 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2111 vect_slp_analyze_bb (basic_block bb)
2113 bb_vec_info bb_vinfo;
2115 gimple_stmt_iterator gsi;
2116 unsigned int vector_sizes;
2118 if (vect_print_dump_info (REPORT_DETAILS))
2119 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2121 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2123 gimple stmt = gsi_stmt (gsi);
2124 if (!is_gimple_debug (stmt)
2125 && !gimple_nop_p (stmt)
2126 && gimple_code (stmt) != GIMPLE_LABEL)
2130 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2132 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2133 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2139 /* Autodetect first vector size we try. */
2140 current_vector_size = 0;
2141 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2145 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2149 destroy_bb_vec_info (bb_vinfo);
2151 vector_sizes &= ~current_vector_size;
2152 if (vector_sizes == 0
2153 || current_vector_size == 0)
2156 /* Try the next biggest vector size. */
2157 current_vector_size = 1 << floor_log2 (vector_sizes);
2158 if (vect_print_dump_info (REPORT_DETAILS))
2159 fprintf (vect_dump, "***** Re-trying analysis with "
2160 "vector size %d\n", current_vector_size);
2165 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2166 the number of created vector stmts depends on the unrolling factor).
2167 However, the actual number of vector stmts for every SLP node depends on
2168 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2169 should be updated. In this function we assume that the inside costs
2170 calculated in vect_model_xxx_cost are linear in ncopies. */
2173 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2175 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2176 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2177 slp_instance instance;
2179 if (vect_print_dump_info (REPORT_SLP))
2180 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2182 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2183 /* We assume that costs are linear in ncopies. */
2184 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2185 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2189 /* For constant and loop invariant defs of SLP_NODE this function returns
2190 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2191 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2192 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2193 REDUC_INDEX is the index of the reduction operand in the statements, unless
2197 vect_get_constant_vectors (tree op, slp_tree slp_node,
2198 VEC (tree, heap) **vec_oprnds,
2199 unsigned int op_num, unsigned int number_of_vectors,
2202 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2203 gimple stmt = VEC_index (gimple, stmts, 0);
2204 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2208 int j, number_of_places_left_in_vector;
2211 int group_size = VEC_length (gimple, stmts);
2212 unsigned int vec_num, i;
2213 int number_of_copies = 1;
2214 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2215 bool constant_p, is_store;
2216 tree neutral_op = NULL;
2217 enum tree_code code = gimple_expr_code (stmt);
2221 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2222 && reduc_index != -1)
2224 op_num = reduc_index - 1;
2225 op = gimple_op (stmt, reduc_index);
2226 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2227 we need either neutral operands or the original operands. See
2228 get_initial_def_for_reduction() for details. */
2231 case WIDEN_SUM_EXPR:
2237 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2238 neutral_op = build_real (TREE_TYPE (op), dconst0);
2240 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2245 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2246 neutral_op = build_real (TREE_TYPE (op), dconst1);
2248 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2253 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2258 def_stmt = SSA_NAME_DEF_STMT (op);
2259 loop = (gimple_bb (stmt))->loop_father;
2260 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2261 loop_preheader_edge (loop));
2269 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2272 op = gimple_assign_rhs1 (stmt);
2279 if (CONSTANT_CLASS_P (op))
2284 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2285 gcc_assert (vector_type);
2286 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2288 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2289 created vectors. It is greater than 1 if unrolling is performed.
2291 For example, we have two scalar operands, s1 and s2 (e.g., group of
2292 strided accesses of size two), while NUNITS is four (i.e., four scalars
2293 of this type can be packed in a vector). The output vector will contain
2294 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2297 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2298 containing the operands.
2300 For example, NUNITS is four as before, and the group size is 8
2301 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2302 {s5, s6, s7, s8}. */
2304 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2306 number_of_places_left_in_vector = nunits;
2307 for (j = 0; j < number_of_copies; j++)
2309 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2312 op = gimple_assign_rhs1 (stmt);
2318 if (op_num == 0 || op_num == 1)
2320 tree cond = gimple_assign_rhs1 (stmt);
2321 op = TREE_OPERAND (cond, op_num);
2326 op = gimple_assign_rhs2 (stmt);
2328 op = gimple_assign_rhs3 (stmt);
2333 op = gimple_call_arg (stmt, op_num);
2337 op = gimple_op (stmt, op_num + 1);
2341 if (reduc_index != -1)
2343 loop = (gimple_bb (stmt))->loop_father;
2344 def_stmt = SSA_NAME_DEF_STMT (op);
2348 /* Get the def before the loop. In reduction chain we have only
2349 one initial value. */
2350 if ((j != (number_of_copies - 1)
2351 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2356 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2357 loop_preheader_edge (loop));
2360 /* Create 'vect_ = {op0,op1,...,opn}'. */
2361 t = tree_cons (NULL_TREE, op, t);
2363 number_of_places_left_in_vector--;
2365 if (number_of_places_left_in_vector == 0)
2367 number_of_places_left_in_vector = nunits;
2370 vec_cst = build_vector (vector_type, t);
2372 vec_cst = build_constructor_from_list (vector_type, t);
2373 VEC_quick_push (tree, voprnds,
2374 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2380 /* Since the vectors are created in the reverse order, we should invert
2382 vec_num = VEC_length (tree, voprnds);
2383 for (j = vec_num - 1; j >= 0; j--)
2385 vop = VEC_index (tree, voprnds, j);
2386 VEC_quick_push (tree, *vec_oprnds, vop);
2389 VEC_free (tree, heap, voprnds);
2391 /* In case that VF is greater than the unrolling factor needed for the SLP
2392 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2393 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2394 to replicate the vectors. */
2395 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2397 tree neutral_vec = NULL;
2402 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2404 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2408 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2409 VEC_quick_push (tree, *vec_oprnds, vop);
2415 /* Get vectorized definitions from SLP_NODE that contains corresponding
2416 vectorized def-stmts. */
2419 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2422 gimple vec_def_stmt;
2425 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2427 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2429 gcc_assert (vec_def_stmt);
2430 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2431 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2436 /* Get vectorized definitions for SLP_NODE.
2437 If the scalar definitions are loop invariants or constants, collect them and
2438 call vect_get_constant_vectors() to create vector stmts.
2439 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2440 must be stored in the corresponding child of SLP_NODE, and we call
2441 vect_get_slp_vect_defs () to retrieve them. */
2444 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2445 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2447 gimple first_stmt, first_def;
2448 int number_of_vects = 0, i;
2449 unsigned int child_index = 0;
2450 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2451 slp_tree child = NULL;
2452 VEC (tree, heap) *vec_defs;
2453 tree oprnd, def_lhs;
2454 bool vectorized_defs;
2456 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2457 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2459 /* For each operand we check if it has vectorized definitions in a child
2460 node or we need to create them (for invariants and constants). We
2461 check if the LHS of the first stmt of the next child matches OPRND.
2462 If it does, we found the correct child. Otherwise, we call
2463 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2464 to check this child node for the next operand. */
2465 vectorized_defs = false;
2466 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2468 child = (slp_tree) VEC_index (slp_void_p,
2469 SLP_TREE_CHILDREN (slp_node),
2471 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2473 /* In the end of a pattern sequence we have a use of the original stmt,
2474 so we need to compare OPRND with the original def. */
2475 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2476 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2477 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2478 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2480 if (is_gimple_call (first_def))
2481 def_lhs = gimple_call_lhs (first_def);
2483 def_lhs = gimple_assign_lhs (first_def);
2485 if (operand_equal_p (oprnd, def_lhs, 0))
2487 /* The number of vector defs is determined by the number of
2488 vector statements in the node from which we get those
2490 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2491 vectorized_defs = true;
2496 if (!vectorized_defs)
2500 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2501 /* Number of vector stmts was calculated according to LHS in
2502 vect_schedule_slp_instance (), fix it by replacing LHS with
2503 RHS, if necessary. See vect_get_smallest_scalar_type () for
2505 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2507 if (rhs_size_unit != lhs_size_unit)
2509 number_of_vects *= rhs_size_unit;
2510 number_of_vects /= lhs_size_unit;
2515 /* Allocate memory for vectorized defs. */
2516 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2518 /* For reduction defs we call vect_get_constant_vectors (), since we are
2519 looking for initial loop invariant values. */
2520 if (vectorized_defs && reduc_index == -1)
2521 /* The defs are already vectorized. */
2522 vect_get_slp_vect_defs (child, &vec_defs);
2524 /* Build vectors from scalar defs. */
2525 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2526 number_of_vects, reduc_index);
2528 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2530 /* For reductions, we only need initial values. */
2531 if (reduc_index != -1)
2537 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2538 building a vector of type MASK_TYPE from it) and two input vectors placed in
2539 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2540 shifting by STRIDE elements of DR_CHAIN for every copy.
2541 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2543 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2544 the created stmts must be inserted. */
2547 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2548 tree mask, int first_vec_indx, int second_vec_indx,
2549 gimple_stmt_iterator *gsi, slp_tree node,
2550 tree vectype, VEC(tree,heap) *dr_chain,
2551 int ncopies, int vect_stmts_counter)
2554 gimple perm_stmt = NULL;
2555 stmt_vec_info next_stmt_info;
2557 tree first_vec, second_vec, data_ref;
/* STRIDE is the distance (in DR_CHAIN and in SLP_TREE_VEC_STMTS slots)
   between the statements of consecutive copies; see the header comment
   above: number of vectorized stmts for NODE divided by NCOPIES.  */
2559 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
/* Pad SLP_TREE_VEC_STMTS with NULL entries up to its final size so that
   the VEC_replace calls below may store at any slot index.  */
2561 /* Initialize the vect stmts of NODE to properly insert the generated
2563 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2564 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2565 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
/* One destination variable is shared by all copies; a fresh SSA name is
   created for each generated permute statement below.  */
2567 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2568 for (i = 0; i < ncopies; i++)
2570 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2571 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2573 /* Generate the permute statement. */
2574 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2575 first_vec, second_vec, mask);
2576 data_ref = make_ssa_name (perm_dest, perm_stmt);
2577 gimple_set_lhs (perm_stmt, data_ref);
2578 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2580 /* Store the vector statement in NODE. */
2581 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2582 stride * i + vect_stmts_counter, perm_stmt);
/* Advance the window of input vectors for the next copy.  */
2584 first_vec_indx += stride;
2585 second_vec_indx += stride;
2588 /* Mark the scalar stmt as vectorized. */
/* NOTE(review): PERM_STMT here is the last permute generated by the loop
   above -- presumably the one the callers expect to find recorded for
   NEXT_SCALAR_STMT; confirm against vect_transform_slp_perm_load.  */
2589 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2590 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2594 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2595 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2596 representation. Check that the mask is valid and return FALSE if not.
2597 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2598 the next vector, i.e., the current first vector is not needed. */
2601 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2602 int mask_nunits, bool only_one_vec, int index,
2603 unsigned char *mask, int *current_mask_element,
2604 bool *need_next_vector, int *number_of_mask_fixes,
2605 bool *mask_fixed, bool *needs_first_vector)
2609 /* Convert to target specific representation. */
2610 *current_mask_element = first_mask_element + m;
2611 /* Adjust the value in case it's a mask for second and third vectors. */
/* NUMBER_OF_MASK_FIXES counts how many times the mask has already been
   re-based (it starts at 1, see the caller's initialization); each re-base
   shifts the element values down by one vector's worth of elements.  */
2612 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
/* An element below MASK_NUNITS selects from the first input vector, so the
   first vector is still needed by this mask.  */
2614 if (*current_mask_element < mask_nunits)
2615 *needs_first_vector = true;
2617 /* We have only one input vector to permute but the mask accesses values in
2618 the next vector as well. */
2619 if (only_one_vec && *current_mask_element >= mask_nunits)
2621 if (vect_print_dump_info (REPORT_DETAILS))
2623 fprintf (vect_dump, "permutation requires at least two vectors ");
2624 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* An element >= 2*MASK_NUNITS refers past the two vectors a VEC_PERM_EXPR
   can take, so the input-vector window must be shifted.  */
2630 /* The mask requires the next vector. */
2631 if (*current_mask_element >= mask_nunits * 2)
2633 if (*needs_first_vector || *mask_fixed)
2635 /* We either need the first vector too or have already moved to the
2636 next vector. In both cases, this permutation needs three
2638 if (vect_print_dump_info (REPORT_DETAILS))
2640 fprintf (vect_dump, "permutation requires at "
2641 "least three vectors ");
2642 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM)
2648 /* We move to the next vector, dropping the first one and working with
2649 the second and the third - we need to adjust the values of the mask
2651 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
/* Re-base the mask elements already collected for this mask as well.  */
2653 for (i = 0; i < index; i++)
2654 mask[i] -= mask_nunits * *number_of_mask_fixes;
2656 (*number_of_mask_fixes)++;
/* Tell the caller to advance its input-vector window iff the mask has been
   re-based (MASK_FIXED is presumably set in the re-base path above --
   confirm against the full source).  */
2660 *need_next_vector = *mask_fixed;
2662 /* This was the last element of this mask. Start a new one. */
2663 if (index == mask_nunits - 1)
2665 *number_of_mask_fixes = 1;
2666 *mask_fixed = false;
2667 *needs_first_vector = false;
2674 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2675 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2676 permute statements for SLP_NODE_INSTANCE. */
2678 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2679 gimple_stmt_iterator *gsi, int vf,
2680 slp_instance slp_node_instance, bool analyze_only)
2682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2683 tree mask_element_type = NULL_TREE, mask_type;
2684 int i, j, k, nunits, vec_index = 0, scalar_index;
2686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2687 gimple next_scalar_stmt;
2688 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2689 int first_mask_element;
2690 int index, unroll_factor, current_mask_element, ncopies;
2691 unsigned char *mask;
2692 bool only_one_vec = false, need_next_vector = false;
2693 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2694 int number_of_mask_fixes = 1;
2695 bool mask_fixed = false;
2696 bool needs_first_vector = false;
2697 enum machine_mode mode;
2699 mode = TYPE_MODE (vectype);
2701 if (!can_vec_perm_p (mode, false, NULL))
2703 if (vect_print_dump_info (REPORT_DETAILS))
2705 fprintf (vect_dump, "no vect permute for ");
2706 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2711 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2712 same size as the vector element being permuted. */
2714 = lang_hooks.types.type_for_size
2715 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2716 mask_type = get_vectype_for_scalar_type (mask_element_type);
2717 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2718 mask = XALLOCAVEC (unsigned char, nunits);
2719 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2721 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2722 unrolling factor. */
2723 orig_vec_stmts_num = group_size *
2724 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2725 if (orig_vec_stmts_num == 1)
2726 only_one_vec = true;
2728 /* Number of copies is determined by the final vectorization factor
2729 relatively to SLP_NODE_INSTANCE unrolling factor. */
2730 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2732 /* Generate permutation masks for every NODE. Number of masks for each NODE
2733 is equal to GROUP_SIZE.
2734 E.g., we have a group of three nodes with three loads from the same
2735 location in each node, and the vector size is 4. I.e., we have a
2736 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2737 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2738 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2741 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2742 The last mask is illegal since we assume two operands for permute
2743 operation, and the mask element values can't be outside that range.
2744 Hence, the last mask must be converted into {2,5,5,5}.
2745 For the first two permutations we need the first and the second input
2746 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2747 we need the second and the third vectors: {b1,c1,a2,b2} and
2750 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2754 vect_stmts_counter = 0;
2756 first_vec_index = vec_index++;
2758 second_vec_index = first_vec_index;
2760 second_vec_index = vec_index++;
2762 for (j = 0; j < unroll_factor; j++)
2764 for (k = 0; k < group_size; k++)
2766 first_mask_element = i + j * group_size;
2767 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2768 nunits, only_one_vec, index,
2769 mask, ¤t_mask_element,
2771 &number_of_mask_fixes, &mask_fixed,
2772 &needs_first_vector))
2774 mask[index++] = current_mask_element;
2776 if (index == nunits)
2778 tree mask_vec = NULL;
2780 if (!can_vec_perm_p (mode, false, mask))
2782 if (vect_print_dump_info (REPORT_DETAILS))
2784 fprintf (vect_dump, "unsupported vect permute { ");
2785 for (i = 0; i < nunits; ++i)
2786 fprintf (vect_dump, "%d ", mask[i]);
2787 fprintf (vect_dump, "}\n");
2792 while (--index >= 0)
2794 tree t = build_int_cst (mask_element_type, mask[index]);
2795 mask_vec = tree_cons (NULL, t, mask_vec);
2797 mask_vec = build_vector (mask_type, mask_vec);
2802 if (need_next_vector)
2804 first_vec_index = second_vec_index;
2805 second_vec_index = vec_index;
2808 next_scalar_stmt = VEC_index (gimple,
2809 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2811 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2812 mask_vec, first_vec_index, second_vec_index,
2813 gsi, node, vectype, dr_chain,
2814 ncopies, vect_stmts_counter++);
2826 /* Vectorize SLP instance tree in postorder. */
2829 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2830 unsigned int vectorization_factor)
2833 bool strided_store, is_store;
2834 gimple_stmt_iterator si;
2835 stmt_vec_info stmt_info;
2836 unsigned int vec_stmts_size, nunits, group_size;
2839 slp_tree loads_node;
/* Postorder: vectorize all children first so their vectorized defs exist
   before this node's statements are transformed.  */
2845 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2846 vect_schedule_slp_instance ((slp_tree) child, instance,
2847 vectorization_factor);
/* The first scalar stmt of the node is used as the representative for
   computing types, sizes and insertion points.  */
2849 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2850 stmt_info = vinfo_for_stmt (stmt);
2852 /* VECTYPE is the type of the destination. */
2853 vectype = STMT_VINFO_VECTYPE (stmt_info);
2854 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2855 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2857 /* For each SLP instance calculate number of vector stmts to be created
2858 for the scalar stmts in each node of the SLP tree. Number of vector
2859 elements in one vector iteration is the number of scalar elements in
2860 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2862 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2864 /* In case of load permutation we have to allocate vectorized statements for
2865 all the nodes that participate in that permutation. */
2866 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2868 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2870 if (!SLP_TREE_VEC_STMTS (loads_node))
2872 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2874 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Allocate this node's vector-stmts array lazily; a node reached through
   several paths keeps its first allocation.  */
2879 if (!SLP_TREE_VEC_STMTS (node))
2881 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2882 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2885 if (vect_print_dump_info (REPORT_DETAILS))
2887 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2888 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Choose the insertion point SI for the generated vector statements.  */
2891 /* Loads should be inserted before the first load. */
2892 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2893 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2894 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2895 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2896 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2897 else if (is_pattern_stmt_p (stmt_info))
2898 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2900 si = gsi_for_stmt (stmt);
2902 /* Stores should be inserted just before the last store. */
2903 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2904 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2906 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2907 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
2908 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
2909 si = gsi_for_stmt (last_store);
2912 /* Mark the first element of the reduction chain as reduction to properly
2913 transform the node. In the analysis phase only the last element of the
2914 chain is marked as reduction. */
2915 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2916 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2918 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2919 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
/* Emit the vector statements for this node at SI.  */
2922 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2926 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2927 For loop vectorization this is done in vectorizable_call, but for SLP
2928 it needs to be deferred until end of vect_schedule_slp, because multiple
2929 SLP instances may refer to the same scalar stmt. */
2932 vect_remove_slp_scalar_calls (slp_tree node)
2934 gimple stmt, new_stmt;
2935 gimple_stmt_iterator gsi;
2939 stmt_vec_info stmt_info;
/* Process children first (postorder over the SLP tree).  */
2944 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2945 vect_remove_slp_scalar_calls ((slp_tree) child);
2947 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
/* Skip stmts that are not calls or were already removed from a BB
   (gimple_bb is NULL once a stmt has been taken out of the IL).  */
2949 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2951 stmt_info = vinfo_for_stmt (stmt);
/* Only pure-SLP, non-pattern calls with valid vinfo are replaced.  */
2952 if (stmt_info == NULL
2953 || is_pattern_stmt_p (stmt_info)
2954 || !PURE_SLP_STMT (stmt_info))
/* Replace the call with "lhs = 0" of the lhs's type, transferring the
   stmt_vec_info to the new stmt and updating the SSA def link.  */
2956 lhs = gimple_call_lhs (stmt);
2957 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2958 set_vinfo_for_stmt (new_stmt, stmt_info);
2959 set_vinfo_for_stmt (stmt, NULL);
2960 STMT_VINFO_STMT (stmt_info) = new_stmt;
2961 gsi = gsi_for_stmt (stmt);
2962 gsi_replace (&gsi, new_stmt, false);
2963 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2967 /* Generate vector code for all SLP instances in the loop/basic block. */
2970 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2972 VEC (slp_instance, heap) *slp_instances;
2973 slp_instance instance;
2975 bool is_store = false;
/* Exactly one of LOOP_VINFO / BB_VINFO supplies the instances: loop
   vectorization uses the loop's vectorization factor, basic-block SLP
   takes its instances from BB_VINFO.  */
2979 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2980 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2984 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
/* First pass: generate the vector code for every instance tree.  */
2988 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2990 /* Schedule the tree of INSTANCE. */
2991 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2993 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2994 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2995 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: remove the now-dead scalar statements.  This is deferred
   until all instances are scheduled because multiple SLP instances may
   share scalar stmts (see vect_remove_slp_scalar_calls).  */
2998 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3000 slp_tree root = SLP_INSTANCE_TREE (instance);
3003 gimple_stmt_iterator gsi;
3005 vect_remove_slp_scalar_calls (root);
/* Remove the scalar stores of the instance root (up to GROUP_SIZE).  */
3007 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3008 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
/* Skip stmts without a data reference (non-store roots).  */
3010 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
/* For pattern stmts, the stmt actually present in the IL is the
   related original stmt.  */
3013 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3014 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3015 /* Free the attached stmt_vec_info and remove the stmt. */
3016 gsi = gsi_for_stmt (store);
3017 gsi_remove (&gsi, true);
3018 free_stmt_vec_info (store);
3026 /* Vectorize the basic block. */
3029 vect_slp_transform_bb (basic_block bb)
3031 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3032 gimple_stmt_iterator si;
3034 gcc_assert (bb_vinfo);
3036 if (vect_print_dump_info (REPORT_DETAILS))
3037 fprintf (vect_dump, "SLPing BB\n");
3039 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3041 gimple stmt = gsi_stmt (si);
3042 stmt_vec_info stmt_info;
3044 if (vect_print_dump_info (REPORT_DETAILS))
3046 fprintf (vect_dump, "------>SLPing statement: ");
3047 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3050 stmt_info = vinfo_for_stmt (stmt);
3051 gcc_assert (stmt_info);
3053 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3054 if (STMT_SLP_TYPE (stmt_info))
3056 vect_schedule_slp (NULL, bb_vinfo);
3061 mark_sym_for_renaming (gimple_vop (cfun));
3062 /* The memory tags and pointers in vectorized statements need to
3063 have their SSA forms updated. FIXME, why can't this be delayed
3064 until all the loops have been transformed? */
3065 update_ssa (TODO_update_ssa);
3067 if (vect_print_dump_info (REPORT_DETAILS))
3068 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3070 destroy_bb_vec_info (bb_vinfo);