1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if one is found and NULL if not. */
47 find_bb_location (basic_block bb)
50 gimple_stmt_iterator si;
/* Walk the statements of BB and return the first known source location.
   NOTE(review): this excerpt is elided (original line numbers skip), so the
   declaration of STMT and the fall-through return are not visible here.  */
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
69 vect_free_slp_tree (slp_tree node)
/* Free the children first (post-order walk), then this node's vectors.  */
77 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78 vect_free_slp_tree ((slp_tree)child);
80 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
/* SLP_TREE_VEC_STMTS may be NULL (the guard below shows it), so only free
   it when it was actually allocated.  */
82 if (SLP_TREE_VEC_STMTS (node))
83 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
89 /* Free the memory allocated for the SLP instance. */
92 vect_free_slp_instance (slp_instance instance)
/* Release the whole SLP tree, then the instance's load-permutation index
   vector and its vector of load nodes.  */
94 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
95 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
96 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
100 /* Create an SLP node for SCALAR_STMTS. */
103 vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
105 slp_tree node = XNEW (struct _slp_tree);
106 gimple stmt = VEC_index (gimple, scalar_stmts, 0);
/* Derive the number of operands (NOPS) from the group's first stmt:
   call arguments for calls, rhs operands for assignments.  */
109 if (is_gimple_call (stmt))
110 nops = gimple_call_num_args (stmt);
111 else if (is_gimple_assign (stmt))
113 nops = gimple_num_ops (stmt) - 1;
/* NOTE(review): elided excerpt -- the COND_EXPR adjustment body (and the
   fallback for other stmt kinds) is not visible here.  */
114 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
/* Initialize the node's fields; SCALAR_STMTS is stored, not copied, and is
   later released by vect_free_slp_tree.  */
120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121 SLP_TREE_VEC_STMTS (node) = NULL;
122 SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
123 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
124 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
130 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
132 static VEC (slp_oprnd_info, heap) *
133 vect_create_oprnd_info (int nops, int group_size)
136 slp_oprnd_info oprnd_info;
137 VEC (slp_oprnd_info, heap) *oprnds_info;
139 oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
140 for (i = 0; i < nops; i++)
/* One info record per operand, with its def-stmt vector pre-sized for
   GROUP_SIZE entries and all "first stmt" fields left unset.  */
142 oprnd_info = XNEW (struct _slp_oprnd_info);
143 oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
144 oprnd_info->first_dt = vect_uninitialized_def;
145 oprnd_info->first_def_type = NULL_TREE;
146 oprnd_info->first_const_oprnd = NULL_TREE;
147 oprnd_info->first_pattern = false;
148 VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
155 /* Free operands info. Free def-stmts if FREE_DEF_STMTS is true.
156 (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
157 succeeds. In the latter case we don't need the operands info that we used to
158 check isomorphism of the stmts, but we still need the def-stmts - they are
159 used as scalar stmts in SLP nodes. */
161 vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
165 slp_oprnd_info oprnd_info;
/* NOTE(review): the visible loop frees def_stmts unconditionally; per the
   header comment this should be guarded by FREE_DEF_STMTS -- the guard is
   presumably in the elided lines.  Each oprnd_info record itself must also
   be freed (elided).  */
168 FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
169 VEC_free (gimple, heap, oprnd_info->def_stmts);
171 VEC_free (slp_oprnd_info, heap, *oprnds_info);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
/* NOTE(review): this excerpt is elided (original line numbers skip); return
   statements, closing braces and several guards are not visible here.  */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
181 slp_tree slp_node, gimple stmt,
182 int ncopies_for_cost, bool first,
183 VEC (slp_oprnd_info, heap) **oprnds_info)
186 unsigned int i, number_of_oprnds;
187 tree def, def_op0 = NULL_TREE;
189 enum vect_def_type dt = vect_uninitialized_def;
190 enum vect_def_type dt_op0 = vect_uninitialized_def;
191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
192 tree lhs = gimple_get_lhs (stmt);
193 struct loop *loop = NULL;
194 enum tree_code rhs_code;
195 bool different_types = false;
196 bool pattern = false;
197 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
199 tree compare_rhs = NULL_TREE;
202 loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Determine how many rhs operands STMT has.  */
204 if (is_gimple_call (stmt))
206 number_of_oprnds = gimple_call_num_args (stmt);
209 else if (is_gimple_assign (stmt))
211 number_of_oprnds = gimple_num_ops (stmt) - 1;
212 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
/* For each operand: find its def stmt and def type, and record/compare
   against the info gathered from the group's first stmt.  */
218 for (i = 0; i < number_of_oprnds; i++)
223 compare_rhs = NULL_TREE;
226 oprnd = gimple_op (stmt, op_idx++);
228 oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
/* A COND_EXPR condition is a comparison; split it into its two rhs
   operands and process them one at a time.  */
230 if (COMPARISON_CLASS_P (oprnd))
232 compare_rhs = TREE_OPERAND (oprnd, 1);
233 oprnd = TREE_OPERAND (oprnd, 0);
236 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
238 || (!def_stmt && dt != vect_constant_def))
240 if (vect_print_dump_info (REPORT_SLP))
242 fprintf (vect_dump, "Build SLP failed: can't find def for ");
243 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern. Check that all the stmts of the node are in the
252 if (loop && def_stmt && gimple_bb (def_stmt)
253 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
254 && vinfo_for_stmt (def_stmt)
255 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
256 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
257 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
/* Either all stmts of the group are in a pattern, or none are.  */
260 if (!first && !oprnd_info->first_pattern)
262 if (vect_print_dump_info (REPORT_DETAILS))
264 fprintf (vect_dump, "Build SLP failed: some of the stmts"
265 " are in a pattern, and others are not ")
266 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* Follow the pattern: use the related (pattern) stmt as the def.  */
272 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
273 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
275 if (dt == vect_unknown_def_type)
277 if (vect_print_dump_info (REPORT_DETAILS))
278 fprintf (vect_dump, "Unsupported pattern.");
282 switch (gimple_code (def_stmt))
285 def = gimple_phi_result (def_stmt);
289 def = gimple_assign_lhs (def_stmt);
293 if (vect_print_dump_info (REPORT_DETAILS))
294 fprintf (vect_dump, "unsupported defining stmt: ");
/* First stmt of the group: record the reference def type / def type /
   constant operand for later comparisons.  */
301 oprnd_info->first_dt = dt;
302 oprnd_info->first_pattern = pattern;
305 oprnd_info->first_def_type = TREE_TYPE (def);
306 oprnd_info->first_const_oprnd = NULL_TREE;
310 oprnd_info->first_def_type = NULL_TREE;
311 oprnd_info->first_const_oprnd = oprnd;
318 /* Analyze costs (for the first stmt of the group only). */
319 if (REFERENCE_CLASS_P (lhs))
321 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
324 /* Not memory operation (we don't call this function for
326 vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
332 /* Not first stmt of the group, check that the def-stmt/s match
333 the def-stmt/s of the first stmt. Allow different definition
334 types for reduction chains: the first stmt must be a
335 vect_reduction_def (a phi node), and the rest
336 vect_internal_def. */
337 if (((oprnd_info->first_dt != dt
338 && !(oprnd_info->first_dt == vect_reduction_def
339 && dt == vect_internal_def))
340 || (oprnd_info->first_def_type != NULL_TREE
342 && !types_compatible_p (oprnd_info->first_def_type,
345 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
349 if (number_of_oprnds != 2)
351 if (vect_print_dump_info (REPORT_SLP))
352 fprintf (vect_dump, "Build SLP failed: different types ");
357 /* Try to swap operands in case of binary operation. */
359 different_types = true;
362 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* Swapping is only legal for a commutative binary op whose operands'
   def types/def-types cross-match operand 0 of the first stmt.  */
363 if (is_gimple_assign (stmt)
364 && (rhs_code = gimple_assign_rhs_code (stmt))
365 && TREE_CODE_CLASS (rhs_code) == tcc_binary
366 && commutative_tree_code (rhs_code)
367 && oprnd0_info->first_dt == dt
368 && oprnd_info->first_dt == dt_op0
370 && !(oprnd0_info->first_def_type
371 && !types_compatible_p (oprnd0_info->first_def_type,
373 && !(oprnd_info->first_def_type
374 && !types_compatible_p (oprnd_info->first_def_type,
375 TREE_TYPE (def_op0))))
377 if (vect_print_dump_info (REPORT_SLP))
379 fprintf (vect_dump, "Swapping operands of ");
380 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
383 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
384 gimple_assign_rhs2_ptr (stmt));
388 if (vect_print_dump_info (REPORT_SLP))
389 fprintf (vect_dump, "Build SLP failed: different types ");
397 /* Check the types of the definitions. */
400 case vect_constant_def:
401 case vect_external_def:
402 case vect_reduction_def:
405 case vect_internal_def:
408 oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
/* NOTE(review): both lookups use index 0, so OPRND1_INFO aliases
   OPRND0_INFO and the cross-push for swapped operands below is a no-op;
   the second lookup should presumably use index 1 -- verify against the
   upstream GCC sources.  */
409 oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
411 VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
413 VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
416 VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
421 /* FORNOW: Not supported. */
422 if (vect_print_dump_info (REPORT_SLP))
424 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
425 print_generic_expr (vect_dump, def, TDF_SLIM);
436 /* Recursively build an SLP tree starting from NODE.
437 Fail (and return FALSE) if def-stmts are not isomorphic, require data
438 permutation or are of unsupported types of operation. Otherwise, return
/* NOTE(review): this excerpt is elided (original line numbers skip);
   return statements, some guards and closing braces are not visible.  */
442 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
443 slp_tree *node, unsigned int group_size,
444 int *inside_cost, int *outside_cost,
445 int ncopies_for_cost, unsigned int *max_nunits,
446 VEC (int, heap) **load_permutation,
447 VEC (slp_tree, heap) **loads,
448 unsigned int vectorization_factor, bool *loads_permuted)
451 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
452 gimple stmt = VEC_index (gimple, stmts, 0);
453 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
454 enum tree_code first_cond_code = ERROR_MARK;
456 bool stop_recursion = false, need_same_oprnds = false;
457 tree vectype, scalar_type, first_op1 = NULL_TREE;
458 unsigned int ncopies;
461 enum machine_mode optab_op2_mode;
462 enum machine_mode vec_mode;
463 struct data_reference *first_dr;
465 bool permutation = false;
466 unsigned int load_place;
467 gimple first_load, prev_first_load = NULL;
468 VEC (slp_oprnd_info, heap) *oprnds_info;
470 slp_oprnd_info oprnd_info;
/* Number of operands, taken from the group's first stmt (same logic as
   vect_create_new_slp_node).  */
473 if (is_gimple_call (stmt))
474 nops = gimple_call_num_args (stmt);
475 else if (is_gimple_assign (stmt))
477 nops = gimple_num_ops (stmt) - 1;
478 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
484 oprnds_info = vect_create_oprnd_info (nops, group_size);
486 /* For every stmt in NODE find its def stmt/s. */
487 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
489 if (vect_print_dump_info (REPORT_SLP))
491 fprintf (vect_dump, "Build SLP for ");
492 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
495 /* Fail to vectorize statements marked as unvectorizable. */
496 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
498 if (vect_print_dump_info (REPORT_SLP))
501 "Build SLP failed: unvectorizable statement ");
502 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
505 vect_free_oprnd_info (&oprnds_info, true);
/* Only assignments and calls (stmts with an lhs) are handled.  */
509 lhs = gimple_get_lhs (stmt);
510 if (lhs == NULL_TREE)
512 if (vect_print_dump_info (REPORT_SLP))
515 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
516 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
519 vect_free_oprnd_info (&oprnds_info, true);
/* COND_EXPR conditions must be explicit comparisons.  */
523 if (is_gimple_assign (stmt)
524 && gimple_assign_rhs_code (stmt) == COND_EXPR
525 && (cond = gimple_assign_rhs1 (stmt))
526 && !COMPARISON_CLASS_P (cond))
528 if (vect_print_dump_info (REPORT_SLP))
531 "Build SLP failed: condition is not comparison ");
532 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
535 vect_free_oprnd_info (&oprnds_info, true);
539 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
540 vectype = get_vectype_for_scalar_type (scalar_type);
543 if (vect_print_dump_info (REPORT_SLP))
545 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
546 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
549 vect_free_oprnd_info (&oprnds_info, true);
553 /* In case of multiple types we need to detect the smallest type. */
554 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
556 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
558 vectorization_factor = *max_nunits;
561 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
/* Calls: only plain, nothrow, non-variadic-state calls are supported.  */
563 if (is_gimple_call (stmt))
565 rhs_code = CALL_EXPR;
566 if (gimple_call_internal_p (stmt)
567 || gimple_call_tail_p (stmt)
568 || gimple_call_noreturn_p (stmt)
569 || !gimple_call_nothrow_p (stmt)
570 || gimple_call_chain (stmt))
572 if (vect_print_dump_info (REPORT_SLP))
575 "Build SLP failed: unsupported call type ");
576 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
579 vect_free_oprnd_info (&oprnds_info, true);
584 rhs_code = gimple_assign_rhs_code (stmt);
586 /* Check the operation. */
589 first_stmt_code = rhs_code;
591 /* Shift arguments should be equal in all the packed stmts for a
592 vector shift with scalar shift operand. */
593 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
594 || rhs_code == LROTATE_EXPR
595 || rhs_code == RROTATE_EXPR)
597 vec_mode = TYPE_MODE (vectype);
599 /* First see if we have a vector/vector shift. */
600 optab = optab_for_tree_code (rhs_code, vectype,
604 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
606 /* No vector/vector shift, try for a vector/scalar shift. */
607 optab = optab_for_tree_code (rhs_code, vectype,
612 if (vect_print_dump_info (REPORT_SLP))
613 fprintf (vect_dump, "Build SLP failed: no optab.");
614 vect_free_oprnd_info (&oprnds_info, true);
617 icode = (int) optab_handler (optab, vec_mode);
618 if (icode == CODE_FOR_nothing)
620 if (vect_print_dump_info (REPORT_SLP))
621 fprintf (vect_dump, "Build SLP failed: "
622 "op not supported by target.");
623 vect_free_oprnd_info (&oprnds_info, true);
626 optab_op2_mode = insn_data[icode].operand[2].mode;
627 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: all stmts must share the same (scalar)
   second operand.  */
629 need_same_oprnds = true;
630 first_op1 = gimple_assign_rhs2 (stmt);
634 else if (rhs_code == WIDEN_LSHIFT_EXPR)
636 need_same_oprnds = true;
637 first_op1 = gimple_assign_rhs2 (stmt);
/* Subsequent stmts must match the first stmt's operation, except for
   real/imag part mixes and strided memory references.  */
642 if (first_stmt_code != rhs_code
643 && (first_stmt_code != IMAGPART_EXPR
644 || rhs_code != REALPART_EXPR)
645 && (first_stmt_code != REALPART_EXPR
646 || rhs_code != IMAGPART_EXPR)
647 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
648 && (first_stmt_code == ARRAY_REF
649 || first_stmt_code == INDIRECT_REF
650 || first_stmt_code == COMPONENT_REF
651 || first_stmt_code == MEM_REF)))
653 if (vect_print_dump_info (REPORT_SLP))
656 "Build SLP failed: different operation in stmt ");
657 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
660 vect_free_oprnd_info (&oprnds_info, true);
665 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
667 if (vect_print_dump_info (REPORT_SLP))
670 "Build SLP failed: different shift arguments in ");
671 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
674 vect_free_oprnd_info (&oprnds_info, true);
/* All calls in the group must target the same function and type.  */
678 if (rhs_code == CALL_EXPR)
680 gimple first_stmt = VEC_index (gimple, stmts, 0);
681 if (gimple_call_num_args (stmt) != nops
682 || !operand_equal_p (gimple_call_fn (first_stmt),
683 gimple_call_fn (stmt), 0)
684 || gimple_call_fntype (first_stmt)
685 != gimple_call_fntype (stmt))
687 if (vect_print_dump_info (REPORT_SLP))
690 "Build SLP failed: different calls in ");
691 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
694 vect_free_oprnd_info (&oprnds_info, true);
700 /* Strided store or load. */
701 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
703 if (REFERENCE_CLASS_P (lhs))
/* Store: collect and check the rhs defs.  */
706 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
707 stmt, ncopies_for_cost,
708 (i == 0), &oprnds_info))
710 vect_free_oprnd_info (&oprnds_info, true);
717 /* FORNOW: Check that there is no gap between the loads. */
718 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
719 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
720 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
721 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
723 if (vect_print_dump_info (REPORT_SLP))
725 fprintf (vect_dump, "Build SLP failed: strided "
727 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
730 vect_free_oprnd_info (&oprnds_info, true);
734 /* Check that the size of interleaved loads group is not
735 greater than the SLP group size. */
737 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
739 if (vect_print_dump_info (REPORT_SLP))
741 fprintf (vect_dump, "Build SLP failed: the number of "
742 "interleaved loads is greater than"
743 " the SLP group size ");
744 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
747 vect_free_oprnd_info (&oprnds_info, true);
751 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
754 /* Check that there are no loads from different interleaving
755 chains in the same node. The only exception is complex
757 if (prev_first_load != first_load
758 && rhs_code != REALPART_EXPR
759 && rhs_code != IMAGPART_EXPR)
761 if (vect_print_dump_info (REPORT_SLP))
763 fprintf (vect_dump, "Build SLP failed: different "
764 "interleaving chains in one node ");
765 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
768 vect_free_oprnd_info (&oprnds_info, true);
773 prev_first_load = first_load;
/* Alignment is only checked on the chain's first load.  */
775 if (first_load == stmt)
777 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
778 if (vect_supportable_dr_alignment (first_dr, false)
779 == dr_unaligned_unsupported)
781 if (vect_print_dump_info (REPORT_SLP))
783 fprintf (vect_dump, "Build SLP failed: unsupported "
785 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
788 vect_free_oprnd_info (&oprnds_info, true);
792 /* Analyze costs (for the first stmt in the group). */
793 vect_model_load_cost (vinfo_for_stmt (stmt),
794 ncopies_for_cost, false, *node);
797 /* Store the place of this load in the interleaving chain. In
798 case that permutation is needed we later decide if a specific
799 permutation is supported. */
800 load_place = vect_get_place_in_interleaving_chain (stmt,
805 VEC_safe_push (int, heap, *load_permutation, load_place);
807 /* We stop the tree when we reach a group of loads. */
808 stop_recursion = true;
811 } /* Strided access. */
/* Non-strided access paths.  */
814 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
816 /* Not strided load. */
817 if (vect_print_dump_info (REPORT_SLP))
819 fprintf (vect_dump, "Build SLP failed: not strided load ");
820 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
823 /* FORNOW: Not strided loads are not supported. */
824 vect_free_oprnd_info (&oprnds_info, true);
828 /* Not memory operation. */
829 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
830 && TREE_CODE_CLASS (rhs_code) != tcc_unary
831 && rhs_code != COND_EXPR
832 && rhs_code != CALL_EXPR)
834 if (vect_print_dump_info (REPORT_SLP))
836 fprintf (vect_dump, "Build SLP failed: operation");
837 fprintf (vect_dump, " unsupported ");
838 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
841 vect_free_oprnd_info (&oprnds_info, true);
/* All COND_EXPRs in the group must use the same comparison code.  */
845 if (rhs_code == COND_EXPR)
847 tree cond_expr = gimple_assign_rhs1 (stmt);
850 first_cond_code = TREE_CODE (cond_expr);
851 else if (first_cond_code != TREE_CODE (cond_expr))
853 if (vect_print_dump_info (REPORT_SLP))
855 fprintf (vect_dump, "Build SLP failed: different"
857 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
860 vect_free_oprnd_info (&oprnds_info, true);
865 /* Find the def-stmts. */
866 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
867 ncopies_for_cost, (i == 0),
870 vect_free_oprnd_info (&oprnds_info, true);
876 /* Add the costs of the node to the overall instance costs. */
877 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
878 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
880 /* Strided loads were reached - stop the recursion. */
883 VEC_safe_push (slp_tree, heap, *loads, *node);
887 *loads_permuted = true;
/* Account a permute cost for the permuted load group.  */
889 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
894 /* We don't check here complex numbers chains, so we set
895 LOADS_PERMUTED for further check in
896 vect_supported_load_permutation_p. */
897 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
898 *loads_permuted = true;
904 /* Create SLP_TREE nodes for the definition node/s. */
905 FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
/* Only internal defs get child nodes; constants/externals do not.  */
909 if (oprnd_info->first_dt != vect_internal_def)
912 child = vect_create_new_slp_node (oprnd_info->def_stmts);
914 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
915 inside_cost, outside_cost, ncopies_for_cost,
916 max_nunits, load_permutation, loads,
917 vectorization_factor, loads_permuted))
920 vect_free_oprnd_info (&oprnds_info, true);
924 VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
/* Analysis succeeded: keep the def-stmts (now owned by child nodes).  */
927 vect_free_oprnd_info (&oprnds_info, false);
/* Dump the SLP tree rooted at NODE to VECT_DUMP: the node's scalar stmts,
   then recursively each child.  */
933 vect_print_slp_tree (slp_tree node)
942 fprintf (vect_dump, "node ");
943 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
945 fprintf (vect_dump, "\n\tstmt %d ", i);
946 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
948 fprintf (vect_dump, "\n");
950 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
951 vect_print_slp_tree ((slp_tree) child);
955 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
956 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
957 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
958 stmts in NODE are to be marked. */
961 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* NOTE(review): the visible loop marks every stmt; per the header comment a
   J-specific guard (mark only stmt J when J >= 0) is expected here and is
   presumably in the elided lines.  */
970 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
972 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
974 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
975 vect_mark_slp_stmts ((slp_tree) child, mark, j);
979 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
982 vect_mark_slp_stmts_relevant (slp_tree node)
986 stmt_vec_info stmt_info;
992 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
994 stmt_info = vinfo_for_stmt (stmt);
/* A stmt may only be unmarked or already marked as used in scope.  */
995 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
996 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
997 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
/* Recurse into the children.  */
1000 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1001 vect_mark_slp_stmts_relevant ((slp_tree) child);
1005 /* Check if the permutation required by the SLP INSTANCE is supported.
1006 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1009 vect_supported_slp_permutation_p (slp_instance instance)
1011 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
1012 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1013 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1014 VEC (slp_tree, heap) *sorted_loads = NULL;
1016 slp_tree *tmp_loads = NULL;
1017 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1020 /* FORNOW: The only supported loads permutation is loads from the same
1021 location in all the loads in the node, when the data-refs in
1022 nodes of LOADS constitute an interleaving chain.
1023 Sort the nodes according to the order of accesses in the chain. */
1024 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation in steps of GROUP_SIZE in parallel with the load
   nodes; INDEX gives each node's position in the interleaving chain.  */
1026 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
1027 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
1028 i += group_size, j++)
1030 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
1031 /* Check that the loads are all in the same interleaving chain. */
1032 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1034 if (vect_print_dump_info (REPORT_DETAILS))
1036 fprintf (vect_dump, "Build SLP failed: unsupported data "
1038 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
1045 tmp_loads[index] = load;
/* Replace the instance's load vector with the chain-ordered one.  */
1048 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
1049 for (i = 0; i < group_size; i++)
1050 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
1052 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
1053 SLP_INSTANCE_LOADS (instance) = sorted_loads;
/* Dry-run the permuted load transformation (analysis only).  */
1056 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
1057 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1065 /* Rearrange the statements of NODE according to PERMUTATION. */
1068 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1069 VEC (int, heap) *permutation)
1072 VEC (gimple, heap) *tmp_stmts;
1073 unsigned int index, i;
/* Rearrange the children first so the whole subtree stays consistent.  */
1079 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1080 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1082 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1083 tmp_stmts = VEC_alloc (gimple, heap, group_size);
/* Pre-fill with NULLs so VEC_replace can scatter into any slot.  */
1085 for (i = 0; i < group_size; i++)
1086 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1088 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1090 index = VEC_index (int, permutation, i);
1091 VEC_replace (gimple, tmp_stmts, index, stmt);
1094 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1095 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1099 /* Check if the required load permutation is supported.
1100 LOAD_PERMUTATION contains a list of indices of the loads.
1101 In SLP this permutation is relative to the order of strided stores that are
1102 the base of the SLP instance. */
/* NOTE(review): this excerpt is elided (original line numbers skip); return
   statements, some loop headers and closing braces are not visible.  */
1105 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1106 VEC (int, heap) *load_permutation)
1108 int i = 0, j, prev = -1, next, k, number_of_groups;
1109 bool supported, bad_permutation = false;
1111 slp_tree node, other_complex_node;
1112 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1113 unsigned complex_numbers = 0;
1114 struct data_reference *dr;
1115 bb_vec_info bb_vinfo;
1117 /* FORNOW: permutations are only supported in SLP. */
1121 if (vect_print_dump_info (REPORT_SLP))
1123 fprintf (vect_dump, "Load permutation ");
1124 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1125 fprintf (vect_dump, "%d ", next);
1128 /* In case of reduction every load permutation is allowed, since the order
1129 of the reduction statements is not important (as opposed to the case of
1130 strided stores). The only condition we need to check is that all the
1131 load nodes are of the same size and have the same permutation (and then
1132 rearrange all the nodes of the SLP instance according to this
1135 /* Check that all the load nodes are of the same size. */
1136 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1138 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1139 != (unsigned) group_size)
/* Count load nodes whose first stmt extracts a complex part.  */
1142 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1143 if (is_gimple_assign (stmt)
1144 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1145 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1149 /* Complex operands can be swapped as following:
1150 real_c = real_b + real_a;
1151 imag_c = imag_a + imag_b;
1152 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1153 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1154 chains are mixed, they match the above pattern. */
1155 if (complex_numbers)
1157 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1159 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1165 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
/* Mixed chains are only allowed for exactly two complex nodes.  */
1167 if (complex_numbers != 2)
1175 other_complex_node = VEC_index (slp_tree,
1176 SLP_INSTANCE_LOADS (slp_instn), k);
1177 other_node_first = VEC_index (gimple,
1178 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
/* The stmt must come from the other complex node's chain.  */
1180 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1181 != other_node_first)
1189 /* We checked that this case is ok, so there is no need to proceed with
1190 permutation tests. */
1191 if (complex_numbers == 2)
1193 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1194 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1198 node = SLP_INSTANCE_TREE (slp_instn);
1199 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1200 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1201 instance, not all the loads belong to the same node or interleaving
1202 group. Hence, we need to divide them into groups according to
1204 number_of_groups = VEC_length (int, load_permutation) / group_size;
1206 /* Reduction (there are no data-refs in the root).
1207 In reduction chain the order of the loads is important. */
1208 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1209 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1211 int first_group_load_index;
1213 /* Compare all the permutation sequences to the first one. */
1214 for (i = 1; i < number_of_groups; i++)
1217 for (j = i * group_size; j < i * group_size + group_size; j++)
1219 next = VEC_index (int, load_permutation, j);
1220 first_group_load_index = VEC_index (int, load_permutation, k);
1222 if (next != first_group_load_index)
1224 bad_permutation = true;
1231 if (bad_permutation)
1235 if (!bad_permutation)
1237 /* Check that the loads in the first sequence are different and there
1238 are no gaps between them. */
1239 load_index = sbitmap_alloc (group_size);
1240 sbitmap_zero (load_index);
1241 for (k = 0; k < group_size; k++)
1243 first_group_load_index = VEC_index (int, load_permutation, k);
1244 if (TEST_BIT (load_index, first_group_load_index))
1246 bad_permutation = true;
1250 SET_BIT (load_index, first_group_load_index);
/* Every index in [0, group_size) must appear exactly once.  */
1253 if (!bad_permutation)
1254 for (k = 0; k < group_size; k++)
1255 if (!TEST_BIT (load_index, k))
1257 bad_permutation = true;
1261 sbitmap_free (load_index);
1264 if (!bad_permutation)
1266 /* This permutation is valid for reduction. Since the order of the
1267 statements in the nodes is not important unless they are memory
1268 accesses, we can rearrange the statements in all the nodes
1269 according to the order of the loads. */
1270 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1272 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1277 /* In basic block vectorization we allow any subchain of an interleaving
1279 FORNOW: not supported in loop SLP because of realignment complications. */
1280 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1281 bad_permutation = false;
1282 /* Check that for every node in the instance the loads form a subchain. */
1285 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1289 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1292 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1294 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1296 bad_permutation = true;
/* Each load must be the chain successor of the previous one.  */
1300 if (j != 0 && next_load != load)
1302 bad_permutation = true;
1306 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1309 if (bad_permutation)
1313 /* Check that the alignment of the first load in every subchain, i.e.,
1314 the first statement in every load node, is supported. */
1315 if (!bad_permutation)
1317 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1319 first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1321 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1323 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1324 if (vect_supportable_dr_alignment (dr, false)
1325 == dr_unaligned_unsupported)
1327 if (vect_print_dump_info (REPORT_SLP))
1329 fprintf (vect_dump, "unsupported unaligned load ");
1330 print_gimple_stmt (vect_dump, first_load, 0,
1333 bad_permutation = true;
1339 if (!bad_permutation)
1341 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1347 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1348 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1349 well (unless it's reduction). */
1350 if (VEC_length (int, load_permutation)
1351 != (unsigned int) (group_size * group_size))
1355 load_index = sbitmap_alloc (group_size);
1356 sbitmap_zero (load_index);
1357 for (j = 0; j < group_size; j++)
/* Each run of GROUP_SIZE entries must repeat a single index.  */
1359 for (i = j * group_size, k = 0;
1360 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1363 if (i != j * group_size && next != prev)
/* Each index may serve as the repeated value of only one run.  */
1372 if (TEST_BIT (load_index, prev))
1378 SET_BIT (load_index, prev);
1381 for (j = 0; j < group_size; j++)
1382 if (!TEST_BIT (load_index, j))
1385 sbitmap_free (load_index);
1387 if (supported && i == group_size * group_size
1388 && vect_supported_slp_permutation_p (slp_instn))
1395 /* Find the first load in the loop that belongs to INSTANCE.
1396 When loads are in several SLP nodes, there can be a case in which the first
1397 load does not appear in the first SLP node to be transformed, causing
1398 incorrect order of statements. Since we generate all the loads together,
1399 they must be inserted before the first load of the SLP instance and not
1400 before the first load of the first node of the instance. */
1403 vect_find_first_load_in_slp_instance (slp_instance instance)
1407 gimple first_load = NULL, load;
/* Scan every scalar load of every load node of INSTANCE; get_earlier_stmt
   keeps whichever statement appears earlier, so FIRST_LOAD converges on the
   earliest load in the instance (NULL if there are no loads). */
1409 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1410 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1411 first_load = get_earlier_stmt (load, first_load);
1417 /* Find the last store in SLP INSTANCE. */
1420 vect_find_last_store_in_slp_instance (slp_instance instance)
1424 gimple last_store = NULL, store;
/* The stores are the scalar stmts of the instance's root node; fold them
   through get_later_stmt to find the one latest in the IL.  */
1426 node = SLP_INSTANCE_TREE (instance);
1428 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1430 last_store = get_later_stmt (store, last_store);
1436 /* Analyze an SLP instance starting from a group of strided stores. Call
1437 vect_build_slp_tree to build a tree of packed stmts if possible.
1438 Return FALSE if it's impossible to SLP any stmt in the loop. */
1441 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1444 slp_instance new_instance;
1446 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1447 unsigned int unrolling_factor = 1, nunits;
1448 tree vectype, scalar_type = NULL_TREE;
1450 unsigned int vectorization_factor = 0;
1451 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1452 unsigned int max_nunits = 0;
1453 VEC (int, heap) *load_permutation;
1454 VEC (slp_tree, heap) *loads;
1455 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1456 bool loads_permuted = false;
1457 VEC (gimple, heap) *scalar_stmts;
/* Determine VECTYPE and GROUP_SIZE.  Three entry kinds are visible:
   a strided-access group (take the type from the data-ref), a reduction
   chain, and a plain reduction group (both loop-only, asserted below).  */
1459 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1463 scalar_type = TREE_TYPE (DR_REF (dr));
1464 vectype = get_vectype_for_scalar_type (scalar_type);
1468 gcc_assert (loop_vinfo);
1469 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1472 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1476 gcc_assert (loop_vinfo);
1477 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1478 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
/* No usable vector type for the scalar type => fail with a dump note.  */
1483 if (vect_print_dump_info (REPORT_SLP))
1485 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1486 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1492 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1494 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1496 vectorization_factor = nunits;
1498 /* Calculate the unrolling factor. */
1499 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP cannot unroll, so any unrolling requirement is fatal.  */
1500 if (unrolling_factor != 1 && !loop_vinfo)
1502 if (vect_print_dump_info (REPORT_SLP))
1503 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1509 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1510 scalar_stmts = VEC_alloc (gimple, heap, group_size)
;
1512 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1514 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* For pattern stmts push the pattern (related) stmt, not the original.  */
1517 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1518 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1519 VEC_safe_push (gimple, heap, scalar_stmts,
1520 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1522 VEC_safe_push (gimple, heap, scalar_stmts, next);
1523 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1528 /* Collect reduction statements. */
1529 VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1530 for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1531 VEC_safe_push (gimple, heap, scalar_stmts, next);
1534 node = vect_create_new_slp_node (scalar_stmts);
1536 /* Calculate the number of vector stmts to create based on the unrolling
1537 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1538 GROUP_SIZE / NUNITS otherwise. */
1539 ncopies_for_cost = unrolling_factor * group_size / nunits;
1541 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1542 loads = VEC_alloc (slp_tree, heap, group_size);
1544 /* Build the tree for the SLP instance. */
1545 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1546 &inside_cost, &outside_cost, ncopies_for_cost,
1547 &max_nunits, &load_permutation, &loads,
1548 vectorization_factor, &loads_permuted))
1550 /* Calculate the unrolling factor based on the smallest type. */
1551 if (max_nunits > nunits)
1552 unrolling_factor = least_common_multiple (max_nunits, group_size)
/* Re-check the no-unrolling constraint after the smallest-type update.  */
1555 if (unrolling_factor != 1 && !loop_vinfo)
1557 if (vect_print_dump_info (REPORT_SLP))
1558 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1563 /* Create a new SLP instance. */
1564 new_instance = XNEW (struct _slp_instance);
1565 SLP_INSTANCE_TREE (new_instance) = node;
1566 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1567 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1568 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1569 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1570 SLP_INSTANCE_LOADS (new_instance) = loads;
1571 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1572 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* Permuted loads: reject the instance if the permutation is unsupported,
   otherwise record the earliest load so generated loads are placed there.  */
1576 if (!vect_supported_load_permutation_p (new_instance, group_size,
1579 if (vect_print_dump_info (REPORT_SLP))
1581 fprintf (vect_dump, "Build SLP failed: unsupported load "
1583 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1586 vect_free_slp_instance (new_instance);
1590 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1591 = vect_find_first_load_in_slp_instance (new_instance);
1594 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the loop or basic-block vinfo, as appropriate.  */
1597 VEC_safe_push (slp_instance, heap,
1598 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1601 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1604 if (vect_print_dump_info (REPORT_SLP))
1605 vect_print_slp_tree (node);
1610 /* Failed to SLP. */
1611 /* Free the allocated memory. */
1612 vect_free_slp_tree (node);
1613 VEC_free (int, heap, load_permutation);
1614 VEC_free (slp_tree, heap, loads);
1620 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1621 trees of packed scalar stmts if SLP is possible. */
1624 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1627 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1628 gimple first_element;
1631 if (vect_print_dump_info (REPORT_SLP))
1632 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Pull the candidate seed statements from the loop vinfo when analyzing a
   loop, otherwise from the basic-block vinfo (stores only in that case).  */
1636 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1637 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1638 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1641 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1643 /* Find SLP sequences starting from groups of strided stores. */
1644 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1645 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
/* In basic-block vectorization a failure to build any instance means the
   whole block cannot be SLPed.  */
1648 if (bb_vinfo && !ok)
1650 if (vect_print_dump_info (REPORT_SLP))
1651 fprintf (vect_dump, "Failed to SLP the basic block.");
1657 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1659 /* Find SLP sequences starting from reduction chains. */
1660 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1661 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1666 /* Don't try to vectorize SLP reductions if reduction chain was
1671 /* Find SLP sequences starting from groups of reductions. */
/* A single reduction cannot form an SLP group, hence the "> 1" test.  */
1672 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1673 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1674 VEC_index (gimple, reductions, 0)))
1681 /* For each possible SLP instance decide whether to SLP it and calculate overall
1682 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1683 least one instance. */
1686 vect_make_slp_decision (loop_vec_info loop_vinfo)
1688 unsigned int i, unrolling_factor = 1;
1689 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1690 slp_instance instance;
1691 int decided_to_slp = 0;
1693 if (vect_print_dump_info (REPORT_SLP))
1694 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1696 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1698 /* FORNOW: SLP if you can. */
/* The overall unrolling factor is the maximum over all instances.  */
1699 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1700 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1702 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1703 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1704 loop-based vectorization. Such stmts will be marked as HYBRID. */
1705 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1709 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1711 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1712 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1713 decided_to_slp, unrolling_factor);
1715 return (decided_to_slp > 0);
1719 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1720 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1723 vect_detect_hybrid_slp_stmts (slp_tree node)
1727 imm_use_iterator imm_iter;
1729 stmt_vec_info stmt_vinfo;
/* A pure-SLP stmt whose SSA result is also used by a relevant non-SLP stmt
   (excluding the reduction PHI case) forces the whole node to be HYBRID.  */
1735 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1736 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1737 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1738 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1739 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1740 && !STMT_SLP_TYPE (stmt_vinfo)
1741 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1742 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1743 && !(gimple_code (use_stmt) == GIMPLE_PHI
1744 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1745 == vect_reduction_def))
1746 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into all children of the SLP tree.  */
1748 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1749 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1753 /* Find stmts that must be both vectorized and SLPed. */
1756 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1759 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1760 slp_instance instance;
1762 if (vect_print_dump_info (REPORT_SLP))
1763 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Walk every SLP instance of the loop and mark hybrid stmts in its tree.  */
1765 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1766 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1770 /* Create and initialize a new bb_vec_info struct for BB, as well as
1771 stmt_vec_info structs for all the stmts in it. */
1774 new_bb_vec_info (basic_block bb)
1776 bb_vec_info res = NULL;
1777 gimple_stmt_iterator gsi;
/* xcalloc zero-initializes the struct, so unset fields start as NULL/0.  */
1779 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1780 BB_VINFO_BB (res) = bb;
/* Give every stmt in BB a fresh stmt_vec_info tied to this bb_vinfo.  */
1782 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1784 gimple stmt = gsi_stmt (gsi);
1785 gimple_set_uid (stmt, 0);
1786 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
/* Initial capacities (10 stores, 2 instances) are heuristic guesses.  */
1789 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1790 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1797 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1798 stmts in the basic block. */
1801 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1804 gimple_stmt_iterator si;
1809 bb = BB_VINFO_BB (bb_vinfo);
/* Release the per-stmt vinfo of every stmt in the block.  */
1811 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1813 gimple stmt = gsi_stmt (si);
1814 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1817 /* Free stmt_vec_info. */
1818 free_stmt_vec_info (stmt);
/* Then release the block-level data: data-refs, dependences, and vectors.  */
1821 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1822 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1823 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1824 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1830 /* Analyze statements contained in SLP tree node after recursively analyzing
1831 the subtree. Return TRUE if the operations are supported. */
1834 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Post-order: children first, so operand defs are analyzed before uses.  */
1844 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1845 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1848 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1850 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1851 gcc_assert (stmt_info);
1852 gcc_assert (PURE_SLP_STMT (stmt_info));
1854 if (!vect_analyze_stmt (stmt, &dummy, node))
1862 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1863 operations are supported. */
1866 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1868 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1869 slp_instance instance;
/* No i++ in the loop header: on removal the next element shifts into slot
   i, so the index is only advanced when the instance is kept (elided).  */
1872 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1874 if (!vect_slp_analyze_node_operations (bb_vinfo,
1875 SLP_INSTANCE_TREE (instance)))
1877 vect_free_slp_instance (instance);
1878 VEC_ordered_remove (slp_instance, slp_instances, i);
/* Fail if every instance was rejected.  */
1884 if (!VEC_length (slp_instance, slp_instances))
1890 /* Check if vectorization of the basic block is profitable. */
1893 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1895 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1896 slp_instance instance;
1898 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1899 unsigned int stmt_cost;
1901 gimple_stmt_iterator si;
1902 basic_block bb = BB_VINFO_BB (bb_vinfo);
1903 stmt_vec_info stmt_info = NULL;
1904 tree dummy_type = NULL;
1907 /* Calculate vector costs. */
1908 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1910 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1911 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1914 /* Calculate scalar cost. */
/* Only stmts that would actually be replaced by vector code (vectorizable
   pure-SLP stmts) contribute to the scalar-side cost.  */
1915 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1917 stmt = gsi_stmt (si);
1918 stmt_info = vinfo_for_stmt (stmt);
1920 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1921 || !PURE_SLP_STMT (stmt_info))
/* Cost the stmt per its kind: scalar load, scalar store, or generic stmt,
   using the target hook so back-ends can tune the numbers.  */
1924 if (STMT_VINFO_DATA_REF (stmt_info))
1926 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1927 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1928 (scalar_load, dummy_type, dummy);
1930 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1931 (scalar_store, dummy_type, dummy);
1934 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1935 (scalar_stmt, dummy_type, dummy);
1937 scalar_cost += stmt_cost;
1940 if (vect_print_dump_info (REPORT_COST))
1942 fprintf (vect_dump, "Cost model analysis: \n");
1943 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1945 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1947 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1950 /* Vectorization is profitable if its cost is less than the cost of scalar
1952 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1958 /* Check if the basic block can be vectorized. */
1961 vect_slp_analyze_bb_1 (basic_block bb)
1963 bb_vec_info bb_vinfo;
1964 VEC (ddr_p, heap) *ddrs;
1965 VEC (slp_instance, heap) *slp_instances;
1966 slp_instance instance;
1969 int max_vf = MAX_VECTORIZATION_FACTOR;
1971 bb_vinfo = new_bb_vec_info (bb);
/* The analysis is a pipeline of checks; each failure dumps a reason,
   destroys the bb_vinfo, and bails out.  */
1975 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1977 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1978 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1981 destroy_bb_vec_info (bb_vinfo);
1985 ddrs = BB_VINFO_DDRS (bb_vinfo);
1986 if (!VEC_length (ddr_p, ddrs))
1988 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1989 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1992 destroy_bb_vec_info (bb_vinfo);
1996 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1999 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2000 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2001 "in basic block.\n");
2003 destroy_bb_vec_info (bb_vinfo);
2007 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2009 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2010 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2013 destroy_bb_vec_info (bb_vinfo);
2017 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2019 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2020 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2023 destroy_bb_vec_info (bb_vinfo);
2027 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2029 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2030 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2033 destroy_bb_vec_info (bb_vinfo);
2037 /* Check the SLP opportunities in the basic block, analyze and build SLP
2039 if (!vect_analyze_slp (NULL, bb_vinfo))
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2042 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2043 "in basic block.\n");
2045 destroy_bb_vec_info (bb_vinfo);
2049 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2051 /* Mark all the statements that we want to vectorize as pure SLP and
2053 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2055 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2056 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2059 if (!vect_slp_analyze_operations (bb_vinfo))
2061 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2062 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2064 destroy_bb_vec_info (bb_vinfo);
2068 /* Cost model: check if the vectorization is worthwhile. */
/* The cost-model check is gated on -fvect-cost-model being enabled.  */
2069 if (flag_vect_cost_model
2070 && !vect_bb_vectorization_profitable_p (bb_vinfo)
)
2072 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2073 fprintf (vect_dump, "not vectorized: vectorization is not "
2076 destroy_bb_vec_info (bb_vinfo);
2080 if (vect_print_dump_info (REPORT_DETAILS))
2081 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
/* Driver for basic-block SLP analysis: bound the block size, then try
   vect_slp_analyze_bb_1 with successively smaller vector sizes until one
   succeeds or no size is left.  */
2088 vect_slp_analyze_bb (basic_block bb)
2090 bb_vec_info bb_vinfo;
2092 gimple_stmt_iterator gsi;
2093 unsigned int vector_sizes;
2095 if (vect_print_dump_info (REPORT_DETAILS))
2096 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count real instructions, ignoring debug stmts, nops, and labels.  */
2098 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2100 gimple stmt = gsi_stmt (gsi);
2101 if (!is_gimple_debug (stmt)
2102 && !gimple_nop_p (stmt)
2103 && gimple_code (stmt) != GIMPLE_LABEL)
/* Give up early on blocks too large for SLP analysis (param-controlled).  */
2107 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2109 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2110 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2116 /* Autodetect first vector size we try. */
2117 current_vector_size = 0;
2118 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2122 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2126 destroy_bb_vec_info (bb_vinfo);
/* VECTOR_SIZES is a bitmask of sizes the target supports; clear the one
   just tried and stop when none remain.  */
2128 vector_sizes &= ~current_vector_size;
2129 if (vector_sizes == 0
2130 || current_vector_size == 0)
2133 /* Try the next biggest vector size. */
2134 current_vector_size = 1 << floor_log2 (vector_sizes);
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "***** Re-trying analysis with "
2137 "vector size %d\n", current_vector_size);
2142 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2143 the number of created vector stmts depends on the unrolling factor).
2144 However, the actual number of vector stmts for every SLP node depends on
2145 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2146 should be updated. In this function we assume that the inside costs
2147 calculated in vect_model_xxx_cost are linear in ncopies. */
2150 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2152 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2153 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2154 slp_instance instance;
2156 if (vect_print_dump_info (REPORT_SLP))
2157 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each instance's inside cost by VF / instance unrolling factor.  */
2159 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2160 /* We assume that costs are linear in ncopies. */
2161 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2162 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2166 /* For constant and loop invariant defs of SLP_NODE this function returns
2167 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2168 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2169 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2170 REDUC_INDEX is the index of the reduction operand in the statements, unless
2174 vect_get_constant_vectors (tree op, slp_tree slp_node,
2175 VEC (tree, heap) **vec_oprnds,
2176 unsigned int op_num, unsigned int number_of_vectors,
2179 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2180 gimple stmt = VEC_index (gimple, stmts, 0);
2181 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2185 int j, number_of_places_left_in_vector;
2188 int group_size = VEC_length (gimple, stmts);
2189 unsigned int vec_num, i;
2190 int number_of_copies = 1;
2191 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2192 bool constant_p, is_store;
2193 tree neutral_op = NULL;
2194 enum tree_code code = gimple_expr_code (stmt);
/* For reductions, pick the neutral element of the reduction operation so
   extra copies of the initial vector do not perturb the result.  */
2198 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2199 && reduc_index != -1)
2201 op_num = reduc_index - 1;
2202 op = gimple_op (stmt, reduc_index);
2203 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2204 we need either neutral operands or the original operands. See
2205 get_initial_def_for_reduction() for details. */
2208 case WIDEN_SUM_EXPR:
/* Additive reductions: neutral element is 0 (FP or integer).  */
2214 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2215 neutral_op = build_real (TREE_TYPE (op), dconst0);
2217 neutral_op = build_int_cst (TREE_TYPE (op), 0);
/* Multiplicative reduction: neutral element is 1.  */
2222 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2223 neutral_op = build_real (TREE_TYPE (op), dconst1);
2225 neutral_op = build_int_cst (TREE_TYPE (op), 1);
/* Bitwise-AND: neutral element is all-ones (-1).  */
2230 neutral_op = build_int_cst (TREE_TYPE (op), -1);
/* Otherwise fall back to the initial value from the loop preheader.  */
2235 def_stmt = SSA_NAME_DEF_STMT (op);
2236 loop = (gimple_bb (stmt))->loop_father;
2237 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2238 loop_preheader_edge (loop));
2246 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2249 op = gimple_assign_rhs1 (stmt);
2256 if (CONSTANT_CLASS_P (op))
2261 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2262 gcc_assert (vector_type);
2263 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2265 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2266 created vectors. It is greater than 1 if unrolling is performed.
2268 For example, we have two scalar operands, s1 and s2 (e.g., group of
2269 strided accesses of size two), while NUNITS is four (i.e., four scalars
2270 of this type can be packed in a vector). The output vector will contain
2271 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2274 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2275 containing the operands.
2277 For example, NUNITS is four as before, and the group size is 8
2278 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2279 {s5, s6, s7, s8}. */
2281 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2283 number_of_places_left_in_vector = nunits;
2284 for (j = 0; j < number_of_copies; j++)
/* Iterate the scalar stmts in reverse; the resulting vectors are built
   back-to-front and inverted after the loop (see below).  */
2286 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2289 op = gimple_assign_rhs1 (stmt);
/* Extract the requested operand according to the stmt shape: condition
   operands, rhs2/rhs3, call arguments, or a generic gimple operand.  */
2295 if (op_num == 0 || op_num == 1)
2297 tree cond = gimple_assign_rhs1 (stmt);
2298 op = TREE_OPERAND (cond, op_num);
2303 op = gimple_assign_rhs2 (stmt);
2305 op = gimple_assign_rhs3 (stmt);
2310 op = gimple_call_arg (stmt, op_num);
2314 op = gimple_op (stmt, op_num + 1);
2318 if (reduc_index != -1)
2320 loop = (gimple_bb (stmt))->loop_father;
2321 def_stmt = SSA_NAME_DEF_STMT (op);
2325 /* Get the def before the loop. In reduction chain we have only
2326 one initial value. */
2327 if ((j != (number_of_copies - 1)
2328 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2333 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2334 loop_preheader_edge (loop));
2337 /* Create 'vect_ = {op0,op1,...,opn}'. */
2338 t = tree_cons (NULL_TREE, op, t);
2340 number_of_places_left_in_vector--;
/* A full vector has been accumulated: materialize it as a VECTOR_CST or
   a CONSTRUCTOR depending on whether all elements are constants.  */
2342 if (number_of_places_left_in_vector == 0)
2344 number_of_places_left_in_vector = nunits;
2347 vec_cst = build_vector (vector_type, t);
2349 vec_cst = build_constructor_from_list (vector_type, t);
2350 VEC_quick_push (tree, voprnds,
2351 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2357 /* Since the vectors are created in the reverse order, we should invert
2359 vec_num = VEC_length (tree, voprnds);
2360 for (j = vec_num - 1; j >= 0; j--)
2362 vop = VEC_index (tree, voprnds, j);
2363 VEC_quick_push (tree, *vec_oprnds, vop);
2366 VEC_free (tree, heap, voprnds);
2368 /* In case that VF is greater than the unrolling factor needed for the SLP
2369 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2370 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2371 to replicate the vectors. */
2372 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2374 tree neutral_vec = NULL;
/* For reductions pad with splats of the neutral element; otherwise the
   existing vectors are recycled (replicated) below.  */
2379 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2381 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2385 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2386 VEC_quick_push (tree, *vec_oprnds, vop);
2392 /* Get vectorized definitions from SLP_NODE that contains corresponding
2393 vectorized def-stmts. */
2396 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2399 gimple vec_def_stmt;
2402 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
/* Collect the LHS of each already-generated vector stmt of the node.  */
2404 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2406 gcc_assert (vec_def_stmt);
2407 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2408 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2413 /* Get vectorized definitions for SLP_NODE.
2414 If the scalar definitions are loop invariants or constants, collect them and
2415 call vect_get_constant_vectors() to create vector stmts.
2416 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2417 must be stored in the corresponding child of SLP_NODE, and we call
2418 vect_get_slp_vect_defs () to retrieve them. */
2421 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2422 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2424 gimple first_stmt, first_def;
2425 int number_of_vects = 0, i;
2426 unsigned int child_index = 0;
2427 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2428 slp_tree child = NULL;
2429 VEC (tree, heap) *vec_defs;
2430 tree oprnd, def_lhs;
2431 bool vectorized_defs;
2433 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2434 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2436 /* For each operand we check if it has vectorized definitions in a child
2437 node or we need to create them (for invariants and constants). We
2438 check if the LHS of the first stmt of the next child matches OPRND.
2439 If it does, we found the correct child. Otherwise, we call
2440 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2441 to check this child node for the next operand. */
2442 vectorized_defs = false;
2443 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2445 child = (slp_tree) VEC_index (slp_void_p,
2446 SLP_TREE_CHILDREN (slp_node),
2448 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2450 /* In the end of a pattern sequence we have a use of the original stmt,
2451 so we need to compare OPRND with the original def. */
2452 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2453 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2454 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2455 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
/* The defining stmt's LHS lives in a different slot for calls.  */
2457 if (is_gimple_call (first_def))
2458 def_lhs = gimple_call_lhs (first_def);
2460 def_lhs = gimple_assign_lhs (first_def);
2462 if (operand_equal_p (oprnd, def_lhs, 0))
2464 /* The number of vector defs is determined by the number of
2465 vector statements in the node from which we get those
2467 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2468 vectorized_defs = true;
2473 if (!vectorized_defs)
2477 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2478 /* Number of vector stmts was calculated according to LHS in
2479 vect_schedule_slp_instance (), fix it by replacing LHS with
2480 RHS, if necessary. See vect_get_smallest_scalar_type () for
2482 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2484 if (rhs_size_unit != lhs_size_unit)
2486 number_of_vects *= rhs_size_unit;
2487 number_of_vects /= lhs_size_unit;
2492 /* Allocate memory for vectorized defs. */
2493 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2495 /* For reduction defs we call vect_get_constant_vectors (), since we are
2496 looking for initial loop invariant values. */
2497 if (vectorized_defs && reduc_index == -1)
2498 /* The defs are already vectorized. */
2499 vect_get_slp_vect_defs (child, &vec_defs);
2501 /* Build vectors from scalar defs. */
2502 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2503 number_of_vects, reduc_index);
2505 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2507 /* For reductions, we only need initial values. */
/* Only the first operand matters for reductions; stop after it.  */
2508 if (reduc_index != -1)
2514 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2515 building a vector of type MASK_TYPE from it) and two input vectors placed in
2516 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2517 shifting by STRIDE elements of DR_CHAIN for every copy.
2518 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2520 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2521 the created stmts must be inserted. */
2524 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2525 tree mask, int first_vec_indx, int second_vec_indx,
2526 gimple_stmt_iterator *gsi, slp_tree node,
2527 tree vectype, VEC(tree,heap) *dr_chain,
2528 int ncopies, int vect_stmts_counter)
2531 gimple perm_stmt = NULL;
2532 stmt_vec_info next_stmt_info;
2534 tree first_vec, second_vec, data_ref;
2536 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2538 /* Initialize the vect stmts of NODE to properly insert the generated
/* Pad the vec-stmts vector with NULLs up to its final length so the
   VEC_replace below can store at any slot.  */
2540 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2541 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2542 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2544 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2545 for (i = 0; i < ncopies; i++)
2547 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2548 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2550 /* Generate the permute statement. */
2551 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2552 first_vec, second_vec, mask);
2553 data_ref = make_ssa_name (perm_dest, perm_stmt);
2554 gimple_set_lhs (perm_stmt, data_ref);
2555 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2557 /* Store the vector statement in NODE. */
2558 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2559 stride * i + vect_stmts_counter, perm_stmt);
/* Advance to the input vectors for the next copy.  */
2561 first_vec_indx += stride;
2562 second_vec_indx += stride;
2565 /* Mark the scalar stmt as vectorized. */
/* The last generated permute stands in as the vectorized form of
   NEXT_SCALAR_STMT.  */
2566 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2567 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2571 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2572 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2573 representation. Check that the mask is valid and return FALSE if not.
2574 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2575 the next vector, i.e., the current first vector is not needed. */
2578 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2579 int mask_nunits, bool only_one_vec, int index,
2580 unsigned char *mask, int *current_mask_element,
2581 bool *need_next_vector, int *number_of_mask_fixes,
2582 bool *mask_fixed, bool *needs_first_vector)
/* NOTE(review): numbered listing with gaps — the return type, braces,
   the declaration of I, and the `return false/true` statements fall on
   omitted lines.  */
2586 /* Convert to target specific representation. */
2587 *current_mask_element = first_mask_element + m;
2588 /* Adjust the value in case it's a mask for second and third vectors. */
2589 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
/* An element below MASK_NUNITS selects from the first input vector.  */
2591 if (*current_mask_element < mask_nunits)
2592 *needs_first_vector = true;
2594 /* We have only one input vector to permute but the mask accesses values in
2595 the next vector as well. */
2596 if (only_one_vec && *current_mask_element >= mask_nunits)
2598 if (vect_print_dump_info (REPORT_DETAILS))
2600 fprintf (vect_dump, "permutation requires at least two vectors ");
2601 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2607 /* The mask requires the next vector. */
2608 if (*current_mask_element >= mask_nunits * 2)
2610 if (*needs_first_vector || *mask_fixed)
2612 /* We either need the first vector too or have already moved to the
2613 next vector. In both cases, this permutation needs three
2615 if (vect_print_dump_info (REPORT_DETAILS))
2617 fprintf (vect_dump, "permutation requires at "
2618 "least three vectors ");
2619 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2625 /* We move to the next vector, dropping the first one and working with
2626 the second and the third - we need to adjust the values of the mask
/* Rebase the current element and all previously recorded elements of this
   mask so they index the new (shifted) pair of input vectors.  */
2628 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2630 for (i = 0; i < index; i++)
2631 mask[i] -= mask_nunits * *number_of_mask_fixes;
2633 (*number_of_mask_fixes)++;
/* Once the mask was rebased, the caller must advance its input vectors.  */
2637 *need_next_vector = *mask_fixed;
2639 /* This was the last element of this mask. Start a new one. */
2640 if (index == mask_nunits - 1)
2642 *number_of_mask_fixes = 1;
2643 *mask_fixed = false;
2644 *needs_first_vector = false;
2651 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2652 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2653 permute statements for SLP_NODE_INSTANCE. */
2655 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2656 gimple_stmt_iterator *gsi, int vf,
2657 slp_instance slp_node_instance, bool analyze_only)
2659 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2660 tree mask_element_type = NULL_TREE, mask_type;
2661 int i, j, k, nunits, vec_index = 0, scalar_index;
2663 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2664 gimple next_scalar_stmt;
2665 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2666 int first_mask_element;
2667 int index, unroll_factor, current_mask_element, ncopies;
2668 unsigned char *mask;
2669 bool only_one_vec = false, need_next_vector = false;
2670 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2671 int number_of_mask_fixes = 1;
2672 bool mask_fixed = false;
2673 bool needs_first_vector = false;
2674 enum machine_mode mode;
2676 mode = TYPE_MODE (vectype);
2678 if (!can_vec_perm_p (mode, false, NULL))
2680 if (vect_print_dump_info (REPORT_DETAILS))
2682 fprintf (vect_dump, "no vect permute for ");
2683 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2688 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2689 same size as the vector element being permuted. */
2691 = lang_hooks.types.type_for_size
2692 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2693 mask_type = get_vectype_for_scalar_type (mask_element_type);
2694 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2695 mask = XALLOCAVEC (unsigned char, nunits);
2696 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2698 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2699 unrolling factor. */
2700 orig_vec_stmts_num = group_size *
2701 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2702 if (orig_vec_stmts_num == 1)
2703 only_one_vec = true;
2705 /* Number of copies is determined by the final vectorization factor
2706 relatively to SLP_NODE_INSTANCE unrolling factor. */
2707 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2709 /* Generate permutation masks for every NODE. Number of masks for each NODE
2710 is equal to GROUP_SIZE.
2711 E.g., we have a group of three nodes with three loads from the same
2712 location in each node, and the vector size is 4. I.e., we have a
2713 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2714 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2715 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2718 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2719 The last mask is illegal since we assume two operands for permute
2720 operation, and the mask element values can't be outside that range.
2721 Hence, the last mask must be converted into {2,5,5,5}.
2722 For the first two permutations we need the first and the second input
2723 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2724 we need the second and the third vectors: {b1,c1,a2,b2} and
2727 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2731 vect_stmts_counter = 0;
2733 first_vec_index = vec_index++;
2735 second_vec_index = first_vec_index;
2737 second_vec_index = vec_index++;
2739 for (j = 0; j < unroll_factor; j++)
2741 for (k = 0; k < group_size; k++)
2743 first_mask_element = i + j * group_size;
2744 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2745 nunits, only_one_vec, index,
2746 mask, ¤t_mask_element,
2748 &number_of_mask_fixes, &mask_fixed,
2749 &needs_first_vector))
2751 mask[index++] = current_mask_element;
2753 if (index == nunits)
2755 tree mask_vec = NULL;
2757 if (!can_vec_perm_p (mode, false, mask))
2759 if (vect_print_dump_info (REPORT_DETAILS))
2761 fprintf (vect_dump, "unsupported vect permute { ");
2762 for (i = 0; i < nunits; ++i)
2763 fprintf (vect_dump, "%d ", mask[i]);
2764 fprintf (vect_dump, "}\n");
2769 while (--index >= 0)
2771 tree t = build_int_cst (mask_element_type, mask[index]);
2772 mask_vec = tree_cons (NULL, t, mask_vec);
2774 mask_vec = build_vector (mask_type, mask_vec);
2779 if (need_next_vector)
2781 first_vec_index = second_vec_index;
2782 second_vec_index = vec_index;
2785 next_scalar_stmt = VEC_index (gimple,
2786 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2788 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2789 mask_vec, first_vec_index, second_vec_index,
2790 gsi, node, vectype, dr_chain,
2791 ncopies, vect_stmts_counter++);
2803 /* Vectorize SLP instance tree in postorder. */
2806 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2807 unsigned int vectorization_factor)
/* NOTE(review): numbered listing with gaps — the return type, braces and
   the declarations of STMT, VECTYPE, I and CHILD fall on omitted lines.  */
2810 bool strided_store, is_store;
2811 gimple_stmt_iterator si;
2812 stmt_vec_info stmt_info;
2813 unsigned int vec_stmts_size, nunits, group_size;
2816 slp_tree loads_node;
/* Postorder: vectorize all children before this node.  */
2822 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2823 vect_schedule_slp_instance ((slp_tree) child, instance,
2824 vectorization_factor);
/* The first scalar stmt of the node is the representative.  */
2826 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2827 stmt_info = vinfo_for_stmt (stmt);
2829 /* VECTYPE is the type of the destination. */
2830 vectype = STMT_VINFO_VECTYPE (stmt_info);
2831 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2832 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2834 /* For each SLP instance calculate number of vector stmts to be created
2835 for the scalar stmts in each node of the SLP tree. Number of vector
2836 elements in one vector iteration is the number of scalar elements in
2837 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2839 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2841 /* In case of load permutation we have to allocate vectorized statements for
2842 all the nodes that participate in that permutation. */
2843 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2845 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2847 if (!SLP_TREE_VEC_STMTS (loads_node))
2849 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2851 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Lazily allocate the vectorized-stmts vector of this node.  */
2856 if (!SLP_TREE_VEC_STMTS (node))
2858 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2859 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2862 if (vect_print_dump_info (REPORT_DETAILS))
2864 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2865 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2868 /* Loads should be inserted before the first load. */
2869 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2870 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2871 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2872 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2873 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2874 else if (is_pattern_stmt_p (stmt_info))
2875 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2877 si = gsi_for_stmt (stmt);
2879 /* Stores should be inserted just before the last store. */
2880 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2881 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2883 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2884 si = gsi_for_stmt (last_store);
2887 /* Mark the first element of the reduction chain as reduction to properly
2888 transform the node. In the analysis phase only the last element of the
2889 chain is marked as reduction. */
2890 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2891 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2893 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2894 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2897 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2902 /* Generate vector code for all SLP instances in the loop/basic block. */
2905 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
/* NOTE(review): numbered listing with gaps — the return type, braces, the
   declarations of I, J, VF and STORE, the loop/bb dispatch conditionals
   around lines 2914/2919, and the bb-case VF assignment fall on omitted
   lines.  Exactly one of LOOP_VINFO / BB_VINFO is expected to be set.  */
2907 VEC (slp_instance, heap) *slp_instances;
2908 slp_instance instance;
2910 bool is_store = false;
/* Loop case: take instances and VF from the loop info.  */
2914 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2915 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Basic-block case: take instances from the bb info.  */
2919 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2923 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2925 /* Schedule the tree of INSTANCE. */
2926 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2928 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2929 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2930 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: remove the now-vectorized scalar store stmts.  */
2933 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2935 slp_tree root = SLP_INSTANCE_TREE (instance);
2938 gimple_stmt_iterator gsi;
2940 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2941 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
/* Skip stmts without a data reference (not actual stores).  */
2943 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2946 /* Free the attached stmt_vec_info and remove the stmt. */
2947 gsi = gsi_for_stmt (store);
2948 gsi_remove (&gsi, true);
2949 free_stmt_vec_info (store);
2957 /* Vectorize the basic block. */
2960 vect_slp_transform_bb (basic_block bb)
/* NOTE(review): numbered listing with gaps — the return type and braces
   fall on omitted lines.  */
2962 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2963 gimple_stmt_iterator si;
/* Analysis must have created the bb vec-info before transform runs.  */
2965 gcc_assert (bb_vinfo);
2967 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "SLPing BB\n");
2970 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2972 gimple stmt = gsi_stmt (si);
2973 stmt_vec_info stmt_info;
2975 if (vect_print_dump_info (REPORT_DETAILS))
2977 fprintf (vect_dump, "------>SLPing statement: ");
2978 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2981 stmt_info = vinfo_for_stmt (stmt);
2982 gcc_assert (stmt_info);
2984 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2985 if (STMT_SLP_TYPE (stmt_info))
2987 vect_schedule_slp (NULL, bb_vinfo);
/* Renaming the virtual operand keeps the SSA web consistent after the
   inserted/removed statements.  */
2992 mark_sym_for_renaming (gimple_vop (cfun));
2993 /* The memory tags and pointers in vectorized statements need to
2994 have their SSA forms updated. FIXME, why can't this be delayed
2995 until all the loops have been transformed? */
2996 update_ssa (TODO_update_ssa);
2998 if (vect_print_dump_info (REPORT_DETAILS))
2999 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3001 destroy_bb_vec_info (bb_vinfo);