1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
52 static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
54 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
56 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
57 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
58 vect_recog_widen_mult_pattern,
59 vect_recog_widen_sum_pattern,
60 vect_recog_dot_prod_pattern,
61 vect_recog_pow_pattern,
62 vect_recog_over_widening_pattern,
63 vect_recog_widen_shift_pattern,
64 vect_recog_mixed_size_cond_pattern,
65 vect_recog_bool_pattern};
67 /* Function widened_name_p
69 Check whether NAME, an ssa-name used in USE_STMT,
70 is a result of a type-promotion, such that:
71 DEF_STMT: NAME = NOP (name0)
72 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
73 If CHECK_SIGN is TRUE, check that either both types are signed or both are
77 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
82 loop_vec_info loop_vinfo;
83 stmt_vec_info stmt_vinfo;
84 tree type = TREE_TYPE (name);
86 enum vect_def_type dt;
89 stmt_vinfo = vinfo_for_stmt (use_stmt);
90 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
92 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
95 if (dt != vect_internal_def
96 && dt != vect_external_def && dt != vect_constant_def)
102 if (!is_gimple_assign (*def_stmt))
105 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
108 oprnd0 = gimple_assign_rhs1 (*def_stmt);
110 *half_type = TREE_TYPE (oprnd0);
111 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
112 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
113 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
116 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
123 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
124 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
127 vect_recog_temp_ssa_var (tree type, gimple stmt)
129 tree var = create_tmp_var (type, "patt");
131 add_referenced_var (var);
132 var = make_ssa_name (var, stmt);
136 /* Function vect_recog_dot_prod_pattern
138 Try to find the following pattern:
144 sum_0 = phi <init, sum_1>
147 S3 x_T = (TYPE1) x_t;
148 S4 y_T = (TYPE1) y_t;
150 [S6 prod = (TYPE2) prod; #optional]
151 S7 sum_1 = prod + sum_0;
153 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
154 same size as 'TYPE1' or bigger. This is a special case of a reduction
159 * STMTS: Contains a stmt from which the pattern search begins. In the
160 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
165 * TYPE_IN: The type of the input arguments to the pattern.
167 * TYPE_OUT: The type of the output of this pattern.
169 * Return value: A new stmt that will be used to replace the sequence of
170 stmts that constitute the pattern. In this case it will be:
171 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
173 Note: The dot-prod idiom is a widening reduction pattern that is
174 vectorized without preserving all the intermediate results. It
175 produces only N/2 (widened) results (by summing up pairs of
176 intermediate results) rather than all N results. Therefore, we
177 cannot allow this pattern when we want to get all the results and in
178 the correct order (as is the case when this computation is in an
179 inner-loop nested in an outer-loop that is being vectorized). */
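/* Illustrative sketch, not part of the original file: a source-level loop of
   the shape this pattern matches, assuming 16-bit 'short' inputs summed into
   a 32-bit 'int' accumulator.  The function name and parameters are
   hypothetical.  */

int
example_dot_prod (short *x, short *y, int n)
{
  int sum = 0;
  int i;

  for (i = 0; i < n; i++)
    /* S3/S4 promote the operands, S5 multiplies, S7 accumulates.  */
    sum += (int) x[i] * (int) y[i];

  return sum;
}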
182 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
185 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
187 tree oprnd00, oprnd01;
188 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
189 tree type, half_type;
192 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
193 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
196 if (!is_gimple_assign (last_stmt))
199 type = gimple_expr_type (last_stmt);
201 /* Look for the following pattern
205 DDPROD = (TYPE2) DPROD;
206 sum_1 = DDPROD + sum_0;
208 - DX is double the size of X
209 - DY is double the size of Y
210 - DX, DY, DPROD all have the same type
211 - sum is the same size as DPROD or bigger
212 - sum has been recognized as a reduction variable.
214 This is equivalent to:
215 DPROD = X w* Y; #widen mult
216 sum_1 = DPROD w+ sum_0; #widen summation
218 DPROD = X w* Y; #widen mult
219 sum_1 = DPROD + sum_0; #summation
222 /* Starting from LAST_STMT, follow the defs of its uses in search
223 of the above pattern. */
225 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
228 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
230 /* Has been detected as widening-summation? */
232 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
233 type = gimple_expr_type (stmt);
234 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
236 oprnd0 = gimple_assign_rhs1 (stmt);
237 oprnd1 = gimple_assign_rhs2 (stmt);
238 half_type = TREE_TYPE (oprnd0);
244 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
246 oprnd0 = gimple_assign_rhs1 (last_stmt);
247 oprnd1 = gimple_assign_rhs2 (last_stmt);
248 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
249 || !types_compatible_p (TREE_TYPE (oprnd1), type))
253 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
256 oprnd0 = gimple_assign_rhs1 (stmt);
262 /* So far so good. Since last_stmt was detected as a (summation) reduction,
263 we know that oprnd1 is the reduction variable (defined by a loop-header
264 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
265 Left to check that oprnd0 is defined by a (widen_)mult_expr */
266 if (TREE_CODE (oprnd0) != SSA_NAME)
269 prod_type = half_type;
270 stmt = SSA_NAME_DEF_STMT (oprnd0);
272 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
273 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
276 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
277 inside the loop (in case we are analyzing an outer-loop). */
278 if (!is_gimple_assign (stmt))
280 stmt_vinfo = vinfo_for_stmt (stmt);
281 gcc_assert (stmt_vinfo);
282 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
284 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
286 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
288 /* Has been detected as a widening multiplication? */
290 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
291 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
293 stmt_vinfo = vinfo_for_stmt (stmt);
294 gcc_assert (stmt_vinfo);
295 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
296 oprnd00 = gimple_assign_rhs1 (stmt);
297 oprnd01 = gimple_assign_rhs2 (stmt);
301 tree half_type0, half_type1;
305 oprnd0 = gimple_assign_rhs1 (stmt);
306 oprnd1 = gimple_assign_rhs2 (stmt);
307 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
308 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
310 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
312 oprnd00 = gimple_assign_rhs1 (def_stmt);
313 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
315 oprnd01 = gimple_assign_rhs1 (def_stmt);
316 if (!types_compatible_p (half_type0, half_type1))
318 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
322 half_type = TREE_TYPE (oprnd00);
323 *type_in = half_type;
326 /* Pattern detected. Create a stmt to be used to replace the pattern: */
327 var = vect_recog_temp_ssa_var (type, NULL);
328 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
329 oprnd00, oprnd01, oprnd1);
331 if (vect_print_dump_info (REPORT_DETAILS))
333 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
334 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
337 /* We don't allow changing the order of the computation in the inner-loop
338 when doing outer-loop vectorization. */
339 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
345 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
348 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
349 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
351 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
352 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
353 that satisfies the above restrictions, we can perform a widening operation
354 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
355 with a_it = (interm_type) a_t; */
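/* Illustrative sketch, not part of the original file: source-level shapes this
   helper handles, assuming 8-bit 'char', 16-bit 'short' and 32-bit 'int'.
   The function name and parameters are hypothetical.  */

void
example_widen_op_by_const (short *a, unsigned char *b, int *out0, int *out1,
                           int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      /* 'int' is exactly twice 'short' and 3 fits 'short': plain widening
         multiplication by a constant.  */
      out0[i] = (int) a[i] * 3;

      /* 'int' is four times 'unsigned char'; 300 does not fit the half type
         but fits a 16-bit intermediate type, so the promotion is retargeted
         to that intermediate type.  */
      out1[i] = (int) b[i] * 300;
    }
}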
358 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
359 tree const_oprnd, tree *oprnd,
360 VEC (gimple, heap) **stmts, tree type,
361 tree *half_type, gimple def_stmt)
363 tree new_type, new_oprnd, tmp;
365 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
366 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
368 if (code != MULT_EXPR && code != LSHIFT_EXPR)
371 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
372 || (code == LSHIFT_EXPR
373 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
375 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
377 /* CONST_OPRND is a constant of HALF_TYPE. */
378 *oprnd = gimple_assign_rhs1 (def_stmt);
382 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
383 || !gimple_bb (def_stmt)
384 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
385 || !vinfo_for_stmt (def_stmt))
388 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
389 a type 2 times bigger than HALF_TYPE. */
390 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
391 TYPE_UNSIGNED (type));
392 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
393 || (code == LSHIFT_EXPR
394 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
397 /* Use NEW_TYPE for widening operation. */
398 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
400 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
401 /* Check if the already created pattern stmt is what we need. */
402 if (!is_gimple_assign (new_stmt)
403 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
404 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
407 VEC_safe_push (gimple, heap, *stmts, def_stmt);
408 *oprnd = gimple_assign_lhs (new_stmt);
412 /* Create a_T = (NEW_TYPE) a_t; */
413 *oprnd = gimple_assign_rhs1 (def_stmt);
414 tmp = create_tmp_var (new_type, NULL);
415 add_referenced_var (tmp);
416 new_oprnd = make_ssa_name (tmp, NULL);
417 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
419 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
420 VEC_safe_push (gimple, heap, *stmts, def_stmt);
424 *half_type = new_type;
429 /* Function vect_recog_widen_mult_pattern
431 Try to find the following pattern:
434 TYPE a_T, b_T, prod_T;
440 S5 prod_T = a_T * b_T;
442 where type 'TYPE' is at least double the size of type 'type'.
444 Also detect unsigned cases:
446 unsigned type a_t, b_t;
447 unsigned TYPE u_prod_T;
448 TYPE a_T, b_T, prod_T;
454 S5 prod_T = a_T * b_T;
455 S6 u_prod_T = (unsigned TYPE) prod_T;
457 and multiplication by constants:
464 S5 prod_T = a_T * CONST;
466 A special case of multiplication by constants is when 'TYPE' is 4 times
467 bigger than 'type', but CONST fits an intermediate type 2 times smaller
468 than 'TYPE'. In that case we create an additional pattern stmt for S3
469 to create a variable of the intermediate type, and perform widen-mult
470 on the intermediate type as well:
474 TYPE a_T, prod_T, prod_T';
478 '--> a_it = (interm_type) a_t;
479 S5 prod_T = a_T * CONST;
480 '--> prod_T' = a_it w* CONST;
484 * STMTS: Contains a stmt from which the pattern search begins. In the
485 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
486 is detected. In case of unsigned widen-mult, the original stmt (S5) is
487 replaced with S6 in STMTS. In case of multiplication by a constant
488 of an intermediate type (the last case above), STMTS also contains S3
489 (inserted before S5).
493 * TYPE_IN: The type of the input arguments to the pattern.
495 * TYPE_OUT: The type of the output of this pattern.
497 * Return value: A new stmt that will be used to replace the sequence of
498 stmts that constitute the pattern. In this case it will be:
499 WIDEN_MULT <a_t, b_t>
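/* Illustrative sketch, not part of the original file: a source-level loop of
   the signed widen-mult shape above, with 'type' = short and 'TYPE' = int.
   The function name and parameters are hypothetical.  */

void
example_widen_mult (short *a, short *b, int *prod, int n)
{
  int i;

  for (i = 0; i < n; i++)
    /* S3/S4 promote the operands, S5 multiplies.  */
    prod[i] = (int) a[i] * (int) b[i];
}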
503 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
504 tree *type_in, tree *type_out)
506 gimple last_stmt = VEC_pop (gimple, *stmts);
507 gimple def_stmt0, def_stmt1;
509 tree type, half_type0, half_type1;
511 tree vectype, vectype_out = NULL_TREE;
514 enum tree_code dummy_code;
516 VEC (tree, heap) *dummy_vec;
519 if (!is_gimple_assign (last_stmt))
522 type = gimple_expr_type (last_stmt);
524 /* Starting from LAST_STMT, follow the defs of its uses in search
525 of the above pattern. */
527 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
530 oprnd0 = gimple_assign_rhs1 (last_stmt);
531 oprnd1 = gimple_assign_rhs2 (last_stmt);
532 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
533 || !types_compatible_p (TREE_TYPE (oprnd1), type))
536 /* Check argument 0. */
537 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
539 /* Check argument 1. */
540 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
544 oprnd0 = gimple_assign_rhs1 (def_stmt0);
545 oprnd1 = gimple_assign_rhs1 (def_stmt1);
549 if (TREE_CODE (oprnd1) == INTEGER_CST
550 && TREE_CODE (half_type0) == INTEGER_TYPE
551 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
552 &oprnd0, stmts, type,
553 &half_type0, def_stmt0))
554 half_type1 = half_type0;
559 /* Handle unsigned case. Look for
560 S6 u_prod_T = (unsigned TYPE) prod_T;
561 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
562 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
564 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
565 imm_use_iterator imm_iter;
568 gimple use_stmt = NULL;
571 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
574 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
576 if (is_gimple_debug (USE_STMT (use_p)))
578 use_stmt = USE_STMT (use_p);
582 if (nuses != 1 || !is_gimple_assign (use_stmt)
583 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
586 use_lhs = gimple_assign_lhs (use_stmt);
587 use_type = TREE_TYPE (use_lhs);
588 if (!INTEGRAL_TYPE_P (use_type)
589 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
590 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
594 last_stmt = use_stmt;
597 if (!types_compatible_p (half_type0, half_type1))
600 /* Pattern detected. */
601 if (vect_print_dump_info (REPORT_DETAILS))
602 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
604 /* Check target support */
605 vectype = get_vectype_for_scalar_type (half_type0);
606 vectype_out = get_vectype_for_scalar_type (type);
609 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
610 vectype_out, vectype,
611 &dummy, &dummy, &dummy_code,
612 &dummy_code, &dummy_int, &dummy_vec))
616 *type_out = vectype_out;
618 /* Pattern supported. Create a stmt to be used to replace the pattern: */
619 var = vect_recog_temp_ssa_var (type, NULL);
620 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
623 if (vect_print_dump_info (REPORT_DETAILS))
624 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
626 VEC_safe_push (gimple, heap, *stmts, last_stmt);
631 /* Function vect_recog_pow_pattern
633 Try to find the following pattern:
637 with POW being one of pow, powf, powi, powif and N being
642 * LAST_STMT: A stmt from which the pattern search begins.
646 * TYPE_IN: The type of the input arguments to the pattern.
648 * TYPE_OUT: The type of the output of this pattern.
650 * Return value: A new stmt that will be used to replace the sequence of
651 stmts that constitute the pattern. In this case it will be:
658 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
661 gimple last_stmt = VEC_index (gimple, *stmts, 0);
662 tree fn, base, exp = NULL;
666 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
669 fn = gimple_call_fndecl (last_stmt);
670 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
673 switch (DECL_FUNCTION_CODE (fn))
679 base = gimple_call_arg (last_stmt, 0);
680 exp = gimple_call_arg (last_stmt, 1);
681 if (TREE_CODE (exp) != REAL_CST
682 && TREE_CODE (exp) != INTEGER_CST)
690 /* We now have a pow or powi builtin function call with a constant
693 *type_out = NULL_TREE;
695 /* Catch squaring. */
696 if ((host_integerp (exp, 0)
697 && tree_low_cst (exp, 0) == 2)
698 || (TREE_CODE (exp) == REAL_CST
699 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
701 *type_in = TREE_TYPE (base);
703 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
704 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
708 /* Catch square root. */
709 if (TREE_CODE (exp) == REAL_CST
710 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
712 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
713 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
716 gimple stmt = gimple_build_call (newfn, 1, base);
717 if (vectorizable_function (stmt, *type_in, *type_in)
720 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
721 gimple_call_set_lhs (stmt, var);
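/* Illustrative sketch, not part of the original file: source-level calls of
   the shapes recognized above, assuming a 'double' argument.  pow (x, 2.0)
   is rewritten as x * x and pow (x, 0.5) as sqrt (x).  The #include and the
   function names are only for this standalone sketch.  */

#include <math.h>

double
example_pow_square (double x)
{
  return pow (x, 2.0);          /* Caught by the squaring case.  */
}

double
example_pow_sqrt (double x)
{
  return pow (x, 0.5);          /* Caught by the square-root case.  */
}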
731 /* Function vect_recog_widen_sum_pattern
733 Try to find the following pattern:
736 TYPE x_T, sum = init;
738 sum_0 = phi <init, sum_1>
741 S3 sum_1 = x_T + sum_0;
743 where type 'TYPE' is at least double the size of type 'type', i.e., we're
744 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
745 a special case of a reduction computation.
749 * LAST_STMT: A stmt from which the pattern search begins. In the example,
750 when this function is called with S3, the pattern {S2,S3} will be detected.
754 * TYPE_IN: The type of the input arguments to the pattern.
756 * TYPE_OUT: The type of the output of this pattern.
758 * Return value: A new stmt that will be used to replace the sequence of
759 stmts that constitute the pattern. In this case it will be:
760 WIDEN_SUM <x_t, sum_0>
762 Note: The widening-sum idiom is a widening reduction pattern that is
763 vectorized without preserving all the intermediate results. It
764 produces only N/2 (widened) results (by summing up pairs of
765 intermediate results) rather than all N results. Therefore, we
766 cannot allow this pattern when we want to get all the results and in
767 the correct order (as is the case when this computation is in an
768 inner-loop nested in an outer-loop that is being vectorized). */
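/* Illustrative sketch, not part of the original file: a source-level
   reduction of the widen-sum shape above, with 'type' = short and
   'TYPE' = int.  The function name and parameters are hypothetical.  */

int
example_widen_sum (short *x, int n)
{
  int sum = 0;
  int i;

  for (i = 0; i < n; i++)
    /* S2 promotes x[i] to 'int', S3 accumulates.  */
    sum += x[i];

  return sum;
}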
771 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
774 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
776 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
777 tree type, half_type;
779 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
780 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
783 if (!is_gimple_assign (last_stmt))
786 type = gimple_expr_type (last_stmt);
788 /* Look for the following pattern
791 In which DX is at least double the size of X, and sum_1 has been
792 recognized as a reduction variable.
795 /* Starting from LAST_STMT, follow the defs of its uses in search
796 of the above pattern. */
798 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
801 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
804 oprnd0 = gimple_assign_rhs1 (last_stmt);
805 oprnd1 = gimple_assign_rhs2 (last_stmt);
806 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
807 || !types_compatible_p (TREE_TYPE (oprnd1), type))
810 /* So far so good. Since last_stmt was detected as a (summation) reduction,
811 we know that oprnd1 is the reduction variable (defined by a loop-header
812 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
813 Left to check that oprnd0 is defined by a cast from type 'type' to type
816 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
819 oprnd0 = gimple_assign_rhs1 (stmt);
820 *type_in = half_type;
823 /* Pattern detected. Create a stmt to be used to replace the pattern: */
824 var = vect_recog_temp_ssa_var (type, NULL);
825 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
828 if (vect_print_dump_info (REPORT_DETAILS))
830 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
831 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
834 /* We don't allow changing the order of the computation in the inner-loop
835 when doing outer-loop vectorization. */
836 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
842 /* Return TRUE if the operation in STMT can be performed on a smaller type.
845 STMT - a statement to check.
846 DEF - we support operations with two operands, one of which is constant.
847 The other operand can be defined by a demotion operation, or by a
848 previous statement in a sequence of over-promoted operations. In the
849 latter case DEF is used to replace that operand. (It is defined by a
850 pattern statement we created for the previous statement in the
854 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
855 NULL, it's the type of DEF.
856 STMTS - additional pattern statements. If a pattern statement (type
857 conversion) is created in this function, its original statement is
861 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
862 operands to use in the new pattern statement for STMT (will be created
863 in vect_recog_over_widening_pattern ()).
864 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
865 statements for STMT: the first one is a type promotion and the second
866 one is the operation itself. We return the type promotion statement
867 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
868 the second pattern statement. */
871 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
872 tree *op0, tree *op1, gimple *new_def_stmt,
873 VEC (gimple, heap) **stmts)
876 tree const_oprnd, oprnd;
877 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
878 gimple def_stmt, new_stmt;
880 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
881 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
883 *new_def_stmt = NULL;
885 if (!is_gimple_assign (stmt))
888 code = gimple_assign_rhs_code (stmt);
889 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
890 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
893 oprnd = gimple_assign_rhs1 (stmt);
894 const_oprnd = gimple_assign_rhs2 (stmt);
895 type = gimple_expr_type (stmt);
897 if (TREE_CODE (oprnd) != SSA_NAME
898 || TREE_CODE (const_oprnd) != INTEGER_CST)
901 /* If we are in the middle of a sequence, we use DEF from a previous
902 statement. Otherwise, OPRND has to be a result of type promotion. */
905 half_type = *new_type;
911 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
912 || !gimple_bb (def_stmt)
913 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
914 || !vinfo_for_stmt (def_stmt))
918 /* Can we perform the operation on a smaller type? */
924 if (!int_fits_type_p (const_oprnd, half_type))
926 /* HALF_TYPE is not enough. Try a bigger type if possible. */
927 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
930 interm_type = build_nonstandard_integer_type (
931 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
932 if (!int_fits_type_p (const_oprnd, interm_type))
939 /* Try intermediate type - HALF_TYPE is not enough for sure. */
940 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
943 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
944 (e.g., if the original value was char, the shift amount is at most 8
945 if we want to use short). */
946 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
949 interm_type = build_nonstandard_integer_type (
950 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
952 if (!vect_supportable_shift (code, interm_type))
958 if (vect_supportable_shift (code, half_type))
961 /* Try intermediate type - HALF_TYPE is not supported. */
962 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
965 interm_type = build_nonstandard_integer_type (
966 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
968 if (!vect_supportable_shift (code, interm_type))
977 /* There are four possible cases:
978 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
979 the first statement in the sequence)
980 a. The original, HALF_TYPE, is not enough - we replace the promotion
981 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
982 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
984 2. OPRND is defined by a pattern statement we created.
985 a. Its type is not sufficient for the operation, we create a new stmt:
986 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
987 this statement in NEW_DEF_STMT, and it is later put in
988 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
989 b. OPRND is good to use in the new statement. */
994 /* Replace the original type conversion HALF_TYPE->TYPE with
995 HALF_TYPE->INTERM_TYPE. */
996 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
998 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
999 /* Check if the already created pattern stmt is what we need. */
1000 if (!is_gimple_assign (new_stmt)
1001 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1002 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1005 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1006 oprnd = gimple_assign_lhs (new_stmt);
1010 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1011 oprnd = gimple_assign_rhs1 (def_stmt);
1012 tmp = create_tmp_reg (interm_type, NULL);
1013 add_referenced_var (tmp);
1014 new_oprnd = make_ssa_name (tmp, NULL);
1015 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1017 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1018 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1024 /* Retrieve the operand before the type promotion. */
1025 oprnd = gimple_assign_rhs1 (def_stmt);
1032 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1033 tmp = create_tmp_reg (interm_type, NULL);
1034 add_referenced_var (tmp);
1035 new_oprnd = make_ssa_name (tmp, NULL);
1036 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1039 *new_def_stmt = new_stmt;
1042 /* Otherwise, OPRND is already set. */
1046 *new_type = interm_type;
1048 *new_type = half_type;
1051 *op1 = fold_convert (*new_type, const_oprnd);
1057 /* Try to find a statement or a sequence of statements that can be performed
1061 TYPE x_T, res0_T, res1_T;
1064 S2 x_T = (TYPE) x_t;
1065 S3 res0_T = op (x_T, C0);
1066 S4 res1_T = op (res0_T, C1);
1067 S5 ... = () res1_T; - type demotion
1069 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1071 Check if S3 and S4 can be done on a smaller type than 'TYPE', which can either
1072 be 'type' or some intermediate type. For now, we expect S5 to be a type
1073 demotion operation. We also check that S3 and S4 have only one use. */
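/* Illustrative sketch, not part of the original file: a source-level sequence
   of the over-widening shape above, assuming 8-bit 'unsigned char' data that
   is promoted to 'int', operated on, and demoted back; both operations can be
   done on the narrow type.  The function name and parameters are
   hypothetical.  */

void
example_over_widening (unsigned char *in, unsigned char *out, int n)
{
  int i;

  for (i = 0; i < n; i++)
    /* S2 promotes, S3 shifts, S4 masks, S5 demotes the result.  */
    out[i] = (unsigned char) (((int) in[i] >> 2) & 0x0f);
}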
1076 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1077 tree *type_in, tree *type_out)
1079 gimple stmt = VEC_pop (gimple, *stmts);
1080 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1081 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1082 imm_use_iterator imm_iter;
1083 use_operand_p use_p;
1085 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1087 struct loop *loop = (gimple_bb (stmt))->loop_father;
1092 if (!vinfo_for_stmt (stmt)
1093 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1096 new_def_stmt = NULL;
1097 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1098 &op0, &op1, &new_def_stmt,
1107 /* STMT can be performed on a smaller type. Check its uses. */
1108 lhs = gimple_assign_lhs (stmt);
1110 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1112 if (is_gimple_debug (USE_STMT (use_p)))
1114 use_stmt = USE_STMT (use_p);
1118 if (nuses != 1 || !is_gimple_assign (use_stmt)
1119 || !gimple_bb (use_stmt)
1120 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1123 /* Create pattern statement for STMT. */
1124 vectype = get_vectype_for_scalar_type (new_type);
1128 /* We want to collect all the statements for which we create pattern
1129 statements, except for the case when the last statement in the
1130 sequence doesn't have a corresponding pattern statement. In such a
1131 case we associate the last pattern statement with the last statement
1132 in the sequence. Therefore, we only add the original statement to
1133 the list if we know that it is not the last. */
1135 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1137 var = vect_recog_temp_ssa_var (new_type, NULL);
1139 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1141 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1142 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1144 if (vect_print_dump_info (REPORT_DETAILS))
1146 fprintf (vect_dump, "created pattern stmt: ");
1147 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1156 /* We got a sequence. We expect it to end with a type demotion operation.
1157 Otherwise, we quit (for now). There are three possible cases: the
1158 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1159 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1160 NEW_TYPE differs (we create a new conversion statement). */
1161 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1163 use_lhs = gimple_assign_lhs (use_stmt);
1164 use_type = TREE_TYPE (use_lhs);
1165 /* Support only type promotion or signedness change. */
1166 if (!INTEGRAL_TYPE_P (use_type)
1167 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1170 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1171 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1173 /* Create NEW_TYPE->USE_TYPE conversion. */
1174 tmp = create_tmp_reg (use_type, NULL);
1175 add_referenced_var (tmp);
1176 new_oprnd = make_ssa_name (tmp, NULL);
1177 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1179 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1181 *type_in = get_vectype_for_scalar_type (new_type);
1182 *type_out = get_vectype_for_scalar_type (use_type);
1184 /* We created a pattern statement for the last statement in the
1185 sequence, so we don't need to associate it with the pattern
1186 statement created for PREV_STMT. Therefore, we add PREV_STMT
1187 to the list in order to mark it later in vect_pattern_recog_1. */
1189 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1194 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1195 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1198 *type_out = NULL_TREE;
1201 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1204 /* TODO: support general case, create a conversion to the correct type. */
1207 /* Pattern detected. */
1208 if (vect_print_dump_info (REPORT_DETAILS))
1210 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1211 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1214 return pattern_stmt;
1217 /* Detect widening shift pattern:
1223 S2 a_T = (TYPE) a_t;
1224 S3 res_T = a_T << CONST;
1226 where type 'TYPE' is at least double the size of type 'type'.
1228 Also detect unsigned cases:
1231 unsigned TYPE u_res_T;
1235 S2 a_T = (TYPE) a_t;
1236 S3 res_T = a_T << CONST;
1237 S4 u_res_T = (unsigned TYPE) res_T;
1239 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1240 create an additional pattern stmt for S2 to create a variable of an
1241 intermediate type, and perform widen-shift on the intermediate type:
1245 TYPE a_T, res_T, res_T';
1248 S2 a_T = (TYPE) a_t;
1249 '--> a_it = (interm_type) a_t;
1250 S3 res_T = a_T << CONST;
1251 '--> res_T' = a_it <<* CONST;
1255 * STMTS: Contains a stmt from which the pattern search begins.
1256 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1257 in STMTS. When an intermediate type is used and a pattern statement is
1258 created for S2, we also put S2 here (before S3).
1262 * TYPE_IN: The type of the input arguments to the pattern.
1264 * TYPE_OUT: The type of the output of this pattern.
1266 * Return value: A new stmt that will be used to replace the sequence of
1267 stmts that constitute the pattern. In this case it will be:
1268 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
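/* Illustrative sketch, not part of the original file: a source-level loop of
   the widen-shift shape above, with 'type' = short and 'TYPE' = int.  The
   function name and parameters are hypothetical.  */

void
example_widen_shift (short *a, int *res, int n)
{
  int i;

  for (i = 0; i < n; i++)
    /* S2 promotes a[i], S3 shifts by a constant that fits 'short'.  */
    res[i] = (int) a[i] << 8;
}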
1271 vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1272 tree *type_in, tree *type_out)
1274 gimple last_stmt = VEC_pop (gimple, *stmts);
1276 tree oprnd0, oprnd1;
1277 tree type, half_type0;
1278 gimple pattern_stmt, orig_stmt = NULL;
1279 tree vectype, vectype_out = NULL_TREE;
1282 enum tree_code dummy_code;
1284 VEC (tree, heap) * dummy_vec;
1285 gimple use_stmt = NULL;
1286 bool over_widen = false;
1288 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1291 orig_stmt = last_stmt;
1292 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1294 /* This statement was also detected as over-widening operation (it can't
1295 be any other pattern, because only over-widening detects shifts).
1296 LAST_STMT is the final type demotion statement, but its related
1297 statement is the shift. We analyze the related statement to catch cases:
1304 S1 a_T = (TYPE) a_t;
1305 S2 res_T = a_T << CONST;
1306 S3 res = (itype)res_T;
1308 (size of type * 2 <= size of itype
1309 and size of itype * 2 <= size of TYPE)
1311 code after over-widening pattern detection:
1313 S1 a_T = (TYPE) a_t;
1314 --> a_it = (itype) a_t;
1315 S2 res_T = a_T << CONST;
1316 S3 res = (itype)res_T; <--- LAST_STMT
1317 --> res = a_it << CONST;
1321 S1 a_T = (TYPE) a_t;
1322 --> a_it = (itype) a_t; - redundant
1323 S2 res_T = a_T << CONST;
1324 S3 res = (itype)res_T;
1325 --> res = a_t w<< CONST;
1327 i.e., we replace the three statements with res = a_t w<< CONST. */
1328 last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1332 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1335 oprnd0 = gimple_assign_rhs1 (last_stmt);
1336 oprnd1 = gimple_assign_rhs2 (last_stmt);
1337 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1340 /* Check operand 0: it has to be defined by a type promotion. */
1341 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1344 /* Check operand 1: has to be positive. We check that it fits the type
1345 in vect_handle_widen_op_by_const (). */
1346 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1349 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1350 type = gimple_expr_type (last_stmt);
1352 /* Check if this a widening operation. */
1353 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1355 type, &half_type0, def_stmt0))
1358 /* Handle unsigned case. Look for
1359 S4 u_res_T = (unsigned TYPE) res_T;
1360 Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1361 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1363 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1364 imm_use_iterator imm_iter;
1365 use_operand_p use_p;
1371 /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1372 We check here that TYPE is the correct type for the operation,
1373 i.e., it's the type of the original result. */
1374 tree orig_type = gimple_expr_type (orig_stmt);
1375 if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1376 || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1381 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1383 if (is_gimple_debug (USE_STMT (use_p)))
1385 use_stmt = USE_STMT (use_p);
1389 if (nuses != 1 || !is_gimple_assign (use_stmt)
1390 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1393 use_lhs = gimple_assign_lhs (use_stmt);
1394 use_type = TREE_TYPE (use_lhs);
1396 if (!INTEGRAL_TYPE_P (use_type)
1397 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1398 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1405 /* Pattern detected. */
1406 if (vect_print_dump_info (REPORT_DETAILS))
1407 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1409 /* Check target support. */
1410 vectype = get_vectype_for_scalar_type (half_type0);
1411 vectype_out = get_vectype_for_scalar_type (type);
1415 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1416 vectype_out, vectype,
1417 &dummy, &dummy, &dummy_code,
1418 &dummy_code, &dummy_int,
1423 *type_out = vectype_out;
1425 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1426 var = vect_recog_temp_ssa_var (type, NULL);
1428 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1430 if (vect_print_dump_info (REPORT_DETAILS))
1431 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1434 last_stmt = use_stmt;
1436 last_stmt = orig_stmt;
1438 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1439 return pattern_stmt;
1442 /* Function vect_recog_mixed_size_cond_pattern
1444 Try to find the following pattern:
1449 S1 a_T = x_t CMP y_t ? b_T : c_T;
1451 where type 'TYPE' is an integral type which has different size
1452 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1453 than 'type', the constants need to fit into an integer type
1454 with the same width as 'type'.
1458 * LAST_STMT: A stmt from which the pattern search begins.
1462 * TYPE_IN: The type of the input arguments to the pattern.
1464 * TYPE_OUT: The type of the output of this pattern.
1466 * Return value: A new stmt that will be used to replace the pattern.
1467 Additionally a def_stmt is added.
1469 a_it = x_t CMP y_t ? b_it : c_it;
1470 a_T = (TYPE) a_it; */
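/* Illustrative sketch, not part of the original file: a source-level loop
   roughly of the mixed-size COND_EXPR shape above, assuming an 'int' result
   selected by a comparison of 'char' values; the constants fit a char-sized
   integer type.  The function name and parameters are hypothetical.  */

void
example_mixed_size_cond (char *x, char *y, int *a, int n)
{
  int i;

  for (i = 0; i < n; i++)
    /* S1: an 'int' selected by a narrower comparison.  */
    a[i] = x[i] < y[i] ? 3 : 7;
}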
1473 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1476 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1477 tree cond_expr, then_clause, else_clause;
1478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1479 tree type, vectype, comp_vectype, itype, vecitype;
1480 enum machine_mode cmpmode;
1481 gimple pattern_stmt, def_stmt;
1482 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1484 if (!is_gimple_assign (last_stmt)
1485 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1486 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1489 cond_expr = gimple_assign_rhs1 (last_stmt);
1490 then_clause = gimple_assign_rhs2 (last_stmt);
1491 else_clause = gimple_assign_rhs3 (last_stmt);
1493 if (TREE_CODE (then_clause) != INTEGER_CST
1494 || TREE_CODE (else_clause) != INTEGER_CST)
1497 if (!COMPARISON_CLASS_P (cond_expr))
1501 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1502 if (comp_vectype == NULL_TREE)
1505 type = gimple_expr_type (last_stmt);
1506 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1508 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1511 vectype = get_vectype_for_scalar_type (type);
1512 if (vectype == NULL_TREE)
1515 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1518 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1519 TYPE_UNSIGNED (type));
1520 if (itype == NULL_TREE
1521 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1524 vecitype = get_vectype_for_scalar_type (itype);
1525 if (vecitype == NULL_TREE)
1528 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1531 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1533 if (!int_fits_type_p (then_clause, itype)
1534 || !int_fits_type_p (else_clause, itype))
1539 = gimple_build_assign_with_ops3 (COND_EXPR,
1540 vect_recog_temp_ssa_var (itype, NULL),
1541 unshare_expr (cond_expr),
1542 fold_convert (itype, then_clause),
1543 fold_convert (itype, else_clause));
1545 = gimple_build_assign_with_ops (NOP_EXPR,
1546 vect_recog_temp_ssa_var (type, NULL),
1547 gimple_assign_lhs (def_stmt), NULL_TREE);
1549 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
1550 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1551 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1552 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1553 *type_in = vecitype;
1554 *type_out = vectype;
1556 return pattern_stmt;
1560 /* Helper function of vect_recog_bool_pattern. Called recursively, return
1561 true if bool VAR can be optimized that way. */
1564 check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1567 enum vect_def_type dt;
1569 enum tree_code rhs_code;
1571 if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
1574 if (dt != vect_internal_def)
1577 if (!is_gimple_assign (def_stmt))
1580 if (!has_single_use (def))
1583 rhs1 = gimple_assign_rhs1 (def_stmt);
1584 rhs_code = gimple_assign_rhs_code (def_stmt);
1588 return check_bool_pattern (rhs1, loop_vinfo);
1591 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1592 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1593 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1595 return check_bool_pattern (rhs1, loop_vinfo);
1598 return check_bool_pattern (rhs1, loop_vinfo);
1603 if (!check_bool_pattern (rhs1, loop_vinfo))
1605 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1608 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1610 tree vecitype, comp_vectype;
1612 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1613 if (comp_vectype == NULL_TREE)
1616 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1618 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1620 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
1621 vecitype = get_vectype_for_scalar_type (itype);
1622 if (vecitype == NULL_TREE)
1626 vecitype = comp_vectype;
1627 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1634 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
1635 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
1636 to PATTERN_DEF_STMT and adding a cast as RELATED_STMT. */
1639 adjust_bool_pattern_cast (tree type, tree var)
1641 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1642 gimple cast_stmt, pattern_stmt;
1644 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
1645 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1646 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
1648 = gimple_build_assign_with_ops (NOP_EXPR,
1649 vect_recog_temp_ssa_var (type, NULL),
1650 gimple_assign_lhs (pattern_stmt),
1652 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
1653 return gimple_assign_lhs (cast_stmt);
1657 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
1658 recursively. VAR is an SSA_NAME that should be transformed from bool
1659 to a wider integer type, OUT_TYPE is the desired final integer type of
1660 the whole pattern, TRUEVAL should be NULL unless optimizing
1661 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1662 in the then_clause, STMTS is where statements with added pattern stmts
1663 should be pushed to. */
1666 adjust_bool_pattern (tree var, tree out_type, tree trueval,
1667 VEC (gimple, heap) **stmts)
1669 gimple stmt = SSA_NAME_DEF_STMT (var);
1670 enum tree_code rhs_code, def_rhs_code;
1671 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1673 gimple pattern_stmt, def_stmt;
1675 rhs1 = gimple_assign_rhs1 (stmt);
1676 rhs2 = gimple_assign_rhs2 (stmt);
1677 rhs_code = gimple_assign_rhs_code (stmt);
1678 loc = gimple_location (stmt);
1683 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1684 itype = TREE_TYPE (irhs1);
1686 = gimple_build_assign_with_ops (SSA_NAME,
1687 vect_recog_temp_ssa_var (itype, NULL),
1692 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1693 itype = TREE_TYPE (irhs1);
1695 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
1696 vect_recog_temp_ssa_var (itype, NULL),
1697 irhs1, build_int_cst (itype, 1));
1701 /* Try to optimize x = y & (a < b ? 1 : 0); into
1702 x = (a < b ? y : 0);
1708 S1 a_b = x1 CMP1 y1;
1709 S2 b_b = x2 CMP2 y2;
1711 S4 d_T = (TYPE) c_b;
1713 we would normally emit:
1715 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1716 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1717 S3' c_T = a_T & b_T;
1720 but we can save one stmt by using the
1721 result of one of the COND_EXPRs in the other COND_EXPR and leave
1722 BIT_AND_EXPR stmt out:
1724 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1725 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1728 At least when VEC_COND_EXPR is implemented using masks
1729 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
1730 computes the comparison masks and ands it, in one case with
1731 all ones vector, in the other case with a vector register.
1732 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
1733 often more expensive. */
1734 def_stmt = SSA_NAME_DEF_STMT (rhs2);
1735 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1736 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1738 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1739 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1740 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1741 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1744 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1745 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
1746 tstmt = VEC_pop (gimple, *stmts);
1747 gcc_assert (tstmt == def_stmt);
1748 VEC_quick_push (gimple, *stmts, stmt);
1749 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1750 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1751 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1752 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1756 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1759 def_stmt = SSA_NAME_DEF_STMT (rhs1);
1760 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1761 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1763 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1764 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1765 if (TYPE_PRECISION (TREE_TYPE (irhs2))
1766 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1769 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1770 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
1771 tstmt = VEC_pop (gimple, *stmts);
1772 gcc_assert (tstmt == def_stmt);
1773 VEC_quick_push (gimple, *stmts, stmt);
1774 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1775 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1776 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1777 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1781 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1787 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1788 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1790 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1791 != TYPE_PRECISION (TREE_TYPE (irhs2)))
1793 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
1794 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
1795 int out_prec = TYPE_PRECISION (out_type);
1796 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
1797 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
1798 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
1799 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
1802 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
1803 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
1806 itype = TREE_TYPE (irhs1);
1808 = gimple_build_assign_with_ops (rhs_code,
1809 vect_recog_temp_ssa_var (itype, NULL),
1814 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
1815 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
1816 || TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1818 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
1823 itype = TREE_TYPE (rhs1);
1824 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
1825 if (trueval == NULL_TREE)
1826 trueval = build_int_cst (itype, 1);
1828 gcc_checking_assert (useless_type_conversion_p (itype,
1829 TREE_TYPE (trueval)));
1831 = gimple_build_assign_with_ops3 (COND_EXPR,
1832 vect_recog_temp_ssa_var (itype, NULL),
1834 build_int_cst (itype, 0));
1838 VEC_safe_push (gimple, heap, *stmts, stmt);
1839 gimple_set_location (pattern_stmt, loc);
1840 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1841 return gimple_assign_lhs (pattern_stmt);
1845 /* Function vect_recog_bool_pattern
1847 Try to find pattern like following:
1849 bool a_b, b_b, c_b, d_b, e_b;
1852 S1 a_b = x1 CMP1 y1;
1853 S2 b_b = x2 CMP2 y2;
1855 S4 d_b = x3 CMP3 y3;
1857 S6 f_T = (TYPE) e_b;
1859 where type 'TYPE' is an integral type.
1863 * LAST_STMT: A stmt at the end from which the pattern
1864 search begins, i.e. cast of a bool to
1869 * TYPE_IN: The type of the input arguments to the pattern.
1871 * TYPE_OUT: The type of the output of this pattern.
1873 * Return value: A new stmt that will be used to replace the pattern.
1875 Assuming the size of TYPE is the same as the size of all comparisons
1876 (otherwise some casts would be added where needed), for the above
1877 sequence we create the following related pattern stmts:
1878 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1879 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1880 S4' d_T = x3 CMP3 y3 ? 1 : 0;
1881 S5' e_T = c_T | d_T;
1884 Instead of the above S3' we could emit:
1885 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1886 S3' c_T = a_T | b_T;
1887 but the above is more efficient. */
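/* Illustrative sketch, not part of the original file: a source-level loop of
   the bool shape above, assuming 'int' inputs and an 'int' result produced by
   casting a bool built from comparisons.  The function name and parameters
   are hypothetical.  */

void
example_bool_pattern (int *x, int *y, int *out, int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      _Bool a_b = x[i] < 3;     /* S1 */
      _Bool b_b = y[i] > 5;     /* S2 */
      _Bool c_b = a_b & b_b;    /* S3 */
      out[i] = (int) c_b;       /* Cast of the bool result, as in S6.  */
    }
}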
1890 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1893 gimple last_stmt = VEC_pop (gimple, *stmts);
1894 enum tree_code rhs_code;
1895 tree var, lhs, rhs, vectype;
1896 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1897 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1898 gimple pattern_stmt;
1900 if (!is_gimple_assign (last_stmt))
1903 var = gimple_assign_rhs1 (last_stmt);
1904 lhs = gimple_assign_lhs (last_stmt);
1906 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
1907 || !TYPE_UNSIGNED (TREE_TYPE (var)))
1908 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
1911 rhs_code = gimple_assign_rhs_code (last_stmt);
1912 if (CONVERT_EXPR_CODE_P (rhs_code))
1914 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
1916 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
1917 if (vectype == NULL_TREE)
1920 if (!check_bool_pattern (var, loop_vinfo))
1923 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
1924 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1925 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
1927 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
1930 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
1931 *type_out = vectype;
1933 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1934 return pattern_stmt;
1941 /* Mark statements that are involved in a pattern. */
1944 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1945 tree pattern_vectype)
1947 stmt_vec_info pattern_stmt_info, def_stmt_info;
1948 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1949 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1952 set_vinfo_for_stmt (pattern_stmt,
1953 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1954 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1955 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1957 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1958 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1959 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1960 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1961 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1962 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1963 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1964 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1965 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1967 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1968 def_stmt_info = vinfo_for_stmt (def_stmt);
1969 if (def_stmt_info == NULL)
1971 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1972 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1974 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1975 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1976 STMT_VINFO_DEF_TYPE (def_stmt_info)
1977 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1978 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
1979 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1983 /* Function vect_pattern_recog_1
1986 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1987 computation pattern.
1988 STMT: A stmt from which the pattern search should start.
1990 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1991 expression that computes the same functionality and can be used to
1992 replace the sequence of stmts that are involved in the pattern.
1995 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1996 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1997 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1998 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
1999 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2000 to the available target pattern.
2002 This function also does some bookkeeping, as explained in the documentation
2003 for vect_pattern_recog. */
2006 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2007 gimple_stmt_iterator si,
2008 VEC (gimple, heap) **stmts_to_replace)
2010 gimple stmt = gsi_stmt (si), pattern_stmt;
2011 stmt_vec_info stmt_info;
2012 loop_vec_info loop_vinfo;
2013 tree pattern_vectype;
2014 tree type_in, type_out;
2015 enum tree_code code;
2019 VEC_truncate (gimple, *stmts_to_replace, 0);
2020 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2021 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2025 stmt = VEC_last (gimple, *stmts_to_replace);
2026 stmt_info = vinfo_for_stmt (stmt);
2027 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2029 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2031 /* No need to check target support (already checked by the pattern
2032 recognition function). */
2033 pattern_vectype = type_out ? type_out : type_in;
2037 enum machine_mode vec_mode;
2038 enum insn_code icode;
2041 /* Check target support */
2042 type_in = get_vectype_for_scalar_type (type_in);
2046 type_out = get_vectype_for_scalar_type (type_out);
2051 pattern_vectype = type_out;
2053 if (is_gimple_assign (pattern_stmt))
2054 code = gimple_assign_rhs_code (pattern_stmt);
2057 gcc_assert (is_gimple_call (pattern_stmt));
2061 optab = optab_for_tree_code (code, type_in, optab_default);
2062 vec_mode = TYPE_MODE (type_in);
2064 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2065 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2069 /* Found a vectorizable pattern. */
2070 if (vect_print_dump_info (REPORT_DETAILS))
2072 fprintf (vect_dump, "pattern recognized: ");
2073 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2076 /* Mark the stmts that are involved in the pattern. */
2077 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2079 /* Patterns cannot be vectorized using SLP, because they change the order of
2081 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2083 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2085 /* It is possible that additional pattern stmts are created and inserted in
2086 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2087 relevant statements. */
2088 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2089 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2092 stmt_info = vinfo_for_stmt (stmt);
2093 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2094 if (vect_print_dump_info (REPORT_DETAILS))
2096 fprintf (vect_dump, "additional pattern stmt: ");
2097 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2100 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2105 /* Function vect_pattern_recog
2108 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2111 Output - for each computation idiom that is detected we create a new stmt
2112 that provides the same functionality and that can be vectorized. We
2113 also record some information in the struct_stmt_info of the relevant
2114 stmts, as explained below:
2116 At the entry to this function we have the following stmts, with the
2117 following initial value in the STMT_VINFO fields:
2119 stmt in_pattern_p related_stmt vec_stmt
2120 S1: a_i = .... - - -
2121 S2: a_2 = ..use(a_i).. - - -
2122 S3: a_1 = ..use(a_2).. - - -
2123 S4: a_0 = ..use(a_1).. - - -
2124 S5: ... = ..use(a_0).. - - -
2126 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2127 represented by a single stmt. We then:
2128 - create a new stmt S6 equivalent to the pattern (the stmt is not
2129 inserted into the code)
2130 - fill in the STMT_VINFO fields as follows:
2132 in_pattern_p related_stmt vec_stmt
2133 S1: a_i = .... - - -
2134 S2: a_2 = ..use(a_i).. - - -
2135 S3: a_1 = ..use(a_2).. - - -
2136 S4: a_0 = ..use(a_1).. true S6 -
2137 '---> S6: a_new = .... - S4 -
2138 S5: ... = ..use(a_0).. - - -
2140 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2141 to each other through the RELATED_STMT field).
2143 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2144 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2145 remain irrelevant unless used by stmts other than S4.
2147 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2148 (because they are marked as irrelevant). It will vectorize S6, and record
2149 a pointer to the new vector stmt VS6 from S6 (as usual).
2150 S4 will be skipped, and S5 will be vectorized as usual:
2152 in_pattern_p related_stmt vec_stmt
2153 S1: a_i = .... - - -
2154 S2: a_2 = ..use(a_i).. - - -
2155 S3: a_1 = ..use(a_2).. - - -
2156 > VS6: va_new = .... - - -
2157 S4: a_0 = ..use(a_1).. true S6 VS6
2158 '---> S6: a_new = .... - S4 VS6
2159 > VS5: ... = ..vuse(va_new).. - - -
2160 S5: ... = ..use(a_0).. - - -
2162 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2163 elsewhere), and we'll end up with:
2166 VS5: ... = ..vuse(va_new)..
2168 In case of more than one pattern statements, e.g., widen-mult with
2172 S2 a_T = (TYPE) a_t;
2173 '--> S3: a_it = (interm_type) a_t;
2174 S4 prod_T = a_T * CONST;
2175 '--> S5: prod_T' = a_it w* CONST;
2177 there may be other users of a_T outside the pattern. In that case S2 will
2178 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2179 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2180 be recorded in S3. */
2183 vect_pattern_recog (loop_vec_info loop_vinfo)
2185 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2186 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2187 unsigned int nbbs = loop->num_nodes;
2188 gimple_stmt_iterator si;
2190 vect_recog_func_ptr vect_recog_func;
2191 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2193 if (vect_print_dump_info (REPORT_DETAILS))
2194 fprintf (vect_dump, "=== vect_pattern_recog ===");
2196 /* Scan through the loop stmts, applying the pattern recognition
2197 functions starting at each stmt visited: */
2198 for (i = 0; i < nbbs; i++)
2200 basic_block bb = bbs[i];
2201 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2203 /* Scan over all generic vect_recog_xxx_pattern functions. */
2204 for (j = 0; j < NUM_PATTERNS; j++)
2206 vect_recog_func = vect_vect_recog_func_ptrs[j];
2207 vect_pattern_recog_1 (vect_recog_func, si,
2213 VEC_free (gimple, heap, stmts_to_replace);