1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
/* Forward declarations of the individual recognizers.  Each one takes the
   statement vector the search starts from plus out-slots for the pattern's
   input/output vector types, and returns a replacement stmt (or NULL).
   NOTE(review): the continuation lines of several prototypes are not visible
   in this view — confirm the trailing "tree *);" parts against the file.  */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
52 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
54 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
/* Dispatch table of all recognizers, tried in this order by the pattern
   recognition driver.  Its length must stay in sync with NUM_PATTERNS
   (defined elsewhere, presumably in tree-vectorizer.h — verify).  */
55 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
56 vect_recog_widen_mult_pattern,
57 vect_recog_widen_sum_pattern,
58 vect_recog_dot_prod_pattern,
59 vect_recog_pow_pattern,
60 vect_recog_over_widening_pattern,
61 vect_recog_mixed_size_cond_pattern,
62 vect_recog_bool_pattern};
65 /* Function widened_name_p
67 Check whether NAME, an ssa-name used in USE_STMT,
68 is a result of a type-promotion, such that:
69 DEF_STMT: NAME = NOP (name0)
70 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
71 If CHECK_SIGN is TRUE, check that either both types are signed or both are
75 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
80 loop_vec_info loop_vinfo;
81 stmt_vec_info stmt_vinfo;
82 tree type = TREE_TYPE (name);
84 enum vect_def_type dt;
/* The loop info is reached through the stmt_vec_info of the use site.  */
87 stmt_vinfo = vinfo_for_stmt (use_stmt);
88 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
/* NAME must have a resolvable definition; vect_is_simple_use also fills
   *DEF_STMT and the def kind DT.  */
90 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
/* Only internal, external and constant defs are acceptable here.  */
93 if (dt != vect_internal_def
94 && dt != vect_external_def && dt != vect_constant_def)
/* The def must literally be a conversion: NAME = (TYPE) name0.  */
100 if (!is_gimple_assign (*def_stmt))
103 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
106 oprnd0 = gimple_assign_rhs1 (*def_stmt);
/* Reject non-integral types, sign mismatches (when CHECK_SIGN), and
   promotions of less than 2x precision.  */
108 *half_type = TREE_TYPE (oprnd0);
109 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
110 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
111 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
/* Finally the narrow operand itself must be a simple use as well.  */
114 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
121 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
122 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
125 vect_recog_temp_ssa_var (tree type, gimple stmt)
/* All pattern temporaries share the "patt" name prefix for dump readability.  */
127 tree var = create_tmp_var (type, "patt");
/* Register the new decl with the SSA machinery before wrapping it.  */
129 add_referenced_var (var);
130 var = make_ssa_name (var, stmt);
134 /* Function vect_recog_dot_prod_pattern
136 Try to find the following pattern:
142 sum_0 = phi <init, sum_1>
145 S3 x_T = (TYPE1) x_t;
146 S4 y_T = (TYPE1) y_t;
148 [S6 prod = (TYPE2) prod; #optional]
149 S7 sum_1 = prod + sum_0;
151 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
152 same size of 'TYPE1' or bigger. This is a special case of a reduction
157 * STMTS: Contains a stmt from which the pattern search begins. In the
158 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
163 * TYPE_IN: The type of the input arguments to the pattern.
165 * TYPE_OUT: The type of the output of this pattern.
167 * Return value: A new stmt that will be used to replace the sequence of
168 stmts that constitute the pattern. In this case it will be:
169 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
171 Note: The dot-prod idiom is a widening reduction pattern that is
172 vectorized without preserving all the intermediate results. It
173 produces only N/2 (widened) results (by summing up pairs of
174 intermediate results) rather than all N results. Therefore, we
175 cannot allow this pattern when we want to get all the results and in
176 the correct order (as is the case when this computation is in an
177 inner-loop nested in an outer-loop that us being vectorized). */
180 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
/* The search anchor is the first stmt in STMTS (the summation S7).  */
183 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
185 tree oprnd00, oprnd01;
186 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
187 tree type, half_type;
190 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
191 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
194 if (!is_gimple_assign (last_stmt))
197 type = gimple_expr_type (last_stmt);
199 /* Look for the following pattern
203 DDPROD = (TYPE2) DPROD;
204 sum_1 = DDPROD + sum_0;
206 - DX is double the size of X
207 - DY is double the size of Y
208 - DX, DY, DPROD all have the same type
209 - sum is the same size of DPROD or bigger
210 - sum has been recognized as a reduction variable.
212 This is equivalent to:
213 DPROD = X w* Y; #widen mult
214 sum_1 = DPROD w+ sum_0; #widen summation
216 DPROD = X w* Y; #widen mult
217 sum_1 = DPROD + sum_0; #summation
220 /* Starting from LAST_STMT, follow the defs of its uses in search
221 of the above pattern. */
223 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
226 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
228 /* Has been detected as widening-summation? */
/* If so, continue the search from the related WIDEN_SUM pattern stmt.  */
230 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
231 type = gimple_expr_type (stmt);
232 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
234 oprnd0 = gimple_assign_rhs1 (stmt);
235 oprnd1 = gimple_assign_rhs2 (stmt);
236 half_type = TREE_TYPE (oprnd0);
/* Otherwise LAST_STMT must itself be a recognized reduction whose operands
   both match its expression type.  */
242 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
244 oprnd0 = gimple_assign_rhs1 (last_stmt);
245 oprnd1 = gimple_assign_rhs2 (last_stmt);
246 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
247 || !types_compatible_p (TREE_TYPE (oprnd1), type))
251 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
254 oprnd0 = gimple_assign_rhs1 (stmt)
260 /* So far so good. Since last_stmt was detected as a (summation) reduction,
261 we know that oprnd1 is the reduction variable (defined by a loop-header
262 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
263 Left to check that oprnd0 is defined by a (widen_)mult_expr */
264 if (TREE_CODE (oprnd0) != SSA_NAME)
267 prod_type = half_type;
268 stmt = SSA_NAME_DEF_STMT (oprnd0);
270 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
271 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
274 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
275 inside the loop (in case we are analyzing an outer-loop). */
276 if (!is_gimple_assign (stmt))
278 stmt_vinfo = vinfo_for_stmt (stmt);
279 gcc_assert (stmt_vinfo);
280 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
282 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
284 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
286 /* Has been detected as a widening multiplication? */
288 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
289 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
291 stmt_vinfo = vinfo_for_stmt (stmt);
292 gcc_assert (stmt_vinfo);
293 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
294 oprnd00 = gimple_assign_rhs1 (stmt);
295 oprnd01 = gimple_assign_rhs2 (stmt);
/* Plain MULT_EXPR: both operands must be widened from the same narrow
   type (sign-checked), and PROD_TYPE must be exactly twice as wide.  */
299 tree half_type0, half_type1;
303 oprnd0 = gimple_assign_rhs1 (stmt);
304 oprnd1 = gimple_assign_rhs2 (stmt);
305 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
306 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
308 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
310 oprnd00 = gimple_assign_rhs1 (def_stmt);
311 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
313 oprnd01 = gimple_assign_rhs1 (def_stmt);
314 if (!types_compatible_p (half_type0, half_type1))
316 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
320 half_type = TREE_TYPE (oprnd00);
321 *type_in = half_type;
324 /* Pattern detected. Create a stmt to be used to replace the pattern: */
325 var = vect_recog_temp_ssa_var (type, NULL);
326 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
327 oprnd00, oprnd01, oprnd1);
329 if (vect_print_dump_info (REPORT_DETAILS))
331 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
332 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
335 /* We don't allow changing the order of the computation in the inner-loop
336 when doing outer-loop vectorization. */
337 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
343 /* Handle two cases of multiplication by a constant. The first one is when
344 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
345 operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
348 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
349 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
350 TYPE), we can perform widen-mult from the intermediate type to TYPE and
351 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
354 vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
355 VEC (gimple, heap) **stmts, tree type,
356 tree *half_type, gimple def_stmt)
358 tree new_type, new_oprnd, tmp;
360 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
361 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
/* Easy case: the constant already fits HALF_TYPE, use the narrow operand.  */
363 if (int_fits_type_p (const_oprnd, *half_type))
365 /* CONST_OPRND is a constant of HALF_TYPE. */
366 *oprnd = gimple_assign_rhs1 (def_stmt);
/* The intermediate-type trick needs TYPE >= 4x HALF_TYPE and DEF_STMT to be
   an analyzable stmt inside the loop.  */
370 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
371 || !gimple_bb (def_stmt)
372 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
373 || !vinfo_for_stmt (def_stmt))
376 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
377 a type 2 times bigger than HALF_TYPE. */
378 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
379 TYPE_UNSIGNED (type));
380 if (!int_fits_type_p (const_oprnd, new_type))
383 /* Use NEW_TYPE for widen_mult. */
384 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
/* A pattern stmt was already created for DEF_STMT (e.g. by a previous call);
   reuse it only if it is the NOP conversion to NEW_TYPE we need.  */
386 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
387 /* Check if the already created pattern stmt is what we need. */
388 if (!is_gimple_assign (new_stmt)
389 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
390 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
393 VEC_safe_push (gimple, heap, *stmts, def_stmt);
394 *oprnd = gimple_assign_lhs (new_stmt);
398 /* Create a_T = (NEW_TYPE) a_t; */
399 *oprnd = gimple_assign_rhs1 (def_stmt);
400 tmp = create_tmp_var (new_type, NULL);
401 add_referenced_var (tmp);
402 new_oprnd = make_ssa_name (tmp, NULL);
403 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
/* Remember the conversion as DEF_STMT's pattern stmt and record DEF_STMT
   so the driver marks it as part of the pattern.  */
405 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
406 VEC_safe_push (gimple, heap, *stmts, def_stmt);
/* Report the (possibly widened) half type back to the caller.  */
410 *half_type = new_type;
415 /* Function vect_recog_widen_mult_pattern
417 Try to find the following pattern:
420 TYPE a_T, b_T, prod_T;
426 S5 prod_T = a_T * b_T;
428 where type 'TYPE' is at least double the size of type 'type'.
430 Also detect unsgigned cases:
432 unsigned type a_t, b_t;
433 unsigned TYPE u_prod_T;
434 TYPE a_T, b_T, prod_T;
440 S5 prod_T = a_T * b_T;
441 S6 u_prod_T = (unsigned TYPE) prod_T;
443 and multiplication by constants:
450 S5 prod_T = a_T * CONST;
452 A special case of multiplication by constants is when 'TYPE' is 4 times
453 bigger than 'type', but CONST fits an intermediate type 2 times smaller
454 than 'TYPE'. In that case we create an additional pattern stmt for S3
455 to create a variable of the intermediate type, and perform widen-mult
456 on the intermediate type as well:
460 TYPE a_T, prod_T, prod_T';
464 '--> a_it = (interm_type) a_t;
465 S5 prod_T = a_T * CONST;
466 '--> prod_T' = a_it w* CONST;
470 * STMTS: Contains a stmt from which the pattern search begins. In the
471 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
472 is detected. In case of unsigned widen-mult, the original stmt (S5) is
473 replaced with S6 in STMTS. In case of multiplication by a constant
474 of an intermediate type (the last case above), STMTS also contains S3
475 (inserted before S5).
479 * TYPE_IN: The type of the input arguments to the pattern.
481 * TYPE_OUT: The type of the output of this pattern.
483 * Return value: A new stmt that will be used to replace the sequence of
484 stmts that constitute the pattern. In this case it will be:
485 WIDEN_MULT <a_t, b_t>
489 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
490 tree *type_in, tree *type_out)
/* Note: the anchor is popped here (unlike VEC_index elsewhere) because it
   may be replaced by S6 below before being pushed back.  */
492 gimple last_stmt = VEC_pop (gimple, *stmts);
493 gimple def_stmt0, def_stmt1;
495 tree type, half_type0, half_type1;
497 tree vectype, vectype_out = NULL_TREE;
500 enum tree_code dummy_code;
502 VEC (tree, heap) *dummy_vec;
505 if (!is_gimple_assign (last_stmt))
508 type = gimple_expr_type (last_stmt);
510 /* Starting from LAST_STMT, follow the defs of its uses in search
511 of the above pattern. */
513 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
516 oprnd0 = gimple_assign_rhs1 (last_stmt);
517 oprnd1 = gimple_assign_rhs2 (last_stmt);
518 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
519 || !types_compatible_p (TREE_TYPE (oprnd1), type))
522 /* Check argument 0. */
523 op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
524 /* Check argument 1. */
525 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
527 /* In case of multiplication by a constant one of the operands may not match
528 the pattern, but not both. */
529 if (!op0_ok && !op1_ok)
532 if (op0_ok && op1_ok)
/* Both operands are promotions: use the narrow sources directly.  */
534 oprnd0 = gimple_assign_rhs1 (def_stmt0);
535 oprnd1 = gimple_assign_rhs1 (def_stmt1);
/* One operand is an integer constant: try the by-constant handler, which
   may also introduce an intermediate-type conversion (see above).  */
539 if (TREE_CODE (oprnd0) == INTEGER_CST
540 && TREE_CODE (half_type1) == INTEGER_TYPE
541 && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
543 &half_type1, def_stmt1))
544 half_type0 = half_type1;
550 if (TREE_CODE (oprnd1) == INTEGER_CST
551 && TREE_CODE (half_type0) == INTEGER_TYPE
552 && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
554 &half_type0, def_stmt0))
555 half_type1 = half_type0;
560 /* Handle unsigned case. Look for
561 S6 u_prod_T = (unsigned TYPE) prod_T;
562 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
563 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
565 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
566 imm_use_iterator imm_iter;
569 gimple use_stmt = NULL;
572 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
/* Find the single non-debug use of the product; it must be the S6 cast.  */
575 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
577 if (is_gimple_debug (USE_STMT (use_p)))
579 use_stmt = USE_STMT (use_p);
583 if (nuses != 1 || !is_gimple_assign (use_stmt)
584 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
587 use_lhs = gimple_assign_lhs (use_stmt);
588 use_type = TREE_TYPE (use_lhs);
/* S6 must flip only the sign, keeping integrality and precision.  */
589 if (!INTEGRAL_TYPE_P (use_type)
590 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
591 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
/* From here on, S6 plays the role of LAST_STMT.  */
595 last_stmt = use_stmt;
598 if (!types_compatible_p (half_type0, half_type1))
601 /* Pattern detected. */
602 if (vect_print_dump_info (REPORT_DETAILS))
603 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
605 /* Check target support */
606 vectype = get_vectype_for_scalar_type (half_type0);
607 vectype_out = get_vectype_for_scalar_type (type);
610 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
611 vectype_out, vectype,
612 &dummy, &dummy, &dummy_code,
613 &dummy_code, &dummy_int, &dummy_vec))
617 *type_out = vectype_out;
619 /* Pattern supported. Create a stmt to be used to replace the pattern: */
620 var = vect_recog_temp_ssa_var (type, NULL);
621 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
624 if (vect_print_dump_info (REPORT_DETAILS))
625 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
/* Push back the (possibly updated) anchor stmt.  */
627 VEC_safe_push (gimple, heap, *stmts, last_stmt);
632 /* Function vect_recog_pow_pattern
634 Try to find the following pattern:
638 with POW being one of pow, powf, powi, powif and N being
643 * LAST_STMT: A stmt from which the pattern search begins.
647 * TYPE_IN: The type of the input arguments to the pattern.
649 * TYPE_OUT: The type of the output of this pattern.
651 * Return value: A new stmt that will be used to replace the sequence of
652 stmts that constitute the pattern. In this case it will be:
659 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
662 gimple last_stmt = VEC_index (gimple, *stmts, 0);
663 tree fn, base, exp = NULL;
/* Only calls whose result is actually used are candidates.  */
667 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
670 fn = gimple_call_fndecl (last_stmt);
671 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
/* NOTE(review): switch cases (BUILT_IN_POW etc.) are elided in this view;
   the visible arm extracts base and a constant exponent.  */
674 switch (DECL_FUNCTION_CODE (fn))
680 base = gimple_call_arg (last_stmt, 0);
681 exp = gimple_call_arg (last_stmt, 1);
682 if (TREE_CODE (exp) != REAL_CST
683 && TREE_CODE (exp) != INTEGER_CST)
691 /* We now have a pow or powi builtin function call with a constant
694 *type_out = NULL_TREE;
696 /* Catch squaring. */
/* pow (x, 2) and powi (x, 2) both become x * x.  */
697 if ((host_integerp (exp, 0)
698 && tree_low_cst (exp, 0) == 2)
699 || (TREE_CODE (exp) == REAL_CST
700 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
702 *type_in = TREE_TYPE (base);
704 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
705 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
709 /* Catch square root. */
/* pow (x, 0.5) becomes sqrt (x) when a vectorizable sqrt exists.  */
710 if (TREE_CODE (exp) == REAL_CST
711 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
713 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
714 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
717 gimple stmt = gimple_build_call (newfn, 1, base);
718 if (vectorizable_function (stmt, *type_in, *type_in)
721 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
722 gimple_call_set_lhs (stmt, var);
732 /* Function vect_recog_widen_sum_pattern
734 Try to find the following pattern:
737 TYPE x_T, sum = init;
739 sum_0 = phi <init, sum_1>
742 S3 sum_1 = x_T + sum_0;
744 where type 'TYPE' is at least double the size of type 'type', i.e - we're
745 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
746 a special case of a reduction computation.
750 * LAST_STMT: A stmt from which the pattern search begins. In the example,
751 when this function is called with S3, the pattern {S2,S3} will be detected.
755 * TYPE_IN: The type of the input arguments to the pattern.
757 * TYPE_OUT: The type of the output of this pattern.
759 * Return value: A new stmt that will be used to replace the sequence of
760 stmts that constitute the pattern. In this case it will be:
761 WIDEN_SUM <x_t, sum_0>
763 Note: The widening-sum idiom is a widening reduction pattern that is
764 vectorized without preserving all the intermediate results. It
765 produces only N/2 (widened) results (by summing up pairs of
766 intermediate results) rather than all N results. Therefore, we
767 cannot allow this pattern when we want to get all the results and in
768 the correct order (as is the case when this computation is in an
769 inner-loop nested in an outer-loop that us being vectorized). */
772 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
775 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
777 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
778 tree type, half_type;
780 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
781 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
784 if (!is_gimple_assign (last_stmt))
787 type = gimple_expr_type (last_stmt);
789 /* Look for the following pattern
792 In which DX is at least double the size of X, and sum_1 has been
793 recognized as a reduction variable.
796 /* Starting from LAST_STMT, follow the defs of its uses in search
797 of the above pattern. */
/* LAST_STMT must be a PLUS that was classified as a reduction.  */
799 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
802 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
805 oprnd0 = gimple_assign_rhs1 (last_stmt);
806 oprnd1 = gimple_assign_rhs2 (last_stmt);
807 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
808 || !types_compatible_p (TREE_TYPE (oprnd1), type))
811 /* So far so good. Since last_stmt was detected as a (summation) reduction,
812 we know that oprnd1 is the reduction variable (defined by a loop-header
813 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
814 Left to check that oprnd0 is defined by a cast from type 'type' to type
817 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
/* Use the narrow pre-promotion operand in the WIDEN_SUM.  */
820 oprnd0 = gimple_assign_rhs1 (stmt);
821 *type_in = half_type;
824 /* Pattern detected. Create a stmt to be used to replace the pattern: */
825 var = vect_recog_temp_ssa_var (type, NULL);
826 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
829 if (vect_print_dump_info (REPORT_DETAILS))
831 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
832 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
835 /* We don't allow changing the order of the computation in the inner-loop
836 when doing outer-loop vectorization. */
837 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
843 /* Return TRUE if the operation in STMT can be performed on a smaller type.
846 STMT - a statement to check.
847 DEF - we support operations with two operands, one of which is constant.
848 The other operand can be defined by a demotion operation, or by a
849 previous statement in a sequence of over-promoted operations. In the
850 later case DEF is used to replace that operand. (It is defined by a
851 pattern statement we created for the previous statement in the
855 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
856 NULL, it's the type of DEF.
857 STMTS - additional pattern statements. If a pattern statement (type
858 conversion) is created in this function, its original statement is
862 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
863 operands to use in the new pattern statement for STMT (will be created
864 in vect_recog_over_widening_pattern ()).
865 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
866 statements for STMT: the first one is a type promotion and the second
867 one is the operation itself. We return the type promotion statement
868 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
869 the second pattern statement. */
872 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
873 tree *op0, tree *op1, gimple *new_def_stmt,
874 VEC (gimple, heap) **stmts)
877 tree const_oprnd, oprnd;
878 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
879 gimple def_stmt, new_stmt;
881 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
882 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
884 *new_def_stmt = NULL;
886 if (!is_gimple_assign (stmt))
/* Only shifts and bitwise ops are considered safe to narrow here.  */
889 code = gimple_assign_rhs_code (stmt);
890 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
891 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
894 oprnd = gimple_assign_rhs1 (stmt);
895 const_oprnd = gimple_assign_rhs2 (stmt);
896 type = gimple_expr_type (stmt);
/* Exactly one SSA operand and one integer-constant operand.  */
898 if (TREE_CODE (oprnd) != SSA_NAME
899 || TREE_CODE (const_oprnd) != INTEGER_CST)
902 /* If we are in the middle of a sequence, we use DEF from a previous
903 statement. Otherwise, OPRND has to be a result of type promotion. */
906 half_type = *new_type;
/* First stmt of a sequence: OPRND must come from a widening conversion
   defined by an analyzable stmt inside the loop.  */
912 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
913 || !gimple_bb (def_stmt)
914 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
915 || !vinfo_for_stmt (def_stmt))
919 /* Can we perform the operation on a smaller type? */
/* NOTE(review): the switch over CODE is partly elided here; the visible
   arms handle the constant-operand fit and the shift-amount bound.  */
925 if (!int_fits_type_p (const_oprnd, half_type))
927 /* HALF_TYPE is not enough. Try a bigger type if possible. */
928 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
931 interm_type = build_nonstandard_integer_type (
932 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
933 if (!int_fits_type_p (const_oprnd, interm_type))
940 /* Try intermediate type - HALF_TYPE is not enough for sure. */
941 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
944 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
945 (e.g., if the original value was char, the shift amount is at most 8
946 if we want to use short). */
947 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
950 interm_type = build_nonstandard_integer_type (
951 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
953 if (!vect_supportable_shift (code, interm_type))
/* Shift case: the narrow type must itself be a supportable shift type,
   else fall back to the intermediate type.  */
959 if (vect_supportable_shift (code, half_type))
962 /* Try intermediate type - HALF_TYPE is not supported. */
963 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
966 interm_type = build_nonstandard_integer_type (
967 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
969 if (!vect_supportable_shift (code, interm_type))
978 /* There are four possible cases:
979 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
980 the first statement in the sequence)
981 a. The original, HALF_TYPE, is not enough - we replace the promotion
982 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
983 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
985 2. OPRND is defined by a pattern statement we created.
986 a. Its type is not sufficient for the operation, we create a new stmt:
987 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
988 this statement in NEW_DEF_STMT, and it is later put in
989 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
990 b. OPRND is good to use in the new statement. */
995 /* Replace the original type conversion HALF_TYPE->TYPE with
996 HALF_TYPE->INTERM_TYPE. */
997 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
999 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1000 /* Check if the already created pattern stmt is what we need. */
1001 if (!is_gimple_assign (new_stmt)
1002 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1003 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1006 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1007 oprnd = gimple_assign_lhs (new_stmt);
1011 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1012 oprnd = gimple_assign_rhs1 (def_stmt);
1013 tmp = create_tmp_reg (interm_type, NULL);
1014 add_referenced_var (tmp);
1015 new_oprnd = make_ssa_name (tmp, NULL);
1016 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1018 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1019 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1025 /* Retrieve the operand before the type promotion. */
1026 oprnd = gimple_assign_rhs1 (def_stmt);
1033 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1034 tmp = create_tmp_reg (interm_type, NULL);
1035 add_referenced_var (tmp);
1036 new_oprnd = make_ssa_name (tmp, NULL);
1037 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
/* Hand the promotion stmt back; the caller stores it in
   STMT_VINFO_PATTERN_DEF_STMT of the operation's pattern stmt.  */
1040 *new_def_stmt = new_stmt;
1043 /* Otherwise, OPRND is already set. */
/* Report the chosen narrow type and the converted constant operand.  */
1047 *new_type = interm_type;
1049 *new_type = half_type;
1052 *op1 = fold_convert (*new_type, const_oprnd);
1058 /* Try to find a statement or a sequence of statements that can be performed
1062 TYPE x_T, res0_T, res1_T;
1065 S2 x_T = (TYPE) x_t;
1066 S3 res0_T = op (x_T, C0);
1067 S4 res1_T = op (res0_T, C1);
1068 S5 ... = () res1_T; - type demotion
1070 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1072 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1073 be 'type' or some intermediate type. For now, we expect S5 to be a type
1074 demotion operation. We also check that S3 and S4 have only one use. */
1077 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1078 tree *type_in, tree *type_out)
1080 gimple stmt = VEC_pop (gimple, *stmts);
1081 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1082 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1083 imm_use_iterator imm_iter;
1084 use_operand_p use_p;
1086 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1088 struct loop *loop = (gimple_bb (stmt))->loop_father;
/* NOTE(review): the loop that walks the statement sequence appears partly
   elided in this view.  Skip stmts already absorbed into another pattern.  */
1093 if (!vinfo_for_stmt (stmt)
1094 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1097 new_def_stmt = NULL;
1098 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1099 &op0, &op1, &new_def_stmt,
1108 /* STMT can be performed on a smaller type. Check its uses. */
1109 lhs = gimple_assign_lhs (stmt);
/* Count the non-debug uses; the sequence requires exactly one.  */
1111 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1113 if (is_gimple_debug (USE_STMT (use_p)))
1115 use_stmt = USE_STMT (use_p);
1119 if (nuses != 1 || !is_gimple_assign (use_stmt)
1120 || !gimple_bb (use_stmt)
1121 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1124 /* Create pattern statement for STMT. */
1125 vectype = get_vectype_for_scalar_type (new_type);
1129 /* We want to collect all the statements for which we create pattern
1130 statetments, except for the case when the last statement in the
1131 sequence doesn't have a corresponding pattern statement. In such
1132 case we associate the last pattern statement with the last statement
1133 in the sequence. Therefore, we only add an original statetement to
1134 the list if we know that it is not the last. */
1136 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
/* Build the narrow-type replacement for STMT and link both the pattern
   stmt and the auxiliary promotion stmt (if any) into its stmt_vinfo.  */
1138 var = vect_recog_temp_ssa_var (new_type, NULL);
1140 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1142 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1143 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1145 if (vect_print_dump_info (REPORT_DETAILS))
1147 fprintf (vect_dump, "created pattern stmt: ");
1148 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1157 /* We got a sequence. We expect it to end with a type demotion operation.
1158 Otherwise, we quit (for now). There are three possible cases: the
1159 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1160 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1161 NEW_TYPE differs (we create a new conversion statement). */
1162 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1164 use_lhs = gimple_assign_lhs (use_stmt);
1165 use_type = TREE_TYPE (use_lhs);
1166 /* Support only type promotion or signedess change. */
1167 if (!INTEGRAL_TYPE_P (use_type)
1168 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1171 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1172 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1174 /* Create NEW_TYPE->USE_TYPE conversion. */
1175 tmp = create_tmp_reg (use_type, NULL);
1176 add_referenced_var (tmp);
1177 new_oprnd = make_ssa_name (tmp, NULL);
1178 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1180 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1182 *type_in = get_vectype_for_scalar_type (new_type);
1183 *type_out = get_vectype_for_scalar_type (use_type);
1185 /* We created a pattern statement for the last statement in the
1186 sequence, so we don't need to associate it with the pattern
1187 statement created for PREV_STMT. Therefore, we add PREV_STMT
1188 to the list in order to mark it later in vect_pattern_recog_1. */
1190 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
/* Conversion is exactly to NEW_TYPE: reuse PREV_STMT's pattern-def stmt
   and report no separate output type.  */
1195 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1196 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1199 *type_out = NULL_TREE;
1202 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1205 /* TODO: support general case, create a conversion to the correct type. */
1208 /* Pattern detected. */
1209 if (vect_print_dump_info (REPORT_DETAILS))
1211 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1212 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1215 return pattern_stmt;
1219 /* Function vect_recog_mixed_size_cond_pattern
1221 Try to find the following pattern:
1226 S1 a_T = x_t CMP y_t ? b_T : c_T;
1228 where type 'TYPE' is an integral type which has different size
1229 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1230 than 'type', the constants need to fit into an integer type
1231 with the same width as 'type'.
1235 * LAST_STMT: A stmt from which the pattern search begins.
1239 * TYPE_IN: The type of the input arguments to the pattern.
1241 * TYPE_OUT: The type of the output of this pattern.
1243 * Return value: A new stmt that will be used to replace the pattern.
1244 Additionally a def_stmt is added.
1246 a_it = x_t CMP y_t ? b_it : c_it;
1247 a_T = (TYPE) a_it; */
/* Recognize a COND_EXPR whose comparison operands and value operands have
   different scalar sizes, and rewrite it as a COND_EXPR computed in the
   comparison's size followed by a conversion (see the comment above).
   NOTE(review): this listing elides several original lines -- the
   "static gimple" return-type line, early "return NULL;" statements and
   some braces fall in the gaps, so control flow below is partial.  */
1250 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
/* The stmt the search starts from; callers push exactly one stmt.  */
1253 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1254 tree cond_expr, then_clause, else_clause;
1255 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1256 tree type, vectype, comp_vectype, itype, vecitype;
1257 enum machine_mode cmpmode;
1258 gimple pattern_stmt, def_stmt;
1259 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
/* Only an internal-def COND_EXPR assignment can match this pattern.  */
1261 if (!is_gimple_assign (last_stmt)
1262 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1263 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1266 cond_expr = gimple_assign_rhs1 (last_stmt);
1267 then_clause = gimple_assign_rhs2 (last_stmt);
1268 else_clause = gimple_assign_rhs3 (last_stmt);
/* Both arms must be integer constants (see the function comment).  */
1270 if (TREE_CODE (then_clause) != INTEGER_CST
1271 || TREE_CODE (else_clause) != INTEGER_CST)
1274 if (!COMPARISON_CLASS_P (cond_expr))
/* Vector type of the comparison operands (assignee elided in the gap,
   presumably comp_vectype -- confirm against the full source).  */
1278 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1279 if (comp_vectype == NULL_TREE)
1282 type = gimple_expr_type (last_stmt);
1283 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
/* If result and comparison already have the same size, there is
   nothing to fix up.  */
1285 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1288 vectype = get_vectype_for_scalar_type (type);
1289 if (vectype == NULL_TREE)
/* If the target can already expand the mixed-size VEC_COND_EXPR
   directly, the pattern is not needed.  */
1292 if (expand_vec_cond_expr_p (vectype, comp_vectype))
/* Build an integer type ITYPE with the comparison's width and the
   result's signedness, in which the COND_EXPR will be computed.  */
1295 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1296 TYPE_UNSIGNED (type));
1297 if (itype == NULL_TREE
1298 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1301 vecitype = get_vectype_for_scalar_type (itype);
1302 if (vecitype == NULL_TREE)
1305 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
/* When narrowing, both constant arms must fit into ITYPE.  */
1308 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1310 if (!int_fits_type_p (then_clause, itype)
1311 || !int_fits_type_p (else_clause, itype))
/* def_stmt: a_it = cond ? (itype) b : (itype) c  (assignee elided).  */
1316 = gimple_build_assign_with_ops3 (COND_EXPR,
1317 vect_recog_temp_ssa_var (itype, NULL),
1318 unshare_expr (cond_expr),
1319 fold_convert (itype, then_clause),
1320 fold_convert (itype, else_clause));
/* pattern_stmt: a_T = (TYPE) a_it  (assignee elided in the gap).  */
1322 = gimple_build_assign_with_ops (NOP_EXPR,
1323 vect_recog_temp_ssa_var (type, NULL),
1324 gimple_assign_lhs (def_stmt), NULL_TREE);
/* Record the helper stmt and give it vectorizer bookkeeping info.  */
1326 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
1327 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1328 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1329 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1330 *type_in = vecitype;
1331 *type_out = vectype;
1333 return pattern_stmt;
1337 /* Helper function of vect_recog_bool_pattern. Called recursively, return
1338 true if bool VAR can be optimized that way. */
/* Recursively check whether the bool SSA name VAR (and its whole def
   chain) can be rewritten by vect_recog_bool_pattern.  Returns a truth
   value (return type line elided in this listing -- presumably bool).
   NOTE(review): the switch on rhs_code and its case labels fall in the
   elided lines; the bare return statements below are its case bodies.  */
1341 check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1344 enum vect_def_type dt;
1346 enum tree_code rhs_code;
/* VAR must be a simple use defined inside the loop by an assignment
   whose result has a single use -- otherwise reject.  */
1348 if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
1351 if (dt != vect_internal_def)
1354 if (!is_gimple_assign (def_stmt))
1357 if (!has_single_use (def))
1360 rhs1 = gimple_assign_rhs1 (def_stmt);
1361 rhs_code = gimple_assign_rhs_code (def_stmt);
/* Plain copy: look through it.  */
1365 return check_bool_pattern (rhs1, loop_vinfo);
/* Conversion case: only bool-like sources (1-bit unsigned or
   BOOLEAN_TYPE) are looked through.  */
1368 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1369 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1370 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1372 return check_bool_pattern (rhs1, loop_vinfo);
/* Unary case (presumably BIT_NOT_EXPR -- label elided): recurse.  */
1375 return check_bool_pattern (rhs1, loop_vinfo);
/* Binary logical case: both operands must be transformable.  */
1380 if (!check_bool_pattern (rhs1, loop_vinfo))
1382 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
/* Comparison leaf: the target must support the corresponding
   VEC_COND_EXPR for the comparison's vector type.  */
1385 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1387 tree vecitype, comp_vectype;
1389 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1390 if (comp_vectype == NULL_TREE)
/* For non-INTEGER_TYPE comparisons, check with a same-width unsigned
   integer vector type instead.  */
1393 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1395 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1397 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
1398 vecitype = get_vectype_for_scalar_type (itype);
1399 if (vecitype == NULL_TREE)
1403 vecitype = comp_vectype;
1404 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1411 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
1412 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
1413 to PATTERN_DEF_STMT and adding a cast as RELATED_STMT. */
/* Demote VAR's already-built pattern COND_EXPR stmt to PATTERN_DEF_STMT
   and install a fresh NOP_EXPR cast to TYPE as the RELATED_STMT; return
   the SSA name produced by the new cast (see comment above).  */
1416 adjust_bool_pattern_cast (tree type, tree var)
1418 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1419 gimple cast_stmt, pattern_stmt;
/* VAR's stmt must not already carry a pattern def stmt: we are about
   to move its RELATED_STMT into that slot.  */
1421 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
1422 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1423 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
/* cast_stmt: new_tmp = (TYPE) <old pattern lhs>  (assignee and last
   operand elided in this gapped listing).  */
1425 = gimple_build_assign_with_ops (NOP_EXPR,
1426 vect_recog_temp_ssa_var (type, NULL),
1427 gimple_assign_lhs (pattern_stmt),
1429 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
1430 return gimple_assign_lhs (cast_stmt);
1434 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
1435 recursively. VAR is an SSA_NAME that should be transformed from bool
1436 to a wider integer type, OUT_TYPE is the desired final integer type of
1437 the whole pattern, TRUEVAL should be NULL unless optimizing
1438 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1439 in the then_clause, STMTS is where statements with added pattern stmts
1440 should be pushed to. */
/* Recursively rewrite the bool computation rooted at VAR into integer
   arithmetic of OUT_TYPE width, emitting pattern stmts (see comment
   above for the roles of TRUEVAL and STMTS).  Returns the SSA name
   holding the integer result.
   NOTE(review): this listing is gapped -- the switch on rhs_code, its
   case labels, "break"s and several braces fall in elided lines, so the
   sections below are the case bodies without their labels.  */
1443 adjust_bool_pattern (tree var, tree out_type, tree trueval,
1444 VEC (gimple, heap) **stmts)
1446 gimple stmt = SSA_NAME_DEF_STMT (var);
1447 enum tree_code rhs_code, def_rhs_code;
1448 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1450 gimple pattern_stmt, def_stmt;
1452 rhs1 = gimple_assign_rhs1 (stmt);
1453 rhs2 = gimple_assign_rhs2 (stmt);
1454 rhs_code = gimple_assign_rhs_code (stmt);
1455 loc = gimple_location (stmt);
/* Copy case: recurse on the source and re-emit a plain copy.  */
1460 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1461 itype = TREE_TYPE (irhs1);
1463 = gimple_build_assign_with_ops (SSA_NAME,
1464 vect_recog_temp_ssa_var (itype, NULL),
/* BIT_NOT case (label elided): !x becomes x ^ 1 in the integer type.  */
1469 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1470 itype = TREE_TYPE (irhs1);
1472 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
1473 vect_recog_temp_ssa_var (itype, NULL),
1474 irhs1, build_int_cst (itype, 1));
1478 /* Try to optimize x = y & (a < b ? 1 : 0); into
1479 x = (a < b ? y : 0);
1485 S1 a_b = x1 CMP1 y1;
1486 S2 b_b = x2 CMP2 y2;
1488 S4 d_T = (TYPE) c_b;
1490 we would normally emit:
1492 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1493 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1494 S3' c_T = a_T & b_T;
1497 but we can save one stmt by using the
1498 result of one of the COND_EXPRs in the other COND_EXPR and leave
1499 BIT_AND_EXPR stmt out:
1501 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1502 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1505 At least when VEC_COND_EXPR is implemented using masks
1506 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
1507 computes the comparison masks and ands it, in one case with
1508 all ones vector, in the other case with a vector register.
1509 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
1510 often more expensive. */
/* First try: rhs2 is defined by a comparison of matching width.  */
1511 def_stmt = SSA_NAME_DEF_STMT (rhs2);
1512 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1513 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1515 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1516 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1517 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1518 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
/* Fold irhs1 into the comparison's COND_EXPR as its then-value, then
   transfer the pattern bookkeeping from DEF_STMT to STMT.  The popped
   stmt must be DEF_STMT itself, pushed by the recursive call above.  */
1521 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1522 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
1523 tstmt = VEC_pop (gimple, *stmts);
1524 gcc_assert (tstmt == def_stmt);
1525 VEC_quick_push (gimple, *stmts, stmt);
1526 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1527 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1528 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1529 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
/* Shortcut failed for rhs2: fall back to plain recursion.  */
1533 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
/* Second try, mirror image: rhs1 defined by a matching comparison.  */
1536 def_stmt = SSA_NAME_DEF_STMT (rhs1);
1537 def_rhs_code = gimple_assign_rhs_code (def_stmt);
1538 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
1540 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
1541 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
1542 if (TYPE_PRECISION (TREE_TYPE (irhs2))
1543 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
1546 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
1547 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
1548 tstmt = VEC_pop (gimple, *stmts);
1549 gcc_assert (tstmt == def_stmt);
1550 VEC_quick_push (gimple, *stmts, stmt);
1551 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
1552 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
1553 gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
1554 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
1558 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
/* Generic binary case (BIT_IOR/BIT_XOR and the BIT_AND fall-through,
   labels elided): recurse on both operands.  */
1564 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1565 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
/* Operands of differing precision: cast the one whose precision is
   farther from OUT_TYPE's to the other's type, or both to OUT_TYPE
   when the distances tie.  */
1567 if (TYPE_PRECISION (TREE_TYPE (irhs1))
1568 != TYPE_PRECISION (TREE_TYPE (irhs2)))
1570 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
1571 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
1572 int out_prec = TYPE_PRECISION (out_type);
1573 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
1574 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
1575 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
1576 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
1579 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
1580 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
1583 itype = TREE_TYPE (irhs1);
1585 = gimple_build_assign_with_ops (rhs_code,
1586 vect_recog_temp_ssa_var (itype, NULL),
/* Default case: a comparison leaf, turned into
   lhs = cond_expr ? trueval : 0 in a suitable integer type.  */
1591 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
1592 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
1593 || TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1595 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1597 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
1600 itype = TREE_TYPE (rhs1);
1601 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
1602 if (trueval == NULL_TREE)
1603 trueval = build_int_cst (itype, 1);
1605 gcc_checking_assert (useless_type_conversion_p (itype,
1606 TREE_TYPE (trueval)));
1608 = gimple_build_assign_with_ops3 (COND_EXPR,
1609 vect_recog_temp_ssa_var (itype, NULL),
1611 build_int_cst (itype, 0));
/* Common tail: record STMT, locate the new stmt and link it in.  */
1615 VEC_safe_push (gimple, heap, *stmts, stmt);
1616 gimple_set_location (pattern_stmt, loc);
1617 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1618 return gimple_assign_lhs (pattern_stmt);
1622 /* Function vect_recog_bool_pattern
1624 Try to find pattern like following:
1626 bool a_b, b_b, c_b, d_b, e_b;
1629 S1 a_b = x1 CMP1 y1;
1630 S2 b_b = x2 CMP2 y2;
1632 S4 d_b = x3 CMP3 y3;
1634 S6 f_T = (TYPE) e_b;
1636 where type 'TYPE' is an integral type.
1640 * LAST_STMT: A stmt at the end from which the pattern
1641 search begins, i.e. cast of a bool to
1646 * TYPE_IN: The type of the input arguments to the pattern.
1648 * TYPE_OUT: The type of the output of this pattern.
1650 * Return value: A new stmt that will be used to replace the pattern.
1652 Assuming size of TYPE is the same as size of all comparisons
1653 (otherwise some casts would be added where needed), the above
1654 sequence we create related pattern stmts:
1655 S1' a_T = x1 CMP1 y1 ? 1 : 0;
1656 S3' c_T = x2 CMP2 y2 ? a_T : 0;
1657 S4' d_T = x3 CMP3 y3 ? 1 : 0;
1658 S5' e_T = c_T | d_T;
1661 Instead of the above S3' we could emit:
1662 S2' b_T = x2 CMP2 y2 ? 1 : 0;
1663 S3' c_T = a_T | b_T;
1664 but the above is more efficient. */
/* Entry point for the bool pattern (see the big comment above): detect
   a cast of a bool chain to an integer type and rewrite the chain via
   check_bool_pattern / adjust_bool_pattern.
   NOTE(review): gapped listing -- "static gimple", the "return NULL;"
   early exits, an apparent if/else around the two stmt builders, and
   the trailing code past line 1711 are elided.  */
1667 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
/* Pop the candidate stmt; it is pushed back once the pattern matches.  */
1670 gimple last_stmt = VEC_pop (gimple, *stmts);
1671 enum tree_code rhs_code;
1672 tree var, lhs, rhs, vectype;
1673 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1675 gimple pattern_stmt;
1677 if (!is_gimple_assign (last_stmt))
1680 var = gimple_assign_rhs1 (last_stmt);
1681 lhs = gimple_assign_lhs (last_stmt);
/* The source must be bool-like: 1-bit unsigned or BOOLEAN_TYPE.  */
1683 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
1684 || !TYPE_UNSIGNED (TREE_TYPE (var)))
1685 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
/* Only bool-to-integer conversions are handled in this visible part.  */
1688 rhs_code = gimple_assign_rhs_code (last_stmt);
1689 if (CONVERT_EXPR_CODE_P (rhs_code))
1691 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
1693 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
1694 if (vectype == NULL_TREE)
1697 if (!check_bool_pattern (var, loop_vinfo))
/* Rewrite the bool chain, then copy or convert the result to LHS's
   type depending on whether the conversion is a no-op.  */
1700 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
1701 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1702 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
1704 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
1707 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
1708 *type_out = vectype;
1710 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1711 return pattern_stmt;
1718 /* Mark statements that are involved in a pattern. */
/* Wire PATTERN_STMT (and its optional PATTERN_DEF_STMT) into the
   vectorizer bookkeeping for ORIG_STMT: create stmt_vec_infos, link
   the two stmts through RELATED_STMT, and set vectypes.  */
1721 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1722 tree pattern_vectype)
1724 stmt_vec_info pattern_stmt_info, def_stmt_info;
1725 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1726 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
/* Give the new pattern stmt its own stmt_vec_info, placed in the same
   basic block as the stmt it replaces.  */
1729 set_vinfo_for_stmt (pattern_stmt,
1730 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1731 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1732 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
/* Link the pair in both directions and mark ORIG_STMT as replaced.  */
1734 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1735 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1736 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1737 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1738 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1739 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
/* Transfer any helper def stmt from the original to the pattern stmt
   and make sure it has bookkeeping info as well.  */
1740 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1741 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1742 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1744 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1745 def_stmt_info = vinfo_for_stmt (def_stmt);
1746 if (def_stmt_info == NULL)
1748 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1749 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1751 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1752 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1753 STMT_VINFO_DEF_TYPE (def_stmt_info)
1754 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
/* Keep a vectype chosen by the recognizer (e.g. vecitype for
   mixed-size cond) if one was already set.  */
1755 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
1756 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1760 /* Function vect_pattern_recog_1
1763 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1764 computation pattern.
1765 STMT: A stmt from which the pattern search should start.
1767 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1768 expression that computes the same functionality and can be used to
1769 replace the sequence of stmts that are involved in the pattern.
1772 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1773 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1774 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1775 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1776 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1777 to the available target pattern.
1779 This function also does some bookkeeping, as explained in the documentation
1780 for vect_recog_pattern. */
/* Run one recognizer on the stmt at SI; on success verify target
   support and mark all involved stmts (see comment above).
   NOTE(review): gapped listing -- the "static void" line, the NULL
   checks after the recognizer call and after get_vectype_for_scalar_type,
   several braces and loop headers are elided.  */
1783 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
1784 gimple_stmt_iterator si,
1785 VEC (gimple, heap) **stmts_to_replace)
1787 gimple stmt = gsi_stmt (si), pattern_stmt;
1788 stmt_vec_info stmt_info;
1789 loop_vec_info loop_vinfo;
1790 tree pattern_vectype;
1791 tree type_in, type_out;
1792 enum tree_code code;
/* Reset the scratch vector and seed it with the starting stmt before
   invoking the recognizer.  */
1796 VEC_truncate (gimple, *stmts_to_replace, 0);
1797 VEC_quick_push (gimple, *stmts_to_replace, stmt);
1798 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
/* The recognizer may have extended the vector; the last entry is the
   stmt the pattern actually replaces.  */
1802 stmt = VEC_last (gimple, *stmts_to_replace);
1803 stmt_info = vinfo_for_stmt (stmt);
1804 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1806 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1808 /* No need to check target support (already checked by the pattern
1809 recognition function). */
1810 pattern_vectype = type_out ? type_out : type_in;
1814 enum machine_mode vec_mode;
1815 enum insn_code icode;
1818 /* Check target support */
1819 type_in = get_vectype_for_scalar_type (type_in);
1823 type_out = get_vectype_for_scalar_type (type_out);
1828 pattern_vectype = type_out;
1830 if (is_gimple_assign (pattern_stmt))
1831 code = gimple_assign_rhs_code (pattern_stmt);
/* Otherwise it must be a call pattern (code set in elided lines).  */
1834 gcc_assert (is_gimple_call (pattern_stmt));
/* Reject when no optab, no insn for the mode, or a result-mode
   mismatch (leading condition elided).  */
1838 optab = optab_for_tree_code (code, type_in, optab_default);
1839 vec_mode = TYPE_MODE (type_in);
1841 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
1842 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
1846 /* Found a vectorizable pattern. */
1847 if (vect_print_dump_info (REPORT_DETAILS))
1849 fprintf (vect_dump, "pattern recognized: ");
1850 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1853 /* Mark the stmts that are involved in the pattern. */
1854 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1856 /* Patterns cannot be vectorized using SLP, because they change the order of
1858 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
1860 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1862 /* It is possible that additional pattern stmts are created and inserted in
1863 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1864 relevant statements. */
1865 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
1866 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
1869 stmt_info = vinfo_for_stmt (stmt);
1870 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1871 if (vect_print_dump_info (REPORT_DETAILS))
1873 fprintf (vect_dump, "additional pattern stmt: ");
1874 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
/* NULL vectype: determined later for these extra pattern stmts.  */
1877 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1882 /* Function vect_pattern_recog
1885 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1888 Output - for each computation idiom that is detected we create a new stmt
1889 that provides the same functionality and that can be vectorized. We
1890 also record some information in the struct_stmt_info of the relevant
1891 stmts, as explained below:
1893 At the entry to this function we have the following stmts, with the
1894 following initial value in the STMT_VINFO fields:
1896 stmt in_pattern_p related_stmt vec_stmt
1897 S1: a_i = .... - - -
1898 S2: a_2 = ..use(a_i).. - - -
1899 S3: a_1 = ..use(a_2).. - - -
1900 S4: a_0 = ..use(a_1).. - - -
1901 S5: ... = ..use(a_0).. - - -
1903 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1904 represented by a single stmt. We then:
1905 - create a new stmt S6 equivalent to the pattern (the stmt is not
1906 inserted into the code)
1907 - fill in the STMT_VINFO fields as follows:
1909 in_pattern_p related_stmt vec_stmt
1910 S1: a_i = .... - - -
1911 S2: a_2 = ..use(a_i).. - - -
1912 S3: a_1 = ..use(a_2).. - - -
1913 S4: a_0 = ..use(a_1).. true S6 -
1914 '---> S6: a_new = .... - S4 -
1915 S5: ... = ..use(a_0).. - - -
1917 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1918 to each other through the RELATED_STMT field).
1920 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1921 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1922 remain irrelevant unless used by stmts other than S4.
1924 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1925 (because they are marked as irrelevant). It will vectorize S6, and record
1926 a pointer to the new vector stmt VS6 from S6 (as usual).
1927 S4 will be skipped, and S5 will be vectorized as usual:
1929 in_pattern_p related_stmt vec_stmt
1930 S1: a_i = .... - - -
1931 S2: a_2 = ..use(a_i).. - - -
1932 S3: a_1 = ..use(a_2).. - - -
1933 > VS6: va_new = .... - - -
1934 S4: a_0 = ..use(a_1).. true S6 VS6
1935 '---> S6: a_new = .... - S4 VS6
1936 > VS5: ... = ..vuse(va_new).. - - -
1937 S5: ... = ..use(a_0).. - - -
1939 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1940 elsewhere), and we'll end up with:
1943 VS5: ... = ..vuse(va_new)..
1945 In case of more than one pattern statements, e.g., widen-mult with
1949 S2 a_T = (TYPE) a_t;
1950 '--> S3: a_it = (interm_type) a_t;
1951 S4 prod_T = a_T * CONST;
1952 '--> S5: prod_T' = a_it w* CONST;
1954 there may be other users of a_T outside the pattern. In that case S2 will
1955 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1956 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1957 be recorded in S3. */
/* Driver: walk every stmt of every basic block in the loop and try
   each recognizer from vect_vect_recog_func_ptrs on it (see the long
   comment above for the bookkeeping performed).
   NOTE(review): gapped listing -- declarations of i/j, some braces and
   the final argument of the vect_pattern_recog_1 call are elided.  */
1960 vect_pattern_recog (loop_vec_info loop_vinfo)
1962 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1963 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1964 unsigned int nbbs = loop->num_nodes;
1965 gimple_stmt_iterator si;
1967 vect_recog_func_ptr vect_recog_func;
/* Scratch vector shared by all vect_pattern_recog_1 calls, freed at
   the end of the walk.  */
1968 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1970 if (vect_print_dump_info (REPORT_DETAILS))
1971 fprintf (vect_dump, "=== vect_pattern_recog ===");
1973 /* Scan through the loop stmts, applying the pattern recognition
1974 functions starting at each stmt visited: */
1975 for (i = 0; i < nbbs; i++)
1977 basic_block bb = bbs[i];
1978 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1980 /* Scan over all generic vect_recog_xxx_pattern functions. */
1981 for (j = 0; j < NUM_PATTERNS; j++)
1983 vect_recog_func = vect_vect_recog_func_ptrs[j];
1984 vect_pattern_recog_1 (vect_recog_func, si,
1990 VEC_free (gimple, heap, stmts_to_replace);