1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, tree *);
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, tree *);
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, tree *);
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, tree *);
52 static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, tree *, tree *);
54 static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **, tree *, tree *);
56 static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **, tree *, tree *);
58 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, tree *, tree *);
60 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
61 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
64 vect_recog_dot_prod_pattern,
65 vect_recog_pow_pattern,
66 vect_recog_widen_shift_pattern,
67 vect_recog_over_widening_pattern,
68 vect_recog_vector_vector_shift_pattern,
69 vect_recog_sdivmod_pow2_pattern,
70 vect_recog_mixed_size_cond_pattern,
71 vect_recog_bool_pattern};
74 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
76 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
81 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
83 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
84 append_pattern_def_seq (stmt_info, stmt);
87 /* If the LHS of DEF_STMT has a single use, and that statement is
88 in the same loop, return it. */
91 vect_single_imm_use (gimple def_stmt)
93 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
94 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 tree lhs = gimple_assign_lhs (def_stmt);
100 if (!single_imm_use (lhs, &use_p, &use_stmt))
103 if (!gimple_bb (use_stmt))
106 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
109 gcc_assert (vinfo_for_stmt (use_stmt));
113 /* Function widened_name_p
115 Check whether NAME, an ssa-name used in USE_STMT,
116 is a result of a type-promotion, such that:
117 DEF_STMT: NAME = NOP (name0)
118 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
119 If CHECK_SIGN is TRUE, check that either both types are signed or both are unsigned. */
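/* For illustration (types and names are only an example): given

       unsigned char c_t;
       unsigned int c_T = (unsigned int) c_t;

   inside a loop being analyzed, widened_name_p applied to c_T in one of its
   uses returns true, with *HALF_TYPE set to unsigned char and *DEF_STMT set
   to the conversion statement.  */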
123 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
128 loop_vec_info loop_vinfo;
129 stmt_vec_info stmt_vinfo;
130 tree type = TREE_TYPE (name);
132 enum vect_def_type dt;
135 stmt_vinfo = vinfo_for_stmt (use_stmt);
136 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
138 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def,
142 if (dt != vect_internal_def
143 && dt != vect_external_def && dt != vect_constant_def)
149 if (!is_gimple_assign (*def_stmt))
152 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
155 oprnd0 = gimple_assign_rhs1 (*def_stmt);
157 *half_type = TREE_TYPE (oprnd0);
158 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
159 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
160 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
163 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
164 NULL, &dummy_gimple, &dummy, &dt))
170 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
171 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
174 vect_recog_temp_ssa_var (tree type, gimple stmt)
176 tree var = create_tmp_var (type, "patt");
178 add_referenced_var (var);
179 var = make_ssa_name (var, stmt);
183 /* Function vect_recog_dot_prod_pattern
185 Try to find the following pattern:
191 sum_0 = phi <init, sum_1>
194 S3 x_T = (TYPE1) x_t;
195 S4 y_T = (TYPE1) y_t;
197 [S6 prod = (TYPE2) prod; #optional]
198 S7 sum_1 = prod + sum_0;
200 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
201 same size as 'TYPE1' or bigger. This is a special case of a reduction
206 * STMTS: Contains a stmt from which the pattern search begins. In the
207 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be detected.
212 * TYPE_IN: The type of the input arguments to the pattern.
214 * TYPE_OUT: The type of the output of this pattern.
216 * Return value: A new stmt that will be used to replace the sequence of
217 stmts that constitute the pattern. In this case it will be:
218 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
220 Note: The dot-prod idiom is a widening reduction pattern that is
221 vectorized without preserving all the intermediate results. It
222 produces only N/2 (widened) results (by summing up pairs of
223 intermediate results) rather than all N results. Therefore, we
224 cannot allow this pattern when we want to get all the results and in
225 the correct order (as is the case when this computation is in an
226 inner-loop nested in an outer-loop that is being vectorized). */
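/* For illustration, a source loop of the following shape (array names and
   bound are only an example) matches the description above, assuming the
   target provides a dot-product instruction for these types:

       short a[256], b[256];
       int i, sum = 0;
       for (i = 0; i < 256; i++)
         sum += (int) a[i] * (int) b[i];

   Here 'type' is short and 'TYPE1'/'TYPE2' are int; the convert-multiply-
   accumulate chain (S3-S7) is replaced by a single DOT_PROD_EXPR.  */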
229 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
232 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
234 tree oprnd00, oprnd01;
235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
236 tree type, half_type;
239 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
240 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
243 if (!is_gimple_assign (last_stmt))
246 type = gimple_expr_type (last_stmt);
248 /* Look for the following pattern
252 DDPROD = (TYPE2) DPROD;
253 sum_1 = DDPROD + sum_0;
255 - DX is double the size of X
256 - DY is double the size of Y
257 - DX, DY, DPROD all have the same type
258 - sum is the same size as DPROD or bigger
259 - sum has been recognized as a reduction variable.
261 This is equivalent to:
262 DPROD = X w* Y; #widen mult
263 sum_1 = DPROD w+ sum_0; #widen summation
265 DPROD = X w* Y; #widen mult
266 sum_1 = DPROD + sum_0; #summation */
269 /* Starting from LAST_STMT, follow the defs of its uses in search
270 of the above pattern. */
272 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
275 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
277 /* Has been detected as widening-summation? */
279 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
280 type = gimple_expr_type (stmt);
281 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
283 oprnd0 = gimple_assign_rhs1 (stmt);
284 oprnd1 = gimple_assign_rhs2 (stmt);
285 half_type = TREE_TYPE (oprnd0);
291 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
293 oprnd0 = gimple_assign_rhs1 (last_stmt);
294 oprnd1 = gimple_assign_rhs2 (last_stmt);
295 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
296 || !types_compatible_p (TREE_TYPE (oprnd1), type))
300 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
303 oprnd0 = gimple_assign_rhs1 (stmt);
309 /* So far so good. Since last_stmt was detected as a (summation) reduction,
310 we know that oprnd1 is the reduction variable (defined by a loop-header
311 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
312 Left to check that oprnd0 is defined by a (widen_)mult_expr */
313 if (TREE_CODE (oprnd0) != SSA_NAME)
316 prod_type = half_type;
317 stmt = SSA_NAME_DEF_STMT (oprnd0);
319 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
320 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
323 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
324 inside the loop (in case we are analyzing an outer-loop). */
325 if (!is_gimple_assign (stmt))
327 stmt_vinfo = vinfo_for_stmt (stmt);
328 gcc_assert (stmt_vinfo);
329 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
331 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
333 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
335 /* Has been detected as a widening multiplication? */
337 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
338 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
340 stmt_vinfo = vinfo_for_stmt (stmt);
341 gcc_assert (stmt_vinfo);
342 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
343 oprnd00 = gimple_assign_rhs1 (stmt);
344 oprnd01 = gimple_assign_rhs2 (stmt);
348 tree half_type0, half_type1;
352 oprnd0 = gimple_assign_rhs1 (stmt);
353 oprnd1 = gimple_assign_rhs2 (stmt);
354 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
355 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
357 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
359 oprnd00 = gimple_assign_rhs1 (def_stmt);
360 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
362 oprnd01 = gimple_assign_rhs1 (def_stmt);
363 if (!types_compatible_p (half_type0, half_type1))
365 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
369 half_type = TREE_TYPE (oprnd00);
370 *type_in = half_type;
373 /* Pattern detected. Create a stmt to be used to replace the pattern: */
374 var = vect_recog_temp_ssa_var (type, NULL);
375 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
376 oprnd00, oprnd01, oprnd1);
378 if (vect_print_dump_info (REPORT_DETAILS))
380 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
381 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
384 /* We don't allow changing the order of the computation in the inner-loop
385 when doing outer-loop vectorization. */
386 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
392 /* Handle widening operation by a constant. At the moment we support MULT_EXPR and LSHIFT_EXPR.
395 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
396 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
398 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
399 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
400 that satisfies the above restrictions, we can perform a widening operation
401 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
402 with a_it = (interm_type) a_t; */
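/* For illustration (types chosen only as an example): in

       char a_t;
       int a_T, res_T;
       a_T = (int) a_t;
       res_T = a_T * 300;

   the constant 300 does not fit char, but int is four times wider than char
   and 300 fits short, so the promotion is rewritten as a_it = (short) a_t
   and the multiplication becomes a short->int widening multiplication by
   the constant.  */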
405 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
406 tree const_oprnd, tree *oprnd,
407 VEC (gimple, heap) **stmts, tree type,
408 tree *half_type, gimple def_stmt)
410 tree new_type, new_oprnd, tmp;
412 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
413 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
415 if (code != MULT_EXPR && code != LSHIFT_EXPR)
418 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
419 || (code == LSHIFT_EXPR
420 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
422 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
424 /* CONST_OPRND is a constant of HALF_TYPE. */
425 *oprnd = gimple_assign_rhs1 (def_stmt);
429 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
430 || !gimple_bb (def_stmt)
431 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
432 || !vinfo_for_stmt (def_stmt))
435 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
436 a type 2 times bigger than HALF_TYPE. */
437 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
438 TYPE_UNSIGNED (type));
439 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
440 || (code == LSHIFT_EXPR
441 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
444 /* Use NEW_TYPE for widening operation. */
445 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
447 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
448 /* Check if the already created pattern stmt is what we need. */
449 if (!is_gimple_assign (new_stmt)
450 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
451 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
454 VEC_safe_push (gimple, heap, *stmts, def_stmt);
455 *oprnd = gimple_assign_lhs (new_stmt);
459 /* Create a_T = (NEW_TYPE) a_t; */
460 *oprnd = gimple_assign_rhs1 (def_stmt);
461 tmp = create_tmp_var (new_type, NULL);
462 add_referenced_var (tmp);
463 new_oprnd = make_ssa_name (tmp, NULL);
464 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
466 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
467 VEC_safe_push (gimple, heap, *stmts, def_stmt);
471 *half_type = new_type;
476 /* Function vect_recog_widen_mult_pattern
478 Try to find the following pattern:
481 TYPE a_T, b_T, prod_T;
487 S5 prod_T = a_T * b_T;
489 where type 'TYPE' is at least double the size of type 'type'.
491 Also detect unsigned cases:
493 unsigned type a_t, b_t;
494 unsigned TYPE u_prod_T;
495 TYPE a_T, b_T, prod_T;
501 S5 prod_T = a_T * b_T;
502 S6 u_prod_T = (unsigned TYPE) prod_T;
504 and multiplication by constants:
511 S5 prod_T = a_T * CONST;
513 A special case of multiplication by constants is when 'TYPE' is 4 times
514 bigger than 'type', but CONST fits an intermediate type 2 times smaller
515 than 'TYPE'. In that case we create an additional pattern stmt for S3
516 to create a variable of the intermediate type, and perform widen-mult
517 on the intermediate type as well:
521 TYPE a_T, prod_T, prod_T';
525 '--> a_it = (interm_type) a_t;
526 S5 prod_T = a_T * CONST;
527 '--> prod_T' = a_it w* CONST;
531 * STMTS: Contains a stmt from which the pattern search begins. In the
532 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
533 is detected. In case of unsigned widen-mult, the original stmt (S5) is
534 replaced with S6 in STMTS. In case of multiplication by a constant
535 of an intermediate type (the last case above), STMTS also contains S3
536 (inserted before S5).
540 * TYPE_IN: The type of the input arguments to the pattern.
542 * TYPE_OUT: The type of the output of this pattern.
544 * Return value: A new stmt that will be used to replace the sequence of
545 stmts that constitute the pattern. In this case it will be:
546 WIDEN_MULT <a_t, b_t> */
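/* For illustration (example types only), the simplest form described above is

       short a_t, b_t;
       int prod_T;
       prod_T = (int) a_t * (int) b_t;

   which, when the target supports a short->int widening multiplication,
   is replaced by WIDEN_MULT_EXPR <a_t, b_t>.  */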
550 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
551 tree *type_in, tree *type_out)
553 gimple last_stmt = VEC_pop (gimple, *stmts);
554 gimple def_stmt0, def_stmt1;
556 tree type, half_type0, half_type1;
558 tree vectype, vectype_out = NULL_TREE;
561 enum tree_code dummy_code;
563 VEC (tree, heap) *dummy_vec;
566 if (!is_gimple_assign (last_stmt))
569 type = gimple_expr_type (last_stmt);
571 /* Starting from LAST_STMT, follow the defs of its uses in search
572 of the above pattern. */
574 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
577 oprnd0 = gimple_assign_rhs1 (last_stmt);
578 oprnd1 = gimple_assign_rhs2 (last_stmt);
579 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
580 || !types_compatible_p (TREE_TYPE (oprnd1), type))
583 /* Check argument 0. */
584 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
586 /* Check argument 1. */
587 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
591 oprnd0 = gimple_assign_rhs1 (def_stmt0);
592 oprnd1 = gimple_assign_rhs1 (def_stmt1);
596 if (TREE_CODE (oprnd1) == INTEGER_CST
597 && TREE_CODE (half_type0) == INTEGER_TYPE
598 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
599 &oprnd0, stmts, type,
600 &half_type0, def_stmt0))
601 half_type1 = half_type0;
606 /* Handle unsigned case. Look for
607 S6 u_prod_T = (unsigned TYPE) prod_T;
608 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
609 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
615 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
618 use_stmt = vect_single_imm_use (last_stmt);
619 if (!use_stmt || !is_gimple_assign (use_stmt)
620 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
623 use_lhs = gimple_assign_lhs (use_stmt);
624 use_type = TREE_TYPE (use_lhs);
625 if (!INTEGRAL_TYPE_P (use_type)
626 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
627 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
631 last_stmt = use_stmt;
634 if (!types_compatible_p (half_type0, half_type1))
637 /* Pattern detected. */
638 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
641 /* Check target support */
642 vectype = get_vectype_for_scalar_type (half_type0);
643 vectype_out = get_vectype_for_scalar_type (type);
646 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
647 vectype_out, vectype,
648 &dummy, &dummy, &dummy_code,
649 &dummy_code, &dummy_int, &dummy_vec))
653 *type_out = vectype_out;
655 /* Pattern supported. Create a stmt to be used to replace the pattern: */
656 var = vect_recog_temp_ssa_var (type, NULL);
657 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
660 if (vect_print_dump_info (REPORT_DETAILS))
661 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
663 VEC_safe_push (gimple, heap, *stmts, last_stmt);
668 /* Function vect_recog_pow_pattern
670 Try to find the following pattern:
674 with POW being one of pow, powf, powi, powif and N being either 2 or 0.5.
679 * LAST_STMT: A stmt from which the pattern search begins.
683 * TYPE_IN: The type of the input arguments to the pattern.
685 * TYPE_OUT: The type of the output of this pattern.
687 * Return value: A new stmt that will be used to replace the sequence of
688 stmts that constitute the pattern. In this case it will be either x = x * x or x = sqrt (x). */
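/* For illustration, the two cases handled below are

       y = pow (x, 2.0);   which is rewritten as   y = x * x;
       y = pow (x, 0.5);   which is rewritten as   y = sqrt (x);

   the latter only when the target can vectorize sqrt.  powf, powi and powif
   are handled analogously.  */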
695 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
698 gimple last_stmt = VEC_index (gimple, *stmts, 0);
699 tree fn, base, exp = NULL;
703 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
706 fn = gimple_call_fndecl (last_stmt);
707 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
710 switch (DECL_FUNCTION_CODE (fn))
716 base = gimple_call_arg (last_stmt, 0);
717 exp = gimple_call_arg (last_stmt, 1);
718 if (TREE_CODE (exp) != REAL_CST
719 && TREE_CODE (exp) != INTEGER_CST)
727 /* We now have a pow or powi builtin function call with a constant exponent. */
730 *type_out = NULL_TREE;
732 /* Catch squaring. */
733 if ((host_integerp (exp, 0)
734 && tree_low_cst (exp, 0) == 2)
735 || (TREE_CODE (exp) == REAL_CST
736 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
738 *type_in = TREE_TYPE (base);
740 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
741 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
745 /* Catch square root. */
746 if (TREE_CODE (exp) == REAL_CST
747 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
749 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
750 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
753 gimple stmt = gimple_build_call (newfn, 1, base);
754 if (vectorizable_function (stmt, *type_in, *type_in)
757 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
758 gimple_call_set_lhs (stmt, var);
768 /* Function vect_recog_widen_sum_pattern
770 Try to find the following pattern:
773 TYPE x_T, sum = init;
775 sum_0 = phi <init, sum_1>
778 S3 sum_1 = x_T + sum_0;
780 where type 'TYPE' is at least double the size of type 'type', i.e. we're
781 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
782 a special case of a reduction computation.
786 * LAST_STMT: A stmt from which the pattern search begins. In the example,
787 when this function is called with S3, the pattern {S2,S3} will be detected.
791 * TYPE_IN: The type of the input arguments to the pattern.
793 * TYPE_OUT: The type of the output of this pattern.
795 * Return value: A new stmt that will be used to replace the sequence of
796 stmts that constitute the pattern. In this case it will be:
797 WIDEN_SUM <x_t, sum_0>
799 Note: The widening-sum idiom is a widening reduction pattern that is
800 vectorized without preserving all the intermediate results. It
801 produces only N/2 (widened) results (by summing up pairs of
802 intermediate results) rather than all N results. Therefore, we
803 cannot allow this pattern when we want to get all the results and in
804 the correct order (as is the case when this computation is in an
805 inner-loop nested in an outer-loop that is being vectorized). */
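/* For illustration (example types only):

       unsigned char a[256];
       unsigned int i, sum = 0;
       for (i = 0; i < 256; i++)
         sum += (unsigned int) a[i];

   matches the description above; the cast-and-add is replaced by
   WIDEN_SUM_EXPR <a[i], sum_0>, provided the target supports a widening
   sum for these types.  */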
808 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
811 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
813 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
814 tree type, half_type;
816 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
817 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
820 if (!is_gimple_assign (last_stmt))
823 type = gimple_expr_type (last_stmt);
825 /* Look for the following pattern
828 In which DX is at least double the size of X, and sum_1 has been
829 recognized as a reduction variable. */
832 /* Starting from LAST_STMT, follow the defs of its uses in search
833 of the above pattern. */
835 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
838 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
841 oprnd0 = gimple_assign_rhs1 (last_stmt);
842 oprnd1 = gimple_assign_rhs2 (last_stmt);
843 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
844 || !types_compatible_p (TREE_TYPE (oprnd1), type))
847 /* So far so good. Since last_stmt was detected as a (summation) reduction,
848 we know that oprnd1 is the reduction variable (defined by a loop-header
849 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
850 Left to check that oprnd0 is defined by a cast from type 'type' to type 'TYPE'. */
853 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
856 oprnd0 = gimple_assign_rhs1 (stmt);
857 *type_in = half_type;
860 /* Pattern detected. Create a stmt to be used to replace the pattern: */
861 var = vect_recog_temp_ssa_var (type, NULL);
862 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
865 if (vect_print_dump_info (REPORT_DETAILS))
867 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
868 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
871 /* We don't allow changing the order of the computation in the inner-loop
872 when doing outer-loop vectorization. */
873 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
879 /* Return TRUE if the operation in STMT can be performed on a smaller type.
882 STMT - a statement to check.
883 DEF - we support operations with two operands, one of which is constant.
884 The other operand can be defined by a demotion operation, or by a
885 previous statement in a sequence of over-promoted operations. In the
886 latter case DEF is used to replace that operand. (It is defined by a
887 pattern statement we created for the previous statement in the
891 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
892 NULL, it's the type of DEF.
893 STMTS - additional pattern statements. If a pattern statement (type
894 conversion) is created in this function, its original statement is
898 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
899 operands to use in the new pattern statement for STMT (will be created
900 in vect_recog_over_widening_pattern ()).
901 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
902 statements for STMT: the first one is a type promotion and the second
903 one is the operation itself. We return the type promotion statement
904 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
905 the second pattern statement. */
908 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
909 tree *op0, tree *op1, gimple *new_def_stmt,
910 VEC (gimple, heap) **stmts)
913 tree const_oprnd, oprnd;
914 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
915 gimple def_stmt, new_stmt;
917 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
918 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
922 *new_def_stmt = NULL;
924 if (!is_gimple_assign (stmt))
927 code = gimple_assign_rhs_code (stmt);
928 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
929 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
932 oprnd = gimple_assign_rhs1 (stmt);
933 const_oprnd = gimple_assign_rhs2 (stmt);
934 type = gimple_expr_type (stmt);
936 if (TREE_CODE (oprnd) != SSA_NAME
937 || TREE_CODE (const_oprnd) != INTEGER_CST)
940 /* If we are in the middle of a sequence, we use DEF from a previous
941 statement. Otherwise, OPRND has to be a result of type promotion. */
944 half_type = *new_type;
950 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
951 || !gimple_bb (def_stmt)
952 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
953 || !vinfo_for_stmt (def_stmt))
957 /* Can we perform the operation on a smaller type? */
963 if (!int_fits_type_p (const_oprnd, half_type))
965 /* HALF_TYPE is not enough. Try a bigger type if possible. */
966 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
969 interm_type = build_nonstandard_integer_type (
970 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
971 if (!int_fits_type_p (const_oprnd, interm_type))
978 /* Try intermediate type - HALF_TYPE is not enough for sure. */
979 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
982 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
983 (e.g., if the original value was char, the shift amount is at most 8
984 if we want to use short). */
985 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
988 interm_type = build_nonstandard_integer_type (
989 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
991 if (!vect_supportable_shift (code, interm_type))
997 if (vect_supportable_shift (code, half_type))
1000 /* Try intermediate type - HALF_TYPE is not supported. */
1001 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1004 interm_type = build_nonstandard_integer_type (
1005 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1007 if (!vect_supportable_shift (code, interm_type))
1016 /* There are four possible cases:
1017 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1018 the first statement in the sequence)
1019 a. The original, HALF_TYPE, is not enough - we replace the promotion
1020 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1021 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1023 2. OPRND is defined by a pattern statement we created.
1024 a. Its type is not sufficient for the operation, we create a new stmt:
1025 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1026 this statement in NEW_DEF_STMT, and it is later put in
1027 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1028 b. OPRND is good to use in the new statement. */
1033 /* Replace the original type conversion HALF_TYPE->TYPE with
1034 HALF_TYPE->INTERM_TYPE. */
1035 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1037 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1038 /* Check if the already created pattern stmt is what we need. */
1039 if (!is_gimple_assign (new_stmt)
1040 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1041 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1044 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1045 oprnd = gimple_assign_lhs (new_stmt);
1049 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1050 oprnd = gimple_assign_rhs1 (def_stmt);
1051 tmp = create_tmp_reg (interm_type, NULL);
1052 add_referenced_var (tmp);
1053 new_oprnd = make_ssa_name (tmp, NULL);
1054 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1056 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1057 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1063 /* Retrieve the operand before the type promotion. */
1064 oprnd = gimple_assign_rhs1 (def_stmt);
1071 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1072 tmp = create_tmp_reg (interm_type, NULL);
1073 add_referenced_var (tmp);
1074 new_oprnd = make_ssa_name (tmp, NULL);
1075 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1078 *new_def_stmt = new_stmt;
1081 /* Otherwise, OPRND is already set. */
1085 *new_type = interm_type;
1087 *new_type = half_type;
1090 *op1 = fold_convert (*new_type, const_oprnd);
1096 /* Try to find a statement or a sequence of statements that can be performed in a smaller type:
1100 TYPE x_T, res0_T, res1_T;
1103 S2 x_T = (TYPE) x_t;
1104 S3 res0_T = op (x_T, C0);
1105 S4 res1_T = op (res0_T, C1);
1106 S5 ... = () res1_T; - type demotion
1108 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1110 Check whether S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1111 be 'type' or some intermediate type. For now, we expect S5 to be a type
1112 demotion operation. We also check that S3 and S4 have only one use. */
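/* For illustration (example types only): in

       unsigned char a[256], b[256];
       for (i = 0; i < 256; i++)
         b[i] = (a[i] << 1) | 3;

   the C integer promotions perform the shift and the OR in int, although
   every intermediate value fits in unsigned char; the pattern statements
   created here redo the operations in a narrower type, provided the target
   supports them at that width.  */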
1115 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1116 tree *type_in, tree *type_out)
1118 gimple stmt = VEC_pop (gimple, *stmts);
1119 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1120 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1121 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1128 if (!vinfo_for_stmt (stmt)
1129 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1132 new_def_stmt = NULL;
1133 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1134 &op0, &op1, &new_def_stmt,
1143 /* STMT can be performed on a smaller type. Check its uses. */
1144 use_stmt = vect_single_imm_use (stmt);
1145 if (!use_stmt || !is_gimple_assign (use_stmt))
1148 /* Create pattern statement for STMT. */
1149 vectype = get_vectype_for_scalar_type (new_type);
1153 /* We want to collect all the statements for which we create pattern
1154 statements, except for the case when the last statement in the
1155 sequence doesn't have a corresponding pattern statement. In such
1156 case we associate the last pattern statement with the last statement
1157 in the sequence. Therefore, we only add the original statement to
1158 the list if we know that it is not the last. */
1160 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1162 var = vect_recog_temp_ssa_var (new_type, NULL);
1164 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1166 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1167 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1169 if (vect_print_dump_info (REPORT_DETAILS))
1171 fprintf (vect_dump, "created pattern stmt: ");
1172 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1175 type = gimple_expr_type (stmt);
1182 /* We got a sequence. We expect it to end with a type demotion operation.
1183 Otherwise, we quit (for now). There are three possible cases: the
1184 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1185 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1186 NEW_TYPE differs (we create a new conversion statement). */
1187 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1189 use_lhs = gimple_assign_lhs (use_stmt);
1190 use_type = TREE_TYPE (use_lhs);
1191 /* Support only type demotion or signedness change. */
1192 if (!INTEGRAL_TYPE_P (use_type)
1193 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1196 /* Check that NEW_TYPE is not bigger than the conversion result. */
1197 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1200 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1201 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1203 /* Create NEW_TYPE->USE_TYPE conversion. */
1204 tmp = create_tmp_reg (use_type, NULL);
1205 add_referenced_var (tmp);
1206 new_oprnd = make_ssa_name (tmp, NULL);
1207 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1209 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1211 *type_in = get_vectype_for_scalar_type (new_type);
1212 *type_out = get_vectype_for_scalar_type (use_type);
1214 /* We created a pattern statement for the last statement in the
1215 sequence, so we don't need to associate it with the pattern
1216 statement created for PREV_STMT. Therefore, we add PREV_STMT
1217 to the list in order to mark it later in vect_pattern_recog_1. */
1219 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1224 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1225 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1228 *type_out = NULL_TREE;
1231 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1234 /* TODO: support general case, create a conversion to the correct type. */
1237 /* Pattern detected. */
1238 if (vect_print_dump_info (REPORT_DETAILS))
1240 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1241 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1244 return pattern_stmt;
1247 /* Detect widening shift pattern:
1253 S2 a_T = (TYPE) a_t;
1254 S3 res_T = a_T << CONST;
1256 where type 'TYPE' is at least double the size of type 'type'.
1258 Also detect cases where the shift result is immediately converted
1259 to another type 'result_type' that is no larger in size than 'TYPE'.
1260 In those cases we perform a widen-shift that directly results in
1261 'result_type', to avoid a possible over-widening situation:
1265 result_type res_result;
1268 S2 a_T = (TYPE) a_t;
1269 S3 res_T = a_T << CONST;
1270 S4 res_result = (result_type) res_T;
1271 '--> res_result' = a_t w<< CONST;
1273 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1274 create an additional pattern stmt for S2 to create a variable of an
1275 intermediate type, and perform widen-shift on the intermediate type:
1279 TYPE a_T, res_T, res_T';
1282 S2 a_T = (TYPE) a_t;
1283 '--> a_it = (interm_type) a_t;
1284 S3 res_T = a_T << CONST;
1285 '--> res_T' = a_it <<* CONST;
1289 * STMTS: Contains a stmt from which the pattern search begins.
1290 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1291 in STMTS. When an intermediate type is used and a pattern statement is
1292 created for S2, we also put S2 here (before S3).
1296 * TYPE_IN: The type of the input arguments to the pattern.
1298 * TYPE_OUT: The type of the output of this pattern.
1300 * Return value: A new stmt that will be used to replace the sequence of
1301 stmts that constitute the pattern. In this case it will be:
1302 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
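/* For illustration (example types only): in

       unsigned char a[256];
       unsigned short r[256];
       for (i = 0; i < 256; i++)
         r[i] = (unsigned short) (a[i] << 2);

   the shift is performed after promotion to int and the result is converted
   to unsigned short (S2-S4 above); on targets with a widening shift it
   becomes WIDEN_LSHIFT_EXPR <a[i], 2> producing the wider result directly.  */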
1305 vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1306 tree *type_in, tree *type_out)
1308 gimple last_stmt = VEC_pop (gimple, *stmts);
1310 tree oprnd0, oprnd1;
1311 tree type, half_type0;
1312 gimple pattern_stmt;
1313 tree vectype, vectype_out = NULL_TREE;
1316 enum tree_code dummy_code;
1318 VEC (tree, heap) * dummy_vec;
1321 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1324 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1327 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1330 oprnd0 = gimple_assign_rhs1 (last_stmt);
1331 oprnd1 = gimple_assign_rhs2 (last_stmt);
1332 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1335 /* Check operand 0: it has to be defined by a type promotion. */
1336 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1339 /* Check operand 1: has to be positive. We check that it fits the type
1340 in vect_handle_widen_op_by_const (). */
1341 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1344 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1345 type = gimple_expr_type (last_stmt);
1347 /* Check for subsequent conversion to another type. */
1348 use_stmt = vect_single_imm_use (last_stmt);
1349 if (use_stmt && is_gimple_assign (use_stmt)
1350 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1351 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1353 tree use_lhs = gimple_assign_lhs (use_stmt);
1354 tree use_type = TREE_TYPE (use_lhs);
1356 if (INTEGRAL_TYPE_P (use_type)
1357 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1359 last_stmt = use_stmt;
1364 /* Check if this is a widening operation. */
1365 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1367 type, &half_type0, def_stmt0))
1370 /* Pattern detected. */
1371 if (vect_print_dump_info (REPORT_DETAILS))
1372 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1374 /* Check target support. */
1375 vectype = get_vectype_for_scalar_type (half_type0);
1376 vectype_out = get_vectype_for_scalar_type (type);
1380 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1381 vectype_out, vectype,
1382 &dummy, &dummy, &dummy_code,
1383 &dummy_code, &dummy_int,
1388 *type_out = vectype_out;
1390 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1391 var = vect_recog_temp_ssa_var (type, NULL);
1393 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1395 if (vect_print_dump_info (REPORT_DETAILS))
1396 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1398 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1399 return pattern_stmt;
1402 /* Detect a vector by vector shift pattern that wouldn't be otherwise vectorized:
1410 S3 res_T = b_T op a_t;
1412 where type 'TYPE' is a type with different size than 'type',
1413 and op is <<, >> or rotate.
1418 TYPE b_T, c_T, res_T;
1421 S1 a_t = (type) c_T;
1423 S3 res_T = b_T op a_t;
1427 * STMTS: Contains a stmt from which the pattern search begins,
1428 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1429 with a shift/rotate which has the same type on both operands, in the
1430 second case just b_T op c_T, in the first case with added cast
1431 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1435 * TYPE_IN: The type of the input arguments to the pattern.
1437 * TYPE_OUT: The type of the output of this pattern.
1439 * Return value: A new stmt that will be used to replace the shift/rotate S3 stmt. */
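/* For illustration (example types only): in

       unsigned short a[256];
       unsigned char s[256];
       for (i = 0; i < 256; i++)
         a[i] = a[i] >> s[i];

   the shift amount is narrower than the shifted value; the pattern adds a
   cast of s[i] to the type of a[i] (in STMT_VINFO_PATTERN_DEF_SEQ) so that
   a vector-by-vector shift can be used.  */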
1443 vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1444 tree *type_in, tree *type_out)
1446 gimple last_stmt = VEC_pop (gimple, *stmts);
1447 tree oprnd0, oprnd1, lhs, var;
1448 gimple pattern_stmt, def_stmt;
1449 enum tree_code rhs_code;
1450 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1451 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1452 enum vect_def_type dt;
1455 if (!is_gimple_assign (last_stmt))
1458 rhs_code = gimple_assign_rhs_code (last_stmt);
1470 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1473 lhs = gimple_assign_lhs (last_stmt);
1474 oprnd0 = gimple_assign_rhs1 (last_stmt);
1475 oprnd1 = gimple_assign_rhs2 (last_stmt);
1476 if (TREE_CODE (oprnd0) != SSA_NAME
1477 || TREE_CODE (oprnd1) != SSA_NAME
1478 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1479 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1480 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1481 || TYPE_PRECISION (TREE_TYPE (lhs))
1482 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1485 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt,
1489 if (dt != vect_internal_def)
1492 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1493 *type_out = *type_in;
1494 if (*type_in == NULL_TREE)
1498 if (gimple_assign_cast_p (def_stmt))
1500 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1501 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1502 && TYPE_PRECISION (TREE_TYPE (rhs1))
1503 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1507 if (def == NULL_TREE)
1509 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1510 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1512 new_pattern_def_seq (stmt_vinfo, def_stmt);
1515 /* Pattern detected. */
1516 if (vect_print_dump_info (REPORT_DETAILS))
1517 fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1519 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1520 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1521 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1523 if (vect_print_dump_info (REPORT_DETAILS))
1524 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1526 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1527 return pattern_stmt;
1530 /* Detect a signed division by power of two constant that wouldn't be
1531 otherwise vectorized:
1537 where type 'type' is a signed integral type and N is a constant positive power of two.
1540 Similarly handle signed modulo by power of two constant:
1546 * STMTS: Contains a stmt from which the pattern search begins,
1547 i.e. the division stmt. S1 is replaced by:
1548 S3 y_t = b_t < 0 ? N - 1 : 0;
1550 S1' a_t = x_t >> log2 (N);
1552 S4 is replaced by (where *_T temporaries have unsigned type):
1553 S9 y_T = b_t < 0 ? -1U : 0U;
1554 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1555 S7 z_t = (type) z_T;
1557 S5 x_t = w_t & (N - 1);
1558 S4' a_t = x_t - z_t;
1562 * TYPE_IN: The type of the input arguments to the pattern.
1564 * TYPE_OUT: The type of the output of this pattern.
1566 * Return value: A new stmt that will be used to replace the division
1567 S1 or modulo S4 stmt. */
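/* For illustration, with N = 8:

       int x, q, r;
       q = x / 8;
       r = x % 8;

   the division becomes "add 7 only when x is negative, then arithmetic shift
   right by 3" (S3/S1' above), and the modulo becomes the mask-and-subtract
   sequence, so no vector division instruction is needed.  */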
1570 vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1571 tree *type_in, tree *type_out)
1573 gimple last_stmt = VEC_pop (gimple, *stmts);
1574 tree oprnd0, oprnd1, vectype, itype, cond;
1575 gimple pattern_stmt, def_stmt;
1576 enum tree_code rhs_code;
1577 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1578 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1581 if (!is_gimple_assign (last_stmt))
1584 rhs_code = gimple_assign_rhs_code (last_stmt);
1587 case TRUNC_DIV_EXPR:
1588 case TRUNC_MOD_EXPR:
1594 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1597 oprnd0 = gimple_assign_rhs1 (last_stmt);
1598 oprnd1 = gimple_assign_rhs2 (last_stmt);
1599 itype = TREE_TYPE (oprnd0);
1600 if (TREE_CODE (oprnd0) != SSA_NAME
1601 || TREE_CODE (oprnd1) != INTEGER_CST
1602 || TREE_CODE (itype) != INTEGER_TYPE
1603 || TYPE_UNSIGNED (itype)
1604 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1605 || !integer_pow2p (oprnd1)
1606 || tree_int_cst_sgn (oprnd1) != 1)
1609 vectype = get_vectype_for_scalar_type (itype);
1610 if (vectype == NULL_TREE)
1613 /* If the target can handle vectorized division or modulo natively,
1614 don't attempt to optimize this. */
1615 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1618 enum machine_mode vec_mode = TYPE_MODE (vectype);
1619 int icode = (int) optab_handler (optab, vec_mode);
1620 if (icode != CODE_FOR_nothing
1621 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1625 /* Pattern detected. */
1626 if (vect_print_dump_info (REPORT_DETAILS))
1627 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1629 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1630 if (rhs_code == TRUNC_DIV_EXPR)
1632 tree var = vect_recog_temp_ssa_var (itype, NULL);
1634 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1635 fold_build2 (MINUS_EXPR, itype,
1637 build_int_cst (itype,
1639 build_int_cst (itype, 0));
1640 new_pattern_def_seq (stmt_vinfo, def_stmt);
1641 var = vect_recog_temp_ssa_var (itype, NULL);
1643 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1644 gimple_assign_lhs (def_stmt));
1645 append_pattern_def_seq (stmt_vinfo, def_stmt);
1648 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1649 vect_recog_temp_ssa_var (itype, NULL),
1651 build_int_cst (itype,
1652 tree_log2 (oprnd1)));
1657 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1658 if (compare_tree_int (oprnd1, 2) == 0)
1660 signmask = vect_recog_temp_ssa_var (itype, NULL);
1662 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1663 build_int_cst (itype, 1),
1664 build_int_cst (itype, 0));
1665 append_pattern_def_seq (stmt_vinfo, def_stmt);
1670 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1671 tree vecutype = get_vectype_for_scalar_type (utype);
1673 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1674 - tree_log2 (oprnd1));
1675 tree var = vect_recog_temp_ssa_var (utype, NULL);
1676 stmt_vec_info def_stmt_vinfo;
1679 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1680 build_int_cst (utype, -1),
1681 build_int_cst (utype, 0));
1682 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1683 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1684 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1685 append_pattern_def_seq (stmt_vinfo, def_stmt);
1686 var = vect_recog_temp_ssa_var (utype, NULL);
1688 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1689 gimple_assign_lhs (def_stmt),
1691 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1692 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1693 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1694 append_pattern_def_seq (stmt_vinfo, def_stmt);
1695 signmask = vect_recog_temp_ssa_var (itype, NULL);
1697 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1699 append_pattern_def_seq (stmt_vinfo, def_stmt);
1702 = gimple_build_assign_with_ops (PLUS_EXPR,
1703 vect_recog_temp_ssa_var (itype, NULL),
1705 append_pattern_def_seq (stmt_vinfo, def_stmt);
1707 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1708 vect_recog_temp_ssa_var (itype, NULL),
1709 gimple_assign_lhs (def_stmt),
1710 fold_build2 (MINUS_EXPR, itype,
1712 build_int_cst (itype,
1714 append_pattern_def_seq (stmt_vinfo, def_stmt);
1717 = gimple_build_assign_with_ops (MINUS_EXPR,
1718 vect_recog_temp_ssa_var (itype, NULL),
1719 gimple_assign_lhs (def_stmt),
1723 if (vect_print_dump_info (REPORT_DETAILS))
1724 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1726 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1729 *type_out = vectype;
1730 return pattern_stmt;
1733 /* Function vect_recog_mixed_size_cond_pattern
1735 Try to find the following pattern:
1740 S1 a_T = x_t CMP y_t ? b_T : c_T;
1742 where type 'TYPE' is an integral type which has a different size
1743 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1744 than 'type', the constants need to fit into an integer type
1745 with the same width as 'type'.
1749 * LAST_STMT: A stmt from which the pattern search begins.
1753 * TYPE_IN: The type of the input arguments to the pattern.
1755 * TYPE_OUT: The type of the output of this pattern.
1757 * Return value: A new stmt that will be used to replace the pattern.
1758 Additionally a def_stmt is added.
1760 a_it = x_t CMP y_t ? b_it : c_it;
1761 a_T = (TYPE) a_it; */
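/* For illustration (example types only): in

       short a[256];
       int b[256];
       for (i = 0; i < 256; i++)
         b[i] = a[i] < 16 ? 2 : 3;

   the comparison is done on short values while the result is int; the
   constants fit a 16-bit type, so the COND_EXPR is first computed in a
   short-sized integer type and then widened to int, as described above.  */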
1764 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1767 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1768 tree cond_expr, then_clause, else_clause;
1769 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1770 tree type, vectype, comp_vectype, itype, vecitype;
1771 enum machine_mode cmpmode;
1772 gimple pattern_stmt, def_stmt;
1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1775 if (!is_gimple_assign (last_stmt)
1776 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1777 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1780 cond_expr = gimple_assign_rhs1 (last_stmt);
1781 then_clause = gimple_assign_rhs2 (last_stmt);
1782 else_clause = gimple_assign_rhs3 (last_stmt);
1784 if (TREE_CODE (then_clause) != INTEGER_CST
1785 || TREE_CODE (else_clause) != INTEGER_CST)
1788 if (!COMPARISON_CLASS_P (cond_expr))
1792 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1793 if (comp_vectype == NULL_TREE)
1796 type = gimple_expr_type (last_stmt);
1797 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1799 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1802 vectype = get_vectype_for_scalar_type (type);
1803 if (vectype == NULL_TREE)
1806 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1809 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1810 TYPE_UNSIGNED (type));
1811 if (itype == NULL_TREE
1812 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1815 vecitype = get_vectype_for_scalar_type (itype);
1816 if (vecitype == NULL_TREE)
1819 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1822 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1824 if (!int_fits_type_p (then_clause, itype)
1825 || !int_fits_type_p (else_clause, itype))
1830 = gimple_build_assign_with_ops3 (COND_EXPR,
1831 vect_recog_temp_ssa_var (itype, NULL),
1832 unshare_expr (cond_expr),
1833 fold_convert (itype, then_clause),
1834 fold_convert (itype, else_clause));
1836 = gimple_build_assign_with_ops (NOP_EXPR,
1837 vect_recog_temp_ssa_var (type, NULL),
1838 gimple_assign_lhs (def_stmt), NULL_TREE);
1840 new_pattern_def_seq (stmt_vinfo, def_stmt);
1841 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1842 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1843 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1844 *type_in = vecitype;
1845 *type_out = vectype;
1847 return pattern_stmt;
1851 /* Helper function of vect_recog_bool_pattern. Called recursively, it returns
1852 true if bool VAR can be optimized that way. */
1855 check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1858 enum vect_def_type dt;
1860 enum tree_code rhs_code;
1862 if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt))
1865 if (dt != vect_internal_def)
1868 if (!is_gimple_assign (def_stmt))
1871 if (!has_single_use (def))
1874 rhs1 = gimple_assign_rhs1 (def_stmt);
1875 rhs_code = gimple_assign_rhs_code (def_stmt);
1879 return check_bool_pattern (rhs1, loop_vinfo);
1882 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1883 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1884 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1886 return check_bool_pattern (rhs1, loop_vinfo);
1889 return check_bool_pattern (rhs1, loop_vinfo);
1894 if (!check_bool_pattern (rhs1, loop_vinfo))
1896 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1899 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1901 tree vecitype, comp_vectype;
1903 /* If the comparison can throw, then is_gimple_condexpr will be
1904 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
1905 if (stmt_could_throw_p (def_stmt))
1908 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1909 if (comp_vectype == NULL_TREE)
1912 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1914 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1916 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1917 vecitype = get_vectype_for_scalar_type (itype);
1918 if (vecitype == NULL_TREE)
1922 vecitype = comp_vectype;
1923 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1930 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
1931 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
1932 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
1935 adjust_bool_pattern_cast (tree type, tree var)
1937 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1938 gimple cast_stmt, pattern_stmt;
1940 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
1941 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1942 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
1944 = gimple_build_assign_with_ops (NOP_EXPR,
1945 vect_recog_temp_ssa_var (type, NULL),
1946 gimple_assign_lhs (pattern_stmt),
1948 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
1949 return gimple_assign_lhs (cast_stmt);
1953 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
1954 recursively. VAR is an SSA_NAME that should be transformed from bool
1955 to a wider integer type; OUT_TYPE is the desired final integer type of
1956 the whole pattern; TRUEVAL should be NULL unless optimizing
1957 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
1958 in the then_clause; STMTS is where statements with added pattern stmts
1959 should be pushed to. */
1962 adjust_bool_pattern (tree var, tree out_type, tree trueval,
1963 VEC (gimple, heap) **stmts)
1965 gimple stmt = SSA_NAME_DEF_STMT (var);
1966 enum tree_code rhs_code, def_rhs_code;
1967 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
1969 gimple pattern_stmt, def_stmt;
1971 rhs1 = gimple_assign_rhs1 (stmt);
1972 rhs2 = gimple_assign_rhs2 (stmt);
1973 rhs_code = gimple_assign_rhs_code (stmt);
1974 loc = gimple_location (stmt);
1979 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1980 itype = TREE_TYPE (irhs1);
1982 = gimple_build_assign_with_ops (SSA_NAME,
1983 vect_recog_temp_ssa_var (itype, NULL),
1988 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
1989 itype = TREE_TYPE (irhs1);
1991 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
1992 vect_recog_temp_ssa_var (itype, NULL),
1993 irhs1, build_int_cst (itype, 1));
1997 /* Try to optimize x = y & (a < b ? 1 : 0); into
1998 x = (a < b ? y : 0);
2004 S1 a_b = x1 CMP1 y1;
2005 S2 b_b = x2 CMP2 y2;
2007 S4 d_T = (TYPE) c_b;
2009 we would normally emit:
2011 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2012 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2013 S3' c_T = a_T & b_T;
2016 but we can save one stmt by using the
2017 result of one of the COND_EXPRs in the other COND_EXPR and leave
2018 BIT_AND_EXPR stmt out:
2020 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2021 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2024 At least when VEC_COND_EXPR is implemented using masks
2025 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2026 computes the comparison masks and ANDs it, in one case with
2027 an all-ones vector, in the other case with a vector register.
2028 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2029 often more expensive. */
2030 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2031 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2032 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2034 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2035 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2036 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2037 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2040 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2041 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2042 tstmt = VEC_pop (gimple, *stmts);
2043 gcc_assert (tstmt == def_stmt);
2044 VEC_quick_push (gimple, *stmts, stmt);
2045 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2046 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2047 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2048 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2052 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2055 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2056 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2057 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2059 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2060 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2061 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2062 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2065 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2066 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2067 tstmt = VEC_pop (gimple, *stmts);
2068 gcc_assert (tstmt == def_stmt);
2069 VEC_quick_push (gimple, *stmts, stmt);
2070 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2071 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2072 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2073 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2077 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2083 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2084 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2086 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2087 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2089 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2090 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2091 int out_prec = TYPE_PRECISION (out_type);
2092 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2093 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2094 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2095 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2098 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2099 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2102 itype = TREE_TYPE (irhs1);
2104 = gimple_build_assign_with_ops (rhs_code,
2105 vect_recog_temp_ssa_var (itype, NULL),
2110 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2111 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2112 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2113 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2114 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2116 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2118 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2121 itype = TREE_TYPE (rhs1);
2122 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2123 if (trueval == NULL_TREE)
2124 trueval = build_int_cst (itype, 1);
2126 gcc_checking_assert (useless_type_conversion_p (itype,
2127 TREE_TYPE (trueval)));
2129 = gimple_build_assign_with_ops3 (COND_EXPR,
2130 vect_recog_temp_ssa_var (itype, NULL),
2132 build_int_cst (itype, 0));
2136 VEC_safe_push (gimple, heap, *stmts, stmt);
2137 gimple_set_location (pattern_stmt, loc);
2138 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2139 return gimple_assign_lhs (pattern_stmt);
2143 /* Function vect_recog_bool_pattern
2145 Try to find a pattern like the following:
2147 bool a_b, b_b, c_b, d_b, e_b;
2150 S1 a_b = x1 CMP1 y1;
2151 S2 b_b = x2 CMP2 y2;
2152 S3 c_b = a_b & b_b;
2153 S4 d_b = x3 CMP3 y3;
2154 S5 e_b = c_b | d_b;
2155 S6 f_T = (TYPE) e_b;
2157 where type 'TYPE' is an integral type.
2161 * LAST_STMT: A stmt at the end from which the pattern
2162 search begins, i.e. a cast of a bool to an integer type.
2167 * TYPE_IN: The type of the input arguments to the pattern.
2169 * TYPE_OUT: The type of the output of this pattern.
2171 * Return value: A new stmt that will be used to replace the pattern.
2173 Assuming the size of TYPE is the same as the size of all comparisons
2174 (otherwise some casts would be added where needed), for the above
2175 sequence we create the related pattern stmts:
2176 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2177 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2178 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2179 S5' e_T = c_T | d_T;
2180 S6' f_T = e_T;
2182 Instead of the above S3' we could emit:
2183 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2184 S3' c_T = a_T & b_T;
2185 but the first form is more efficient. */
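/* An illustrative (hypothetical) source fragment that yields the S1..S6
   shape documented above, assuming 'f' is an int array:

     _Bool a = x1[i] < y1[i];
     _Bool b = x2[i] < y2[i];
     _Bool c = a & b;
     _Bool d = x3[i] < y3[i];
     _Bool e = c | d;
     f[i] = e;

   The recognizer below replaces the bool computation feeding the final
   conversion with the integer COND_EXPR stmts S1', S3', S4' and S5', which
   the vectorizer can implement with VEC_COND_EXPRs and bitwise mask ops.  */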
2187 static gimple
2188 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2189 tree *type_out)
2191 gimple last_stmt = VEC_pop (gimple, *stmts);
2192 enum tree_code rhs_code;
2193 tree var, lhs, rhs, vectype;
2194 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2196 gimple pattern_stmt;
2198 if (!is_gimple_assign (last_stmt))
2201 var = gimple_assign_rhs1 (last_stmt);
2202 lhs = gimple_assign_lhs (last_stmt);
2204 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2205 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2206 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2209 rhs_code = gimple_assign_rhs_code (last_stmt);
2210 if (CONVERT_EXPR_CODE_P (rhs_code))
2212 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2213 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2215 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2216 if (vectype == NULL_TREE)
2219 if (!check_bool_pattern (var, loop_vinfo))
2222 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2223 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2224 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2226 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2229 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2230 *type_out = vectype;
2232 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2233 return pattern_stmt;
2235 else if (rhs_code == SSA_NAME
2236 && STMT_VINFO_DATA_REF (stmt_vinfo))
2238 stmt_vec_info pattern_stmt_info;
2239 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2240 gcc_assert (vectype != NULL_TREE);
2241 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2243 if (!check_bool_pattern (var, loop_vinfo))
2246 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2247 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2248 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2250 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2252 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2253 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2257 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2258 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2259 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2260 STMT_VINFO_DATA_REF (pattern_stmt_info)
2261 = STMT_VINFO_DATA_REF (stmt_vinfo);
2262 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2263 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2264 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2265 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2266 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2267 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2268 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2269 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2270 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2271 *type_out = vectype;
2273 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2274 return pattern_stmt;
2281 /* Mark statements that are involved in a pattern. */
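/* A note on the arguments, inferred from the uses below rather than from
   separate documentation: ORIG_STMT is the original stmt being replaced
   (the last stmt of the recognized sequence), PATTERN_STMT is the stmt
   produced by the recognizer, and PATTERN_VECTYPE is the vector type to
   record for PATTERN_STMT; it may be NULL_TREE when the vectype is not yet
   known (as for the additional pattern stmts handled in
   vect_pattern_recog_1).  Pattern def-seq stmts that do not already have a
   vectype also receive PATTERN_VECTYPE.  */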
2284 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2285 tree pattern_vectype)
2287 stmt_vec_info pattern_stmt_info, def_stmt_info;
2288 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2292 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2293 if (pattern_stmt_info == NULL)
2295 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2296 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2298 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2300 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2301 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2302 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2303 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2304 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2305 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2306 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2307 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2308 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2310 gimple_stmt_iterator si;
2311 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2312 !gsi_end_p (si); gsi_next (&si))
2314 def_stmt = gsi_stmt (si);
2315 def_stmt_info = vinfo_for_stmt (def_stmt);
2316 if (def_stmt_info == NULL)
2318 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2319 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2321 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2322 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2323 STMT_VINFO_DEF_TYPE (def_stmt_info)
2324 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2325 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2326 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2331 /* Function vect_pattern_recog_1
2334 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2335 computation pattern.
2336 STMT: A stmt from which the pattern search should start.
2338 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2339 expression that computes the same functionality and can be used to
2340 replace the sequence of stmts that are involved in the pattern.
2343 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2344 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2345 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2346 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2347 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2348 the available target pattern.
2350 This function also does some bookkeeping, as explained in the documentation
2351 for vect_pattern_recog. */
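/* As a sketch of the target-support check (the DOT_PROD_EXPR case is only
   an example): if the recognizer returns a stmt such as

     prod = DOT_PROD_EXPR <a_t, b_t, sum_0>;

   and leaves *TYPE_IN as the scalar type of a_t, the code below maps
   TYPE_IN and TYPE_OUT to their vector types, queries
   optab_for_tree_code (DOT_PROD_EXPR, vectype_in, optab_default), and
   accepts the pattern only if the target provides a handler whose result
   mode matches the mode of the output vector type.  */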
2354 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2355 gimple_stmt_iterator si,
2356 VEC (gimple, heap) **stmts_to_replace)
2358 gimple stmt = gsi_stmt (si), pattern_stmt;
2359 stmt_vec_info stmt_info;
2360 loop_vec_info loop_vinfo;
2361 tree pattern_vectype;
2362 tree type_in, type_out;
2363 enum tree_code code;
2367 VEC_truncate (gimple, *stmts_to_replace, 0);
2368 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2369 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2373 stmt = VEC_last (gimple, *stmts_to_replace);
2374 stmt_info = vinfo_for_stmt (stmt);
2375 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2377 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2379 /* No need to check target support (already checked by the pattern
2380 recognition function). */
2381 pattern_vectype = type_out ? type_out : type_in;
2385 enum machine_mode vec_mode;
2386 enum insn_code icode;
2389 /* Check target support */
2390 type_in = get_vectype_for_scalar_type (type_in);
2394 type_out = get_vectype_for_scalar_type (type_out);
2399 pattern_vectype = type_out;
2401 if (is_gimple_assign (pattern_stmt))
2402 code = gimple_assign_rhs_code (pattern_stmt);
2405 gcc_assert (is_gimple_call (pattern_stmt));
2409 optab = optab_for_tree_code (code, type_in, optab_default);
2410 vec_mode = TYPE_MODE (type_in);
2412 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2413 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2417 /* Found a vectorizable pattern. */
2418 if (vect_print_dump_info (REPORT_DETAILS))
2420 fprintf (vect_dump, "pattern recognized: ");
2421 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2424 /* Mark the stmts that are involved in the pattern. */
2425 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2427 /* Patterns cannot be vectorized using SLP, because they change the order of
2428 computation. */
2429 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2430 if (next == stmt)
2431 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2433 /* It is possible that additional pattern stmts are created and inserted in
2434 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2435 relevant statements. */
2436 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2437 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2440 stmt_info = vinfo_for_stmt (stmt);
2441 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2442 if (vect_print_dump_info (REPORT_DETAILS))
2444 fprintf (vect_dump, "additional pattern stmt: ");
2445 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2448 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2453 /* Function vect_pattern_recog
2456 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2457 computation idioms.
2459 Output - for each computation idiom that is detected we create a new stmt
2460 that provides the same functionality and that can be vectorized. We
2461 also record some information in the struct_stmt_info of the relevant
2462 stmts, as explained below:
2464 At the entry to this function we have the following stmts, with the
2465 following initial value in the STMT_VINFO fields:
2467 stmt                      in_pattern_p   related_stmt   vec_stmt
2468 S1: a_i = ....                 -              -             -
2469 S2: a_2 = ..use(a_i)..         -              -             -
2470 S3: a_1 = ..use(a_2)..         -              -             -
2471 S4: a_0 = ..use(a_1)..         -              -             -
2472 S5: ... = ..use(a_0)..         -              -             -
2474 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2475 represented by a single stmt. We then:
2476 - create a new stmt S6 equivalent to the pattern (the stmt is not
2477 inserted into the code)
2478 - fill in the STMT_VINFO fields as follows:
2480                           in_pattern_p   related_stmt   vec_stmt
2481 S1: a_i = ....                 -              -             -
2482 S2: a_2 = ..use(a_i)..         -              -             -
2483 S3: a_1 = ..use(a_2)..         -              -             -
2484 S4: a_0 = ..use(a_1)..         true           S6            -
2485   '---> S6: a_new = ....       -              S4            -
2486 S5: ... = ..use(a_0)..         -              -             -
2488 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2489 to each other through the RELATED_STMT field).
2491 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2492 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2493 remain irrelevant unless used by stmts other than S4.
2495 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2496 (because they are marked as irrelevant). It will vectorize S6, and record
2497 a pointer to the new vector stmt VS6 from S6 (as usual).
2498 S4 will be skipped, and S5 will be vectorized as usual:
2500                               in_pattern_p   related_stmt   vec_stmt
2501 S1: a_i = ....                     -              -             -
2502 S2: a_2 = ..use(a_i)..             -              -             -
2503 S3: a_1 = ..use(a_2)..             -              -             -
2504 > VS6: va_new = ....               -              -             -
2505 S4: a_0 = ..use(a_1)..             true           S6            VS6
2506   '---> S6: a_new = ....           -              S4            VS6
2507 > VS5: ... = ..vuse(va_new)..      -              -             -
2508 S5: ... = ..use(a_0)..             -              -             -
2510 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2511 elsewhere), and we'll end up with:
2513 VS6: va_new = ....
2514 VS5: ... = ..vuse(va_new)..
2516 In case of more than one pattern statement, e.g., widen-mult with an
2517 intermediate type:
2520 S2 a_T = (TYPE) a_t;
2521 '--> S3: a_it = (interm_type) a_t;
2522 S4 prod_T = a_T * CONST;
2523 '--> S5: prod_T' = a_it w* CONST;
2525 there may be other users of a_T outside the pattern. In that case S2 will
2526 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2527 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2528 be recorded in S3. */
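/* A minimal sketch (not quoted from any particular caller) of how the links
   recorded here are meant to be followed by later phases: code that is
   handed the original stmt S4 is expected to do roughly

     stmt_vec_info info = vinfo_for_stmt (stmt);
     if (STMT_VINFO_IN_PATTERN_P (info))
       stmt = STMT_VINFO_RELATED_STMT (info);

   so that analysis and transformation continue on the pattern stmt S6
   rather than on S4 itself.  */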
2531 vect_pattern_recog (loop_vec_info loop_vinfo)
2533 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2534 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2535 unsigned int nbbs = loop->num_nodes;
2536 gimple_stmt_iterator si;
2538 vect_recog_func_ptr vect_recog_func;
2539 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2541 if (vect_print_dump_info (REPORT_DETAILS))
2542 fprintf (vect_dump, "=== vect_pattern_recog ===");
2544 /* Scan through the loop stmts, applying the pattern recognition
2545 functions starting at each stmt visited: */
2546 for (i = 0; i < nbbs; i++)
2548 basic_block bb = bbs[i];
2549 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2551 /* Scan over all generic vect_recog_xxx_pattern functions. */
2552 for (j = 0; j < NUM_PATTERNS; j++)
2554 vect_recog_func = vect_vect_recog_func_ptrs[j];
2555 vect_pattern_recog_1 (vect_recog_func, si,
2561 VEC_free (gimple, heap, stmts_to_replace);
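/* Note (an inference from the documentation of vect_pattern_recog above,
   not a statement about the exact call site): this driver is expected to
   run before vect_mark_stmts_to_be_vectorized, since relevance is computed
   on the pattern stmts (S6 above) rather than on the stmts they replace.  */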