1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
/* NOTE(review): this listing carries embedded original line numbers and the
   numbering jumps, so continuation lines of several prototypes below are
   missing from this view.  */
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
52 static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
54 static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
56 static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
58 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
60 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
/* Table of all pattern-recognition routines; presumably iterated in order by
   the pattern-matching driver (vect_pattern_recog) — TODO confirm, the driver
   is outside this view.  */
61 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
64 vect_recog_dot_prod_pattern,
65 vect_recog_pow_pattern,
66 vect_recog_over_widening_pattern,
67 vect_recog_widen_shift_pattern,
68 vect_recog_vector_vector_shift_pattern,
69 vect_recog_sdivmod_pow2_pattern,
70 vect_recog_mixed_size_cond_pattern,
71 vect_recog_bool_pattern};
73 /* Function widened_name_p
75 Check whether NAME, an ssa-name used in USE_STMT,
76 is a result of a type-promotion, such that:
77 DEF_STMT: NAME = NOP (name0)
78 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
79 If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.
   NOTE(review): the original closing of this comment, the return type, and
   several body lines (returns, braces) are missing from this listing.  */
83 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
88 loop_vec_info loop_vinfo;
89 stmt_vec_info stmt_vinfo;
90 tree type = TREE_TYPE (name);
92 enum vect_def_type dt;
95 stmt_vinfo = vinfo_for_stmt (use_stmt);
96 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
/* NAME must be a simple use recognized by the vectorizer.  */
98 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
101 if (dt != vect_internal_def
102 && dt != vect_external_def && dt != vect_constant_def)
/* The defining statement must be a conversion (NOP_EXPR).  */
108 if (!is_gimple_assign (*def_stmt))
111 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
114 oprnd0 = gimple_assign_rhs1 (*def_stmt);
/* Require integral types, optional matching signedness, and that NAME's
   type is at least twice as wide as HALF_TYPE.  */
116 *half_type = TREE_TYPE (oprnd0);
117 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
118 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
119 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
122 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
129 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
130 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.
    NOTE(review): the return-type line and the return statement are missing
    from this listing (embedded numbering jumps).  */
133 vect_recog_temp_ssa_var (tree type, gimple stmt)
/* "patt" names make pattern temporaries recognizable in dumps.  */
135 tree var = create_tmp_var (type, "patt");
137 add_referenced_var (var);
138 var = make_ssa_name (var, stmt);
142 /* Function vect_recog_dot_prod_pattern
144 Try to find the following pattern:
150 sum_0 = phi <init, sum_1>
153 S3 x_T = (TYPE1) x_t;
154 S4 y_T = (TYPE1) y_t;
156 [S6 prod = (TYPE2) prod; #optional]
157 S7 sum_1 = prod + sum_0;
159 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
160 same size of 'TYPE1' or bigger. This is a special case of a reduction
165 * STMTS: Contains a stmt from which the pattern search begins. In the
166 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
171 * TYPE_IN: The type of the input arguments to the pattern.
173 * TYPE_OUT: The type of the output of this pattern.
175 * Return value: A new stmt that will be used to replace the sequence of
176 stmts that constitute the pattern. In this case it will be:
177 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
179 Note: The dot-prod idiom is a widening reduction pattern that is
180 vectorized without preserving all the intermediate results. It
181 produces only N/2 (widened) results (by summing up pairs of
182 intermediate results) rather than all N results. Therefore, we
183 cannot allow this pattern when we want to get all the results and in
184 the correct order (as is the case when this computation is in an
185 inner-loop nested in an outer-loop that is being vectorized).
    NOTE(review): body lines (returns, braces, some declarations) are missing
    from this listing; the control flow below is incomplete as shown.  */
188 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
191 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
193 tree oprnd00, oprnd01;
194 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
195 tree type, half_type;
198 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
199 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
202 if (!is_gimple_assign (last_stmt))
205 type = gimple_expr_type (last_stmt);
207 /* Look for the following pattern
211 DDPROD = (TYPE2) DPROD;
212 sum_1 = DDPROD + sum_0;
214 - DX is double the size of X
215 - DY is double the size of Y
216 - DX, DY, DPROD all have the same type
217 - sum is the same size of DPROD or bigger
218 - sum has been recognized as a reduction variable.
220 This is equivalent to:
221 DPROD = X w* Y; #widen mult
222 sum_1 = DPROD w+ sum_0; #widen summation
224 DPROD = X w* Y; #widen mult
225 sum_1 = DPROD + sum_0; #summation
228 /* Starting from LAST_STMT, follow the defs of its uses in search
229 of the above pattern. */
231 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
234 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
236 /* Has been detected as widening-summation? */
238 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
239 type = gimple_expr_type (stmt);
240 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
242 oprnd0 = gimple_assign_rhs1 (stmt);
243 oprnd1 = gimple_assign_rhs2 (stmt);
244 half_type = TREE_TYPE (oprnd0);
250 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
252 oprnd0 = gimple_assign_rhs1 (last_stmt);
253 oprnd1 = gimple_assign_rhs2 (last_stmt);
254 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
255 || !types_compatible_p (TREE_TYPE (oprnd1), type))
259 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
262 oprnd0 = gimple_assign_rhs1 (stmt);
268 /* So far so good. Since last_stmt was detected as a (summation) reduction,
269 we know that oprnd1 is the reduction variable (defined by a loop-header
270 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
271 Left to check that oprnd0 is defined by a (widen_)mult_expr */
272 if (TREE_CODE (oprnd0) != SSA_NAME)
275 prod_type = half_type;
276 stmt = SSA_NAME_DEF_STMT (oprnd0);
278 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
279 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
282 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
283 inside the loop (in case we are analyzing an outer-loop). */
284 if (!is_gimple_assign (stmt))
286 stmt_vinfo = vinfo_for_stmt (stmt);
287 gcc_assert (stmt_vinfo);
288 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
290 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
292 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
294 /* Has been detected as a widening multiplication? */
296 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
297 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
299 stmt_vinfo = vinfo_for_stmt (stmt);
300 gcc_assert (stmt_vinfo);
301 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
302 oprnd00 = gimple_assign_rhs1 (stmt);
303 oprnd01 = gimple_assign_rhs2 (stmt);
/* Otherwise: a plain MULT_EXPR whose operands must both be widened from
   compatible half types, each exactly half the width of PROD_TYPE.  */
307 tree half_type0, half_type1;
311 oprnd0 = gimple_assign_rhs1 (stmt);
312 oprnd1 = gimple_assign_rhs2 (stmt);
313 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
314 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
316 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
318 oprnd00 = gimple_assign_rhs1 (def_stmt);
319 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
321 oprnd01 = gimple_assign_rhs1 (def_stmt);
322 if (!types_compatible_p (half_type0, half_type1))
324 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
328 half_type = TREE_TYPE (oprnd00);
329 *type_in = half_type;
332 /* Pattern detected. Create a stmt to be used to replace the pattern: */
333 var = vect_recog_temp_ssa_var (type, NULL);
334 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
335 oprnd00, oprnd01, oprnd1);
337 if (vect_print_dump_info (REPORT_DETAILS))
339 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
340 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
343 /* We don't allow changing the order of the computation in the inner-loop
344 when doing outer-loop vectorization. */
345 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
351 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
354 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
355 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
357 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
358 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
359 that satisfies the above restrictions, we can perform a widening operation
360 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
361 with a_it = (interm_type) a_t;
    NOTE(review): the return type and several body lines (returns, braces) are
    missing from this listing.  */
364 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
365 tree const_oprnd, tree *oprnd,
366 VEC (gimple, heap) **stmts, tree type,
367 tree *half_type, gimple def_stmt)
369 tree new_type, new_oprnd, tmp;
371 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
372 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
/* Only multiplication and left shift are handled.  */
374 if (code != MULT_EXPR && code != LSHIFT_EXPR)
377 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
378 || (code == LSHIFT_EXPR
379 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
381 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
383 /* CONST_OPRND is a constant of HALF_TYPE. */
384 *oprnd = gimple_assign_rhs1 (def_stmt);
/* HALF_TYPE is not enough: require TYPE to be 4x HALF_TYPE and DEF_STMT to
   be an analyzable statement inside the loop.  */
388 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
389 || !gimple_bb (def_stmt)
390 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
391 || !vinfo_for_stmt (def_stmt))
394 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
395 a type 2 times bigger than HALF_TYPE. */
396 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
397 TYPE_UNSIGNED (type));
398 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
399 || (code == LSHIFT_EXPR
400 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
403 /* Use NEW_TYPE for widening operation. */
404 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
406 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
407 /* Check if the already created pattern stmt is what we need. */
408 if (!is_gimple_assign (new_stmt)
409 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
410 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
/* Reuse the existing pattern conversion; record DEF_STMT for marking.  */
413 VEC_safe_push (gimple, heap, *stmts, def_stmt);
414 *oprnd = gimple_assign_lhs (new_stmt);
418 /* Create a_T = (NEW_TYPE) a_t; */
419 *oprnd = gimple_assign_rhs1 (def_stmt);
420 tmp = create_tmp_var (new_type, NULL);
421 add_referenced_var (tmp);
422 new_oprnd = make_ssa_name (tmp, NULL);
423 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
425 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
426 VEC_safe_push (gimple, heap, *stmts, def_stmt);
430 *half_type = new_type;
435 /* Function vect_recog_widen_mult_pattern
437 Try to find the following pattern:
440 TYPE a_T, b_T, prod_T;
446 S5 prod_T = a_T * b_T;
448 where type 'TYPE' is at least double the size of type 'type'.
450 Also detect unsigned cases:
452 unsigned type a_t, b_t;
453 unsigned TYPE u_prod_T;
454 TYPE a_T, b_T, prod_T;
460 S5 prod_T = a_T * b_T;
461 S6 u_prod_T = (unsigned TYPE) prod_T;
463 and multiplication by constants:
470 S5 prod_T = a_T * CONST;
472 A special case of multiplication by constants is when 'TYPE' is 4 times
473 bigger than 'type', but CONST fits an intermediate type 2 times smaller
474 than 'TYPE'. In that case we create an additional pattern stmt for S3
475 to create a variable of the intermediate type, and perform widen-mult
476 on the intermediate type as well:
480 TYPE a_T, prod_T, prod_T';
484 '--> a_it = (interm_type) a_t;
485 S5 prod_T = a_T * CONST;
486 '--> prod_T' = a_it w* CONST;
490 * STMTS: Contains a stmt from which the pattern search begins. In the
491 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
492 is detected. In case of unsigned widen-mult, the original stmt (S5) is
493 replaced with S6 in STMTS. In case of multiplication by a constant
494 of an intermediate type (the last case above), STMTS also contains S3
495 (inserted before S5).
499 * TYPE_IN: The type of the input arguments to the pattern.
501 * TYPE_OUT: The type of the output of this pattern.
503 * Return value: A new stmt that will be used to replace the sequence of
504 stmts that constitute the pattern. In this case it will be:
505 WIDEN_MULT <a_t, b_t>
    NOTE(review): the return-type line and several body lines (returns,
    braces, loop bodies) are missing from this listing.  */
509 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
510 tree *type_in, tree *type_out)
512 gimple last_stmt = VEC_pop (gimple, *stmts);
513 gimple def_stmt0, def_stmt1;
515 tree type, half_type0, half_type1;
517 tree vectype, vectype_out = NULL_TREE;
520 enum tree_code dummy_code;
522 VEC (tree, heap) *dummy_vec;
525 if (!is_gimple_assign (last_stmt))
528 type = gimple_expr_type (last_stmt);
530 /* Starting from LAST_STMT, follow the defs of its uses in search
531 of the above pattern. */
533 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
536 oprnd0 = gimple_assign_rhs1 (last_stmt);
537 oprnd1 = gimple_assign_rhs2 (last_stmt);
538 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
539 || !types_compatible_p (TREE_TYPE (oprnd1), type))
542 /* Check argument 0. */
543 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
545 /* Check argument 1. */
546 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
550 oprnd0 = gimple_assign_rhs1 (def_stmt0);
551 oprnd1 = gimple_assign_rhs1 (def_stmt1);
/* Multiplication by a constant: delegate to the by-constant helper.  */
555 if (TREE_CODE (oprnd1) == INTEGER_CST
556 && TREE_CODE (half_type0) == INTEGER_TYPE
557 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
558 &oprnd0, stmts, type,
559 &half_type0, def_stmt0))
560 half_type1 = half_type0;
565 /* Handle unsigned case. Look for
566 S6 u_prod_T = (unsigned TYPE) prod_T;
567 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
568 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
570 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
571 imm_use_iterator imm_iter;
574 gimple use_stmt = NULL;
577 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
/* The product must have exactly one (non-debug) use: the sign cast S6.  */
580 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
582 if (is_gimple_debug (USE_STMT (use_p)))
584 use_stmt = USE_STMT (use_p);
588 if (nuses != 1 || !is_gimple_assign (use_stmt)
589 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
592 use_lhs = gimple_assign_lhs (use_stmt);
593 use_type = TREE_TYPE (use_lhs);
594 if (!INTEGRAL_TYPE_P (use_type)
595 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
596 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
/* From here on treat the cast (S6) as the pattern's last statement.  */
600 last_stmt = use_stmt;
603 if (!types_compatible_p (half_type0, half_type1))
606 /* Pattern detected. */
607 if (vect_print_dump_info (REPORT_DETAILS))
608 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
610 /* Check target support */
611 vectype = get_vectype_for_scalar_type (half_type0);
612 vectype_out = get_vectype_for_scalar_type (type);
615 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
616 vectype_out, vectype,
617 &dummy, &dummy, &dummy_code,
618 &dummy_code, &dummy_int, &dummy_vec))
622 *type_out = vectype_out;
624 /* Pattern supported. Create a stmt to be used to replace the pattern: */
625 var = vect_recog_temp_ssa_var (type, NULL);
626 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
629 if (vect_print_dump_info (REPORT_DETAILS))
630 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
632 VEC_safe_push (gimple, heap, *stmts, last_stmt);
637 /* Function vect_recog_pow_pattern
639 Try to find the following pattern:
643 with POW being one of pow, powf, powi, powif and N being
648 * LAST_STMT: A stmt from which the pattern search begins.
652 * TYPE_IN: The type of the input arguments to the pattern.
654 * TYPE_OUT: The type of the output of this pattern.
656 * Return value: A new stmt that will be used to replace the sequence of
657 stmts that constitute the pattern. In this case it will be:
    NOTE(review): the return type, case labels of the switch, and several
    body lines (returns, braces) are missing from this listing.  */
664 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
667 gimple last_stmt = VEC_index (gimple, *stmts, 0);
668 tree fn, base, exp = NULL;
/* Only calls to known builtins with a used result are candidates.  */
672 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
675 fn = gimple_call_fndecl (last_stmt);
676 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
679 switch (DECL_FUNCTION_CODE (fn))
685 base = gimple_call_arg (last_stmt, 0);
686 exp = gimple_call_arg (last_stmt, 1);
687 if (TREE_CODE (exp) != REAL_CST
688 && TREE_CODE (exp) != INTEGER_CST)
696 /* We now have a pow or powi builtin function call with a constant
699 *type_out = NULL_TREE;
701 /* Catch squaring. */
702 if ((host_integerp (exp, 0)
703 && tree_low_cst (exp, 0) == 2)
704 || (TREE_CODE (exp) == REAL_CST
705 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
/* pow (x, 2) -> x * x.  */
707 *type_in = TREE_TYPE (base);
709 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
710 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
714 /* Catch square root. */
715 if (TREE_CODE (exp) == REAL_CST
716 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
/* pow (x, 0.5) -> sqrt (x), if a vectorizable sqrt builtin exists.  */
718 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
719 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
722 gimple stmt = gimple_build_call (newfn, 1, base);
723 if (vectorizable_function (stmt, *type_in, *type_in)
726 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
727 gimple_call_set_lhs (stmt, var);
737 /* Function vect_recog_widen_sum_pattern
739 Try to find the following pattern:
742 TYPE x_T, sum = init;
744 sum_0 = phi <init, sum_1>
747 S3 sum_1 = x_T + sum_0;
749 where type 'TYPE' is at least double the size of type 'type', i.e - we're
750 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
751 a special case of a reduction computation.
755 * LAST_STMT: A stmt from which the pattern search begins. In the example,
756 when this function is called with S3, the pattern {S2,S3} will be detected.
760 * TYPE_IN: The type of the input arguments to the pattern.
762 * TYPE_OUT: The type of the output of this pattern.
764 * Return value: A new stmt that will be used to replace the sequence of
765 stmts that constitute the pattern. In this case it will be:
766 WIDEN_SUM <x_t, sum_0>
768 Note: The widening-sum idiom is a widening reduction pattern that is
769 vectorized without preserving all the intermediate results. It
770 produces only N/2 (widened) results (by summing up pairs of
771 intermediate results) rather than all N results. Therefore, we
772 cannot allow this pattern when we want to get all the results and in
773 the correct order (as is the case when this computation is in an
774 inner-loop nested in an outer-loop that is being vectorized).
    NOTE(review): the return-type line and several body lines (returns,
    braces) are missing from this listing.  */
777 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
780 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
782 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
783 tree type, half_type;
785 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
786 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
789 if (!is_gimple_assign (last_stmt))
792 type = gimple_expr_type (last_stmt);
794 /* Look for the following pattern
797 In which DX is at least double the size of X, and sum_1 has been
798 recognized as a reduction variable.
801 /* Starting from LAST_STMT, follow the defs of its uses in search
802 of the above pattern. */
804 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
807 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
810 oprnd0 = gimple_assign_rhs1 (last_stmt);
811 oprnd1 = gimple_assign_rhs2 (last_stmt);
812 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
813 || !types_compatible_p (TREE_TYPE (oprnd1), type))
816 /* So far so good. Since last_stmt was detected as a (summation) reduction,
817 we know that oprnd1 is the reduction variable (defined by a loop-header
818 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
819 Left to check that oprnd0 is defined by a cast from type 'type' to type
822 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
825 oprnd0 = gimple_assign_rhs1 (stmt);
826 *type_in = half_type;
829 /* Pattern detected. Create a stmt to be used to replace the pattern: */
830 var = vect_recog_temp_ssa_var (type, NULL);
831 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
834 if (vect_print_dump_info (REPORT_DETAILS))
836 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
837 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
840 /* We don't allow changing the order of the computation in the inner-loop
841 when doing outer-loop vectorization. */
842 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
848 /* Return TRUE if the operation in STMT can be performed on a smaller type.
851 STMT - a statement to check.
852 DEF - we support operations with two operands, one of which is constant.
853 The other operand can be defined by a demotion operation, or by a
854 previous statement in a sequence of over-promoted operations. In the
855 later case DEF is used to replace that operand. (It is defined by a
856 pattern statement we created for the previous statement in the
860 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
861 NULL, it's the type of DEF.
862 STMTS - additional pattern statements. If a pattern statement (type
863 conversion) is created in this function, its original statement is
867 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
868 operands to use in the new pattern statement for STMT (will be created
869 in vect_recog_over_widening_pattern ()).
870 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
871 statements for STMT: the first one is a type promotion and the second
872 one is the operation itself. We return the type promotion statement
873 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
874 the second pattern statement.
    NOTE(review): the return type and several body lines (returns, braces,
    conditions) are missing from this listing.  */
877 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
878 tree *op0, tree *op1, gimple *new_def_stmt,
879 VEC (gimple, heap) **stmts)
882 tree const_oprnd, oprnd;
883 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
884 gimple def_stmt, new_stmt;
886 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
887 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
889 *new_def_stmt = NULL;
891 if (!is_gimple_assign (stmt))
/* Only shifts and bitwise ops with one constant operand are considered.  */
894 code = gimple_assign_rhs_code (stmt);
895 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
896 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
899 oprnd = gimple_assign_rhs1 (stmt);
900 const_oprnd = gimple_assign_rhs2 (stmt);
901 type = gimple_expr_type (stmt);
903 if (TREE_CODE (oprnd) != SSA_NAME
904 || TREE_CODE (const_oprnd) != INTEGER_CST)
907 /* If we are in the middle of a sequence, we use DEF from a previous
908 statement. Otherwise, OPRND has to be a result of type promotion. */
911 half_type = *new_type;
917 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
918 || !gimple_bb (def_stmt)
919 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
920 || !vinfo_for_stmt (def_stmt))
924 /* Can we perform the operation on a smaller type? */
930 if (!int_fits_type_p (const_oprnd, half_type))
932 /* HALF_TYPE is not enough. Try a bigger type if possible. */
933 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
936 interm_type = build_nonstandard_integer_type (
937 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
938 if (!int_fits_type_p (const_oprnd, interm_type))
945 /* Try intermediate type - HALF_TYPE is not enough for sure. */
946 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
949 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
950 (e.g., if the original value was char, the shift amount is at most 8
951 if we want to use short). */
952 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
955 interm_type = build_nonstandard_integer_type (
956 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
958 if (!vect_supportable_shift (code, interm_type))
964 if (vect_supportable_shift (code, half_type))
967 /* Try intermediate type - HALF_TYPE is not supported. */
968 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
971 interm_type = build_nonstandard_integer_type (
972 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
974 if (!vect_supportable_shift (code, interm_type))
983 /* There are four possible cases:
984 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
985 the first statement in the sequence)
986 a. The original, HALF_TYPE, is not enough - we replace the promotion
987 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
988 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
990 2. OPRND is defined by a pattern statement we created.
991 a. Its type is not sufficient for the operation, we create a new stmt:
992 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
993 this statement in NEW_DEF_STMT, and it is later put in
994 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
995 b. OPRND is good to use in the new statement. */
1000 /* Replace the original type conversion HALF_TYPE->TYPE with
1001 HALF_TYPE->INTERM_TYPE. */
1002 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1004 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1005 /* Check if the already created pattern stmt is what we need. */
1006 if (!is_gimple_assign (new_stmt)
1007 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1008 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1011 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1012 oprnd = gimple_assign_lhs (new_stmt);
1016 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1017 oprnd = gimple_assign_rhs1 (def_stmt);
1018 tmp = create_tmp_reg (interm_type, NULL);
1019 add_referenced_var (tmp);
1020 new_oprnd = make_ssa_name (tmp, NULL);
1021 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1023 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1024 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1030 /* Retrieve the operand before the type promotion. */
1031 oprnd = gimple_assign_rhs1 (def_stmt);
1038 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1039 tmp = create_tmp_reg (interm_type, NULL);
1040 add_referenced_var (tmp);
1041 new_oprnd = make_ssa_name (tmp, NULL);
1042 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1045 *new_def_stmt = new_stmt;
1048 /* Otherwise, OPRND is already set. */
1052 *new_type = interm_type;
1054 *new_type = half_type;
/* Fold the constant operand into the narrower type for the new stmt.  */
1057 *op1 = fold_convert (*new_type, const_oprnd);
1063 /* Try to find a statement or a sequence of statements that can be performed
1067 TYPE x_T, res0_T, res1_T;
1070 S2 x_T = (TYPE) x_t;
1071 S3 res0_T = op (x_T, C0);
1072 S4 res1_T = op (res0_T, C1);
1073 S5 ... = () res1_T; - type demotion
1075 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1077 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1078 be 'type' or some intermediate type. For now, we expect S5 to be a type
1079 demotion operation. We also check that S3 and S4 have only one use.
     NOTE(review): the return type and several body lines (the enclosing
     while/for loop, returns, braces) are missing from this listing.  */
1082 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1083 tree *type_in, tree *type_out)
1085 gimple stmt = VEC_pop (gimple, *stmts);
1086 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1087 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1088 imm_use_iterator imm_iter;
1089 use_operand_p use_p;
1091 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1093 struct loop *loop = (gimple_bb (stmt))->loop_father;
1099 if (!vinfo_for_stmt (stmt)
1100 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1103 new_def_stmt = NULL;
1104 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1105 &op0, &op1, &new_def_stmt,
1114 /* STMT can be performed on a smaller type. Check its uses. */
1115 lhs = gimple_assign_lhs (stmt);
1117 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1119 if (is_gimple_debug (USE_STMT (use_p)))
1121 use_stmt = USE_STMT (use_p);
/* The result must have exactly one (non-debug) use inside the loop.  */
1125 if (nuses != 1 || !is_gimple_assign (use_stmt)
1126 || !gimple_bb (use_stmt)
1127 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1130 /* Create pattern statement for STMT. */
1131 vectype = get_vectype_for_scalar_type (new_type);
1135 /* We want to collect all the statements for which we create pattern
1136 statements, except for the case when the last statement in the
1137 sequence doesn't have a corresponding pattern statement. In such
1138 case we associate the last pattern statement with the last statement
1139 in the sequence. Therefore, we only add the original statement to
1140 the list if we know that it is not the last. */
1142 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1144 var = vect_recog_temp_ssa_var (new_type, NULL);
1146 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1148 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1149 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt))
1150 = gimple_seq_alloc_with_stmt (new_def_stmt);
1152 if (vect_print_dump_info (REPORT_DETAILS))
1154 fprintf (vect_dump, "created pattern stmt: ");
1155 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1158 type = gimple_expr_type (stmt);
1165 /* We got a sequence. We expect it to end with a type demotion operation.
1166 Otherwise, we quit (for now). There are three possible cases: the
1167 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1168 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1169 NEW_TYPE differs (we create a new conversion statement). */
1170 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1172 use_lhs = gimple_assign_lhs (use_stmt);
1173 use_type = TREE_TYPE (use_lhs);
1174 /* Support only type promotion or signedness change. Check that USE_TYPE
1175 is not bigger than the original type. */
1176 if (!INTEGRAL_TYPE_P (use_type)
1177 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
1178 || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
1181 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1182 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1184 /* Create NEW_TYPE->USE_TYPE conversion. */
1185 tmp = create_tmp_reg (use_type, NULL);
1186 add_referenced_var (tmp);
1187 new_oprnd = make_ssa_name (tmp, NULL);
1188 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1190 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1192 *type_in = get_vectype_for_scalar_type (new_type);
1193 *type_out = get_vectype_for_scalar_type (use_type);
1195 /* We created a pattern statement for the last statement in the
1196 sequence, so we don't need to associate it with the pattern
1197 statement created for PREV_STMT. Therefore, we add PREV_STMT
1198 to the list in order to mark it later in vect_pattern_recog_1. */
1200 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1205 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1206 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1209 *type_out = NULL_TREE;
1212 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1215 /* TODO: support general case, create a conversion to the correct type. */
1218 /* Pattern detected. */
1219 if (vect_print_dump_info (REPORT_DETAILS))
1221 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1222 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1225 return pattern_stmt;
1228 /* Detect widening shift pattern:
1234 S2 a_T = (TYPE) a_t;
1235 S3 res_T = a_T << CONST;
1237 where type 'TYPE' is at least double the size of type 'type'.
1239 Also detect unsigned cases:
1242 unsigned TYPE u_res_T;
1246 S2 a_T = (TYPE) a_t;
1247 S3 res_T = a_T << CONST;
1248 S4 u_res_T = (unsigned TYPE) res_T;
1250 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1251 create an additional pattern stmt for S2 to create a variable of an
1252 intermediate type, and perform widen-shift on the intermediate type:
1256 TYPE a_T, res_T, res_T';
1259 S2 a_T = (TYPE) a_t;
1260 '--> a_it = (interm_type) a_t;
1261 S3 res_T = a_T << CONST;
1262 '--> res_T' = a_it <<* CONST;
1266 * STMTS: Contains a stmt from which the pattern search begins.
1267 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1268 in STMTS. When an intermediate type is used and a pattern statement is
1269 created for S2, we also put S2 here (before S3).
1273 * TYPE_IN: The type of the input arguments to the pattern.
1275 * TYPE_OUT: The type of the output of this pattern.
1277 * Return value: A new stmt that will be used to replace the sequence of
1278 stmts that constitute the pattern. In this case it will be:
1279 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1282 vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1283 tree *type_in, tree *type_out)
/* The candidate stmt is popped from *STMTS here; on success a stmt (the
   converting USE_STMT in the unsigned case, otherwise ORIG_STMT) is pushed
   back just before returning, per the contract described above.  */
1285 gimple last_stmt = VEC_pop (gimple, *stmts);
1287 tree oprnd0, oprnd1;
1288 tree type, half_type0;
1289 gimple pattern_stmt, orig_stmt = NULL;
1290 tree vectype, vectype_out = NULL_TREE;
1293 enum tree_code dummy_code;
1295 VEC (tree, heap) * dummy_vec;
1296 gimple use_stmt = NULL;
1297 bool over_widen = false;
/* Only gimple assignments that already carry vectorizer info qualify.  */
1299 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1302 orig_stmt = last_stmt;
1303 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1305 /* This statement was also detected as over-widening operation (it can't
1306 be any other pattern, because only over-widening detects shifts).
1307 LAST_STMT is the final type demotion statement, but its related
1308 statement is shift. We analyze the related statement to catch cases:
1315 S1 a_T = (TYPE) a_t;
1316 S2 res_T = a_T << CONST;
1317 S3 res = (itype)res_T;
1319 (size of type * 2 <= size of itype
1320 and size of itype * 2 <= size of TYPE)
1322 code after over-widening pattern detection:
1324 S1 a_T = (TYPE) a_t;
1325 --> a_it = (itype) a_t;
1326 S2 res_T = a_T << CONST;
1327 S3 res = (itype)res_T; <--- LAST_STMT
1328 --> res = a_it << CONST;
1332 S1 a_T = (TYPE) a_t;
1333 --> a_it = (itype) a_t; - redundant
1334 S2 res_T = a_T << CONST;
1335 S3 res = (itype)res_T;
1336 --> res = a_t w<< CONST;
1338 i.e., we replace the three statements with res = a_t w<< CONST. */
1339 last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1343 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1346 oprnd0 = gimple_assign_rhs1 (last_stmt);
1347 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Only SSA_NAME << INTEGER_CST shifts are candidates.  */
1348 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1351 /* Check operand 0: it has to be defined by a type promotion. */
1352 if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1355 /* Check operand 1: has to be positive. We check that it fits the type
1356 in vect_handle_widen_op_by_const (). */
1357 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
/* Look through the promotion: from here on OPRND0 is the narrow source
   a_t (the rhs of the defining conversion S2).  */
1360 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1361 type = gimple_expr_type (last_stmt);
1363 /* Check if this a widening operation. */
1364 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1366 type, &half_type0, def_stmt0))
1369 /* Handle unsigned case. Look for
1370 S4 u_res_T = (unsigned TYPE) res_T;
1371 Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1372 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1374 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1375 imm_use_iterator imm_iter;
1376 use_operand_p use_p;
1382 /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1383 We check here that TYPE is the correct type for the operation,
1384 i.e., it's the type of the original result. */
1385 tree orig_type = gimple_expr_type (orig_stmt);
1386 if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1387 || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
/* Scan immediate uses of the shift result, skipping debug stmts; the
   NUSES check below accepts exactly one real use.  NOTE(review): the
   NUSES counter update is in elided lines — confirm against full file.  */
1392 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1394 if (is_gimple_debug (USE_STMT (use_p)))
1396 use_stmt = USE_STMT (use_p);
1400 if (nuses != 1 || !is_gimple_assign (use_stmt)
1401 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1404 use_lhs = gimple_assign_lhs (use_stmt);
1405 use_type = TREE_TYPE (use_lhs);
/* The single use must be a conversion S4 that flips signedness only
   (same precision, opposite TYPE_UNSIGNED).  */
1407 if (!INTEGRAL_TYPE_P (use_type)
1408 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1409 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1416 /* Pattern detected. */
1417 if (vect_print_dump_info (REPORT_DETAILS))
1418 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1420 /* Check target support. */
1421 vectype = get_vectype_for_scalar_type (half_type0);
1422 vectype_out = get_vectype_for_scalar_type (type);
1426 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1427 vectype_out, vectype,
1428 &dummy, &dummy, &dummy_code,
1429 &dummy_code, &dummy_int,
1434 *type_out = vectype_out;
1436 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1437 var = vect_recog_temp_ssa_var (type, NULL);
1439 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1441 if (vect_print_dump_info (REPORT_DETAILS))
1442 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
/* Decide which stmt represents the pattern in *STMTS: the signedness
   conversion (unsigned case) or the original demotion stmt.  */
1445 last_stmt = use_stmt;
1447 last_stmt = orig_stmt;
1449 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1450 return pattern_stmt;
1453 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1461 S3 res_T = b_T op a_t;
1463 where type 'TYPE' is a type with different size than 'type',
1464 and op is <<, >> or rotate.
1469 TYPE b_T, c_T, res_T;
1472 S1 a_t = (type) c_T;
1474 S3 res_T = b_T op a_t;
1478 * STMTS: Contains a stmt from which the pattern search begins,
1479 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1480 with a shift/rotate which has same type on both operands, in the
1481 second case just b_T op c_T, in the first case with added cast
1482 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1486 * TYPE_IN: The type of the input arguments to the pattern.
1488 * TYPE_OUT: The type of the output of this pattern.
1490 * Return value: A new stmt that will be used to replace the shift/rotate
1494 vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1495 tree *type_in, tree *type_out)
1497 gimple last_stmt = VEC_pop (gimple, *stmts);
1498 tree oprnd0, oprnd1, lhs, var;
1499 gimple pattern_stmt, def_stmt;
1500 enum tree_code rhs_code;
1501 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1502 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1503 enum vect_def_type dt;
1506 if (!is_gimple_assign (last_stmt))
1509 rhs_code = gimple_assign_rhs_code (last_stmt);
/* Skip stmts already absorbed into another pattern.  */
1521 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1524 lhs = gimple_assign_lhs (last_stmt);
1525 oprnd0 = gimple_assign_rhs1 (last_stmt);
1526 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Require: both operands are SSA_NAMEs, their modes differ (that is what
   makes the shift "vector by vector" with mismatched types), the shift
   amount's precision fills its mode, and the result precision matches
   the shifted operand's.  */
1527 if (TREE_CODE (oprnd0) != SSA_NAME
1528 || TREE_CODE (oprnd1) != SSA_NAME
1529 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1530 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1531 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1532 || TYPE_PRECISION (TREE_TYPE (lhs))
1533 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
/* The shift amount must be an internal (loop-defined) SSA def.  */
1536 if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
1539 if (dt != vect_internal_def)
1542 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1543 *type_out = *type_in;
1544 if (*type_in == NULL_TREE)
/* If the shift amount is itself a cast from a value that already has
   OPRND0's mode and precision, reuse the cast's source directly ...  */
1548 if (gimple_assign_cast_p (def_stmt))
1550 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1551 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1552 && TYPE_PRECISION (TREE_TYPE (rhs1))
1553 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
/* ... otherwise synthesize a NOP_EXPR cast of OPRND1 to OPRND0's type
   and record it in the pattern def sequence.  */
1557 if (def == NULL_TREE)
1559 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1560 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1562 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
1563 = gimple_seq_alloc_with_stmt (def_stmt);
1566 /* Pattern detected. */
1567 if (vect_print_dump_info (REPORT_DETAILS))
1568 fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1570 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1571 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1572 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1577 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1578 return pattern_stmt;
1581 /* Detect a signed division by power of two constant that wouldn't be
1582 otherwise vectorized:
1588 where type 'type' is a signed integral type and N is a constant positive
1591 Similarly handle signed modulo by power of two constant:
1597 * STMTS: Contains a stmt from which the pattern search begins,
1598 i.e. the division stmt. S1 is replaced by:
1599 S3 y_t = b_t < 0 ? N - 1 : 0;
1601 S1' a_t = x_t >> log2 (N);
1603 S4 is replaced by (where *_T temporaries have unsigned type):
1604 S9 y_T = b_t < 0 ? -1U : 0U;
1605 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1606 S7 z_t = (type) z_T;
1608 S5 x_t = w_t & (N - 1);
1609 S4' a_t = x_t - z_t;
1613 * TYPE_IN: The type of the input arguments to the pattern.
1615 * TYPE_OUT: The type of the output of this pattern.
1617 * Return value: A new stmt that will be used to replace the division
1618 S1 or modulo S4 stmt. */
1621 vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1622 tree *type_in, tree *type_out)
1624 gimple last_stmt = VEC_pop (gimple, *stmts);
1625 tree oprnd0, oprnd1, vectype, itype, cond;
1626 gimple pattern_stmt, def_stmt;
1627 enum tree_code rhs_code;
1628 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1629 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1632 if (!is_gimple_assign (last_stmt))
1635 rhs_code = gimple_assign_rhs_code (last_stmt);
/* Only truncating division and modulo are handled.  */
1638 case TRUNC_DIV_EXPR:
1639 case TRUNC_MOD_EXPR:
1645 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1648 oprnd0 = gimple_assign_rhs1 (last_stmt);
1649 oprnd1 = gimple_assign_rhs2 (last_stmt);
1650 itype = TREE_TYPE (oprnd0);
/* Require a signed, full-mode-precision integer dividend and a positive
   power-of-two constant divisor.  */
1651 if (TREE_CODE (oprnd0) != SSA_NAME
1652 || TREE_CODE (oprnd1) != INTEGER_CST
1653 || TREE_CODE (itype) != INTEGER_TYPE
1654 || TYPE_UNSIGNED (itype)
1655 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1656 || !integer_pow2p (oprnd1)
1657 || tree_int_cst_sgn (oprnd1) != 1)
1660 vectype = get_vectype_for_scalar_type (itype);
1661 if (vectype == NULL_TREE)
1664 /* If the target can handle vectorized division or modulo natively,
1665 don't attempt to optimize this. */
1666 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1669 enum machine_mode vec_mode = TYPE_MODE (vectype);
1670 int icode = (int) optab_handler (optab, vec_mode);
1671 if (icode != CODE_FOR_nothing
1672 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1676 /* Pattern detected. */
1677 if (vect_print_dump_info (REPORT_DETAILS))
1678 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
/* COND is true for negative dividends; both the division and the modulo
   expansions below branch on it to apply the rounding adjustment.  */
1680 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1681 if (rhs_code == TRUNC_DIV_EXPR)
1683 tree var = vect_recog_temp_ssa_var (itype, NULL);
/* Division: add (b < 0 ? N - 1 : 0) before shifting right by log2 (N)
   so the shift truncates toward zero like TRUNC_DIV_EXPR.  */
1685 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1686 fold_build2 (MINUS_EXPR, itype,
1688 build_int_cst (itype,
1690 build_int_cst (itype, 0));
1691 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
1692 = gimple_seq_alloc_with_stmt (def_stmt);
1693 var = vect_recog_temp_ssa_var (itype, NULL);
1695 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1696 gimple_assign_lhs (def_stmt));
1697 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
1701 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1702 vect_recog_temp_ssa_var (itype, NULL),
1704 build_int_cst (itype,
1705 tree_log2 (oprnd1)));
1710 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
/* Modulo, N == 2: the adjustment mask is simply (b < 0 ? 1 : 0).  */
1711 if (compare_tree_int (oprnd1, 2) == 0)
1713 signmask = vect_recog_temp_ssa_var (itype, NULL);
1715 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1716 build_int_cst (itype, 1),
1717 build_int_cst (itype, 0));
1718 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
/* Modulo, general N: compute the sign mask in an unsigned copy of ITYPE
   ((b < 0 ? -1U : 0U) >> (bitsize - log2 (N))) and convert it back, per
   the S9/S8/S7 steps documented above.  */
1724 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1725 tree vecutype = get_vectype_for_scalar_type (utype);
1727 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1728 - tree_log2 (oprnd1));
1729 tree var = vect_recog_temp_ssa_var (utype, NULL);
1730 stmt_vec_info def_stmt_vinfo;
1733 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1734 build_int_cst (utype, -1),
1735 build_int_cst (utype, 0));
/* The unsigned helper stmts need their own vinfo so the vectorizer uses
   VECUTYPE (not the signed vectype) for them.  */
1736 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1737 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1738 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1739 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
1741 var = vect_recog_temp_ssa_var (utype, NULL);
1743 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1744 gimple_assign_lhs (def_stmt),
1746 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1747 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1748 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1749 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
1751 signmask = vect_recog_temp_ssa_var (itype, NULL);
1753 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1755 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
/* Shared modulo tail: (b + signmask) & (N - 1), then subtract signmask
   again — steps S5/S4' above.  */
1759 = gimple_build_assign_with_ops (PLUS_EXPR,
1760 vect_recog_temp_ssa_var (itype, NULL),
1762 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
1765 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1766 vect_recog_temp_ssa_var (itype, NULL),
1767 gimple_assign_lhs (def_stmt),
1768 fold_build2 (MINUS_EXPR, itype,
1770 build_int_cst (itype,
1772 gimplify_seq_add_stmt (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo),
1776 = gimple_build_assign_with_ops (MINUS_EXPR,
1777 vect_recog_temp_ssa_var (itype, NULL),
1778 gimple_assign_lhs (def_stmt),
1782 if (vect_print_dump_info (REPORT_DETAILS))
1783 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM)
1785 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1788 *type_out = vectype;
1789 return pattern_stmt;
1792 /* Function vect_recog_mixed_size_cond_pattern
1794 Try to find the following pattern:
1799 S1 a_T = x_t CMP y_t ? b_T : c_T;
1801 where type 'TYPE' is an integral type which has different size
1802 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1803 than 'type', the constants need to fit into an integer type
1804 with the same width as 'type'.
1808 * LAST_STMT: A stmt from which the pattern search begins.
1812 * TYPE_IN: The type of the input arguments to the pattern.
1814 * TYPE_OUT: The type of the output of this pattern.
1816 * Return value: A new stmt that will be used to replace the pattern.
1817 Additionally a def_stmt is added.
1819 a_it = x_t CMP y_t ? b_it : c_it;
1820 a_T = (TYPE) a_it; */
1823 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
/* NOTE(review): unlike the other recog routines this one reads the
   candidate via VEC_index without popping it; the push-back handling is
   in elided lines — confirm against the full file.  */
1826 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1827 tree cond_expr, then_clause, else_clause;
1828 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1829 tree type, vectype, comp_vectype, itype, vecitype;
1830 enum machine_mode cmpmode;
1831 gimple pattern_stmt, def_stmt;
1832 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1834 if (!is_gimple_assign (last_stmt)
1835 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1836 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1839 cond_expr = gimple_assign_rhs1 (last_stmt);
1840 then_clause = gimple_assign_rhs2 (last_stmt);
1841 else_clause = gimple_assign_rhs3 (last_stmt);
/* Both arms must be integer constants (b_T and c_T above).  */
1843 if (TREE_CODE (then_clause) != INTEGER_CST
1844 || TREE_CODE (else_clause) != INTEGER_CST)
1847 if (!COMPARISON_CLASS_P (cond_expr))
1851 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)))
1852 if (comp_vectype == NULL_TREE)
1855 type = gimple_expr_type (last_stmt);
1856 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
/* If result and comparison element already have the same width there is
   no mixed-size problem to fix.  */
1858 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1861 vectype = get_vectype_for_scalar_type (type);
1862 if (vectype == NULL_TREE)
/* If the target can expand the VEC_COND_EXPR directly, leave it alone.  */
1865 if (expand_vec_cond_expr_p (vectype, comp_vectype))
/* ITYPE: an integer type matching the comparison width but keeping the
   result type's signedness; the COND_EXPR is done in ITYPE and then
   converted to TYPE.  */
1868 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1869 TYPE_UNSIGNED (type));
1870 if (itype == NULL_TREE
1871 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1874 vecitype = get_vectype_for_scalar_type (itype);
1875 if (vecitype == NULL_TREE)
1878 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
/* When narrowing (TYPE wider than the comparison), the constant arms
   must be representable in ITYPE.  */
1881 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1883 if (!int_fits_type_p (then_clause, itype)
1884 || !int_fits_type_p (else_clause, itype))
/* Emit: a_it = cond ? b_it : c_it (DEF_STMT) and a_T = (TYPE) a_it.  */
1889 = gimple_build_assign_with_ops3 (COND_EXPR,
1890 vect_recog_temp_ssa_var (itype, NULL),
1891 unshare_expr (cond_expr),
1892 fold_convert (itype, then_clause),
1893 fold_convert (itype, else_clause));
1895 = gimple_build_assign_with_ops (NOP_EXPR,
1896 vect_recog_temp_ssa_var (type, NULL),
1897 gimple_assign_lhs (def_stmt), NULL_TREE);
1899 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
1900 = gimple_seq_alloc_with_stmt (def_stmt);
1901 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1902 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1903 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1904 *type_in = vecitype;
1905 *type_out = vectype;
1907 return pattern_stmt;
1911 /* Helper function of vect_recog_bool_pattern. Called recursively, return
1912 true if bool VAR can be optimized that way. */
1915 check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1918 enum vect_def_type dt;
1920 enum tree_code rhs_code;
/* Walk the def chain of VAR: each step must be a loop-internal,
   single-use gimple assignment, otherwise the bool pattern cannot be
   rewritten safely.  */
1922 if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
1925 if (dt != vect_internal_def)
1928 if (!is_gimple_assign (def_stmt))
1931 if (!has_single_use (def))
1934 rhs1 = gimple_assign_rhs1 (def_stmt);
1935 rhs_code = gimple_assign_rhs_code (def_stmt);
/* Recurse through the rhs depending on the defining code (the switch
   labels are in elided lines; each visible arm recurses on RHS1, and the
   binary case also checks RHS2).  */
1939 return check_bool_pattern (rhs1, loop_vinfo);
/* Conversion case: only accept a source that is itself boolean-like
   (1-bit unsigned or BOOLEAN_TYPE).  */
1942 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1943 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1944 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1946 return check_bool_pattern (rhs1, loop_vinfo);
1949 return check_bool_pattern (rhs1, loop_vinfo);
1954 if (!check_bool_pattern (rhs1, loop_vinfo))
1956 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
/* Leaf case: a comparison.  It is usable iff the target can expand a
   VEC_COND_EXPR on the corresponding (possibly re-typed) vector type.  */
1959 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1961 tree vecitype, comp_vectype;
1963 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1964 if (comp_vectype == NULL_TREE)
/* Non-INTEGER_TYPE comparisons get an unsigned integer type of the same
   mode width for the COND_EXPR result.  */
1967 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1969 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1971 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1972 vecitype = get_vectype_for_scalar_type (itype);
1973 if (vecitype == NULL_TREE)
1977 vecitype = comp_vectype;
1978 return expand_vec_cond_expr_p (vecitype, comp_vectype);
1985 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
1986 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
1987 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
1990 adjust_bool_pattern_cast (tree type, tree var)
1992 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
1993 gimple cast_stmt, pattern_stmt;
/* VAR's def stmt must not already have a pattern def sequence: its
   previously-recorded pattern stmt is demoted into the def sequence and
   replaced by the new cast.  */
1995 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
1996 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1997 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
1998 = gimple_seq_alloc_with_stmt (pattern_stmt);
/* Build tmp = (TYPE) <old pattern lhs> and make it the related stmt;
   callers use the returned lhs as the widened operand.  */
2000 = gimple_build_assign_with_ops (NOP_EXPR,
2001 vect_recog_temp_ssa_var (type, NULL),
2002 gimple_assign_lhs (pattern_stmt),
2004 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2005 return gimple_assign_lhs (cast_stmt);
2009 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2010 recursively. VAR is an SSA_NAME that should be transformed from bool
2011 to a wider integer type, OUT_TYPE is the desired final integer type of
2012 the whole pattern, TRUEVAL should be NULL unless optimizing
2013 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2014 in the then_clause, STMTS is where statements with added pattern stmts
2015 should be pushed to. */
2018 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2019 VEC (gimple, heap) **stmts)
2021 gimple stmt = SSA_NAME_DEF_STMT (var);
2022 enum tree_code rhs_code, def_rhs_code;
2023 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2025 gimple pattern_stmt, def_stmt;
2027 rhs1 = gimple_assign_rhs1 (stmt);
2028 rhs2 = gimple_assign_rhs2 (stmt);
2029 rhs_code = gimple_assign_rhs_code (stmt);
2030 loc = gimple_location (stmt);
/* Dispatch on the defining code (switch labels in elided lines).
   Copy case: forward the adjusted operand under a new SSA name.  */
2035 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2036 itype = TREE_TYPE (irhs1);
2038 = gimple_build_assign_with_ops (SSA_NAME,
2039 vect_recog_temp_ssa_var (itype, NULL),
/* Negation case: !x becomes x ^ 1 in the widened integer type.  */
2044 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2045 itype = TREE_TYPE (irhs1);
2047 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2048 vect_recog_temp_ssa_var (itype, NULL),
2049 irhs1, build_int_cst (itype, 1));
2053 /* Try to optimize x = y & (a < b ? 1 : 0); into
2054 x = (a < b ? y : 0);
2060 S1 a_b = x1 CMP1 y1;
2061 S2 b_b = x2 CMP2 y2;
2063 S4 d_T = (TYPE) c_b;
2065 we would normally emit:
2067 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2068 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2069 S3' c_T = a_T & b_T;
2072 but we can save one stmt by using the
2073 result of one of the COND_EXPRs in the other COND_EXPR and leave
2074 BIT_AND_EXPR stmt out:
2076 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2077 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2080 At least when VEC_COND_EXPR is implemented using masks
2081 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2082 computes the comparison masks and ands it, in one case with
2083 all ones vector, in the other case with a vector register.
2084 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2085 often more expensive. */
2086 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2087 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2088 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2090 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2091 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2092 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2093 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
/* Fold the comparison's COND_EXPR into this stmt: the recursive call
   with TRUEVAL = IRHS1 makes DEF_STMT's pattern produce cond ? irhs1 : 0;
   DEF_STMT's pattern bookkeeping is then transferred to STMT.  */
2096 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2097 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2098 tstmt = VEC_pop (gimple, *stmts);
2099 gcc_assert (tstmt == def_stmt);
2100 VEC_quick_push (gimple, *stmts, stmt);
2101 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2102 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2103 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2104 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2108 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
/* Mirror image of the case above, with the comparison on RHS1.  */
2111 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2112 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2113 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2115 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2116 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2117 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2118 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2121 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2122 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2123 tstmt = VEC_pop (gimple, *stmts);
2124 gcc_assert (tstmt == def_stmt);
2125 VEC_quick_push (gimple, *stmts, stmt);
2126 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2127 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2128 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2129 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2133 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
/* Generic binary case: adjust both operands ...  */
2139 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2140 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
/* ... and if their widened precisions differ, cast the operand whose
   precision is farther from OUT_TYPE's to the other's type (or both to
   OUT_TYPE when equally distant).  */
2142 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2143 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2145 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2146 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2147 int out_prec = TYPE_PRECISION (out_type);
2148 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2149 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2150 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2151 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2154 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2155 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2158 itype = TREE_TYPE (irhs1);
2160 = gimple_build_assign_with_ops (rhs_code,
2161 vect_recog_temp_ssa_var (itype, NULL),
/* Default case: a comparison leaf.  Rewrite it as
   tmp = cond_expr ? TRUEVAL : 0 in an integer type of matching width.  */
2166 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2167 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2168 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2170 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2172 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2175 itype = TREE_TYPE (rhs1);
2176 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2177 if (trueval == NULL_TREE)
2178 trueval = build_int_cst (itype, 1);
2180 gcc_checking_assert (useless_type_conversion_p (itype,
2181 TREE_TYPE (trueval)));
2183 = gimple_build_assign_with_ops3 (COND_EXPR,
2184 vect_recog_temp_ssa_var (itype, NULL),
2186 build_int_cst (itype, 0));
/* Record STMT and its replacement; return the widened value.  */
2190 VEC_safe_push (gimple, heap, *stmts, stmt);
2191 gimple_set_location (pattern_stmt, loc);
2192 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2193 return gimple_assign_lhs (pattern_stmt);
2197 /* Function vect_recog_bool_pattern
2199 Try to find pattern like following:
2201 bool a_b, b_b, c_b, d_b, e_b;
2204 S1 a_b = x1 CMP1 y1;
2205 S2 b_b = x2 CMP2 y2;
2207 S4 d_b = x3 CMP3 y3;
2209 S6 f_T = (TYPE) e_b;
2211 where type 'TYPE' is an integral type.
2215 * LAST_STMT: A stmt at the end from which the pattern
2216 search begins, i.e. cast of a bool to
2221 * TYPE_IN: The type of the input arguments to the pattern.
2223 * TYPE_OUT: The type of the output of this pattern.
2225 * Return value: A new stmt that will be used to replace the pattern.
2227 Assuming size of TYPE is the same as size of all comparisons
2228 (otherwise some casts would be added where needed), the above
2229 sequence we create related pattern stmts:
2230 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2231 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2232 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2233 S5' e_T = c_T | d_T;
2236 Instead of the above S3' we could emit:
2237 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2238 S3' c_T = a_T | b_T;
2239 but the above is more efficient. */
2242 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2245 gimple last_stmt = VEC_pop (gimple, *stmts);
2246 enum tree_code rhs_code;
2247 tree var, lhs, rhs, vectype;
2248 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2249 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2250 gimple pattern_stmt;
2252 if (!is_gimple_assign (last_stmt))
2255 var = gimple_assign_rhs1 (last_stmt);
2256 lhs = gimple_assign_lhs (last_stmt);
/* VAR must be boolean-like: 1-bit unsigned or BOOLEAN_TYPE.  */
2258 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2259 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2260 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2263 rhs_code = gimple_assign_rhs_code (last_stmt);
/* First form: an integral cast of the bool (S6 above).  */
2264 if (CONVERT_EXPR_CODE_P (rhs_code))
2266 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2267 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2269 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2270 if (vectype == NULL_TREE)
2273 if (!check_bool_pattern (var, loop_vinfo))
2276 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2277 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
/* Copy if the adjusted value already has the right type, else cast.  */
2278 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2280 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2283 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2284 *type_out = vectype;
2286 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2287 return pattern_stmt;
/* Second form: a bool-valued SSA_NAME stored through a data reference.
   Rebuild the store with an integer rhs view-converted to the vectype
   and move the data-reference bookkeeping onto the new pattern stmt.  */
2289 else if (rhs_code == SSA_NAME
2290 && STMT_VINFO_DATA_REF (stmt_vinfo))
2292 stmt_vec_info pattern_stmt_info;
2293 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2294 gcc_assert (vectype != NULL_TREE);
2295 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2297 if (!check_bool_pattern (var, loop_vinfo))
2300 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2301 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2302 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2304 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2306 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2307 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
2308 = gimple_seq_alloc_with_stmt (cast_stmt);
2312 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2313 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2314 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
/* Transfer every data-reference field so the vectorizer treats the
   pattern stmt as the access, then repoint DR_STMT at it.  */
2315 STMT_VINFO_DATA_REF (pattern_stmt_info)
2316 = STMT_VINFO_DATA_REF (stmt_vinfo);
2317 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2318 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2319 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2320 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2321 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2322 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2323 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2324 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2325 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2326 *type_out = vectype;
2328 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2329 return pattern_stmt;
2336 /* Mark statements that are involved in a pattern. */
2339 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2340 tree pattern_vectype)
2342 stmt_vec_info pattern_stmt_info, def_stmt_info;
2343 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2344 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
/* Create stmt_vec_info for PATTERN_STMT on demand; some recog functions
   have already attached one.  */
2347 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2348 if (pattern_stmt_info == NULL)
2350 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2351 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2353 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
/* Cross-link original and pattern stmts and copy def-type/vectype; the
   original is flagged as being inside a pattern.  */
2355 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2356 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2357 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2358 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2359 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2360 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
/* The pattern def sequence moves from the original stmt's vinfo to the
   pattern stmt's vinfo.  */
2361 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2362 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2363 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2365 gimple_stmt_iterator si;
/* Propagate bb, related stmt, def-type and (if unset) the pattern
   vectype onto every stmt in the def sequence.  */
2366 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2367 !gsi_end_p (si); gsi_next (&si))
2369 def_stmt = gsi_stmt (si);
2370 def_stmt_info = vinfo_for_stmt (def_stmt);
2371 if (def_stmt_info == NULL)
2373 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2374 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2376 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2377 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2378 STMT_VINFO_DEF_TYPE (def_stmt_info)
2379 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2380 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2381 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2386 /* Function vect_pattern_recog_1
2389 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2390 computation pattern.
2391 STMT: A stmt from which the pattern search should start.
2393 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2394 expression that computes the same functionality and can be used to
2395 replace the sequence of stmts that are involved in the pattern.
2398 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2399 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2400 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2401 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2402 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2403 to the available target pattern.
2405 This function also does some bookkeeping, as explained in the documentation
2406 for vect_recog_pattern. */
2409 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2410 gimple_stmt_iterator si,
2411 VEC (gimple, heap) **stmts_to_replace)
2413 gimple stmt = gsi_stmt (si), pattern_stmt;
2414 stmt_vec_info stmt_info;
2415 loop_vec_info loop_vinfo;
2416 tree pattern_vectype;
2417 tree type_in, type_out;
2418 enum tree_code code;
2422 VEC_truncate (gimple, *stmts_to_replace, 0);
2423 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2424 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2428 stmt = VEC_last (gimple, *stmts_to_replace);
2429 stmt_info = vinfo_for_stmt (stmt);
2430 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2432 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2434 /* No need to check target support (already checked by the pattern
2435 recognition function). */
2436 pattern_vectype = type_out ? type_out : type_in;
2440 enum machine_mode vec_mode;
2441 enum insn_code icode;
2444 /* Check target support */
2445 type_in = get_vectype_for_scalar_type (type_in);
2449 type_out = get_vectype_for_scalar_type (type_out);
2454 pattern_vectype = type_out;
2456 if (is_gimple_assign (pattern_stmt))
2457 code = gimple_assign_rhs_code (pattern_stmt);
2460 gcc_assert (is_gimple_call (pattern_stmt));
2464 optab = optab_for_tree_code (code, type_in, optab_default);
2465 vec_mode = TYPE_MODE (type_in);
2467 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2468 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2472 /* Found a vectorizable pattern. */
2473 if (vect_print_dump_info (REPORT_DETAILS))
2475 fprintf (vect_dump, "pattern recognized: ");
2476 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2479 /* Mark the stmts that are involved in the pattern. */
2480 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2482 /* Patterns cannot be vectorized using SLP, because they change the order of
2484 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2486 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2488 /* It is possible that additional pattern stmts are created and inserted in
2489 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2490 relevant statements. */
2491 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2492 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2495 stmt_info = vinfo_for_stmt (stmt);
2496 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2497 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "additional pattern stmt: ");
2500 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2503 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2508 /* Function vect_pattern_recog
2511 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2514 Output - for each computation idiom that is detected we create a new stmt
2515 that provides the same functionality and that can be vectorized. We
2516 also record some information in the struct_stmt_info of the relevant
2517 stmts, as explained below:
2519 At the entry to this function we have the following stmts, with the
2520 following initial value in the STMT_VINFO fields:
2522 stmt in_pattern_p related_stmt vec_stmt
2523 S1: a_i = .... - - -
2524 S2: a_2 = ..use(a_i).. - - -
2525 S3: a_1 = ..use(a_2).. - - -
2526 S4: a_0 = ..use(a_1).. - - -
2527 S5: ... = ..use(a_0).. - - -
2529 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2530 represented by a single stmt. We then:
2531 - create a new stmt S6 equivalent to the pattern (the stmt is not
2532 inserted into the code)
2533 - fill in the STMT_VINFO fields as follows:
2535 in_pattern_p related_stmt vec_stmt
2536 S1: a_i = .... - - -
2537 S2: a_2 = ..use(a_i).. - - -
2538 S3: a_1 = ..use(a_2).. - - -
2539 S4: a_0 = ..use(a_1).. true S6 -
2540 '---> S6: a_new = .... - S4 -
2541 S5: ... = ..use(a_0).. - - -
2543 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2544 to each other through the RELATED_STMT field).
2546 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2547 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2548 remain irrelevant unless used by stmts other than S4.
2550 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2551 (because they are marked as irrelevant). It will vectorize S6, and record
2552 a pointer to the new vector stmt VS6 from S6 (as usual).
2553 S4 will be skipped, and S5 will be vectorized as usual:
2555 in_pattern_p related_stmt vec_stmt
2556 S1: a_i = .... - - -
2557 S2: a_2 = ..use(a_i).. - - -
2558 S3: a_1 = ..use(a_2).. - - -
2559 > VS6: va_new = .... - - -
2560 S4: a_0 = ..use(a_1).. true S6 VS6
2561 '---> S6: a_new = .... - S4 VS6
2562 > VS5: ... = ..vuse(va_new).. - - -
2563 S5: ... = ..use(a_0).. - - -
2565 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2566 elsewhere), and we'll end up with:
2569 VS5: ... = ..vuse(va_new)..
2571 In case of more than one pattern statements, e.g., widen-mult with
2575 S2 a_T = (TYPE) a_t;
2576 '--> S3: a_it = (interm_type) a_t;
2577 S4 prod_T = a_T * CONST;
2578 '--> S5: prod_T' = a_it w* CONST;
2580 there may be other users of a_T outside the pattern. In that case S2 will
2581 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2582 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2583 be recorded in S3. */
2586 vect_pattern_recog (loop_vec_info loop_vinfo)
2588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2589 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2590 unsigned int nbbs = loop->num_nodes;
2591 gimple_stmt_iterator si;
2593 vect_recog_func_ptr vect_recog_func;
2594 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2596 if (vect_print_dump_info (REPORT_DETAILS))
2597 fprintf (vect_dump, "=== vect_pattern_recog ===");
2599 /* Scan through the loop stmts, applying the pattern recognition
2600 functions starting at each stmt visited: */
2601 for (i = 0; i < nbbs; i++)
2603 basic_block bb = bbs[i];
2604 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2606 /* Scan over all generic vect_recog_xxx_pattern functions. */
2607 for (j = 0; j < NUM_PATTERNS; j++)
2609 vect_recog_func = vect_vect_recog_func_ptrs[j];
2610 vect_pattern_recog_1 (vect_recog_func, si,
2616 VEC_free (gimple, heap, stmts_to_replace);