1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
33 #include "diagnostic.h"
35 /* Need to include rtl.h, expr.h, etc. for optabs. */
40 static void expand_vector_operations_1 (gimple_stmt_iterator *);
43 /* Build a constant of type TYPE, made of VALUE's bits replicated
44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
/* NOTE(review): this dump is missing several lines of the function
   (return type, braces, the HOST_BITS_PER_WIDE_INT == width branch
   body, the `high` computations and the final return) — verify
   against the full file before editing.  */
46 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
48 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
/* Number of INNER_TYPE-wide chunks that fit in one HOST_WIDE_INT.  */
49 int n = HOST_BITS_PER_WIDE_INT / width;
50 unsigned HOST_WIDE_INT low, high, mask;
/* Element fills a whole HOST_WIDE_INT: no replication arithmetic needed.  */
55 if (width == HOST_BITS_PER_WIDE_INT)
59 mask = ((HOST_WIDE_INT)1 << width) - 1;
/* ~0 / mask is the 0x..010101-style repeating pattern; multiplying by
   (value & mask) stamps the low WIDTH bits of VALUE into every chunk.  */
60 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
/* Trim the replicated pattern to TYPE's precision (single-word,
   exactly one word, or double-word cases).  */
63 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
64 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
65 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
67 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
72 ret = build_int_cst_wide (type, low, high);
/* One-entry cache for build_word_mode_vector_type: the word_mode inner
   type, plus the last vector type built and its unit count.  GTY(())
   registers these as garbage-collector roots.  */
76 static GTY(()) tree vector_inner_type;
77 static GTY(()) tree vector_last_type;
78 static GTY(()) int vector_last_nunits;
80 /* Return a suitable vector types made of SUBPARTS units each of mode
81 "word_mode" (the global variable). */
/* NOTE(review): dump is missing the return type line and some braces.  */
83 build_word_mode_vector_type (int nunits)
/* Lazily create the word_mode scalar type used as the element type.  */
85 if (!vector_inner_type)
86 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
/* Cache hit: same unit count as last call, reuse the cached type.  */
87 else if (vector_last_nunits == nunits)
89 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
90 return vector_last_type;
93 /* We build a new type, but we canonicalize it nevertheless,
94 because it still saves some memory. */
95 vector_last_nunits = nunits;
96 vector_last_type = type_hash_canon (nunits,
97 build_vector_type (vector_inner_type,
99 return vector_last_type;
/* Callback type for per-element expansion: given an iterator, the inner
   (element) type, the two operands, a bit position/size pair selecting
   the element, and the tree code, build and return the scalar result.  */
102 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
103 tree, tree, tree, tree, tree, enum tree_code);
/* Extract one element of vector T as TYPE, either via BIT_FIELD_REF at
   BITPOS/BITSIZE or — presumably when BITPOS is NULL (the guarding
   condition line is missing from this dump; TODO confirm) — via a whole
   VIEW_CONVERT_EXPR.  */
106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107 tree t, tree bitsize, tree bitpos)
110 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
112 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
/* elem_op_func: apply unary CODE to one extracted element of A.
   B is unused for unary operations.  */
116 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
117 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
120 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
121 return gimplify_build1 (gsi, code, inner_type, a);
/* elem_op_func: apply binary CODE to the corresponding extracted
   elements of A and B.  */
125 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
126 tree bitpos, tree bitsize, enum tree_code code)
128 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
129 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
130 return gimplify_build2 (gsi, code, inner_type, a, b);
133 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
135 INNER_TYPE is the type of A and B elements
137 returned expression is of signed integer type with the
138 size equal to the size of INNER_TYPE. */
140 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
141 tree bitpos, tree bitsize, enum tree_code code)
145 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
146 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
/* Signed integer type of the same bit width as the element mode; -1 in
   this type is the all-ones mask conventionally used for "true".  */
148 comp_type = build_nonstandard_integer_type
149 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
151 return gimplify_build3 (gsi, COND_EXPR, comp_type,
152 fold_build2 (code, boolean_type_node, a, b),
153 build_int_cst (comp_type, -1),
154 build_int_cst (comp_type, 0));
157 /* Expand vector addition to scalars. This does bit twiddling
158 in order to increase parallelism:
160 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
163 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
164 (a ^ ~b) & 0x80808080
166 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
168 This optimization should be done only if 4 vector items or more
171 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
172 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
175 tree inner_type = TREE_TYPE (TREE_TYPE (a));
176 unsigned HOST_WIDE_INT max;
177 tree low_bits, high_bits, a_low, b_low, result_low, signs;
/* low_bits masks off each element's sign bit (0x7f... pattern);
   high_bits keeps only the sign bits (0x80... pattern).  */
179 max = GET_MODE_MASK (TYPE_MODE (inner_type));
180 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
181 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
183 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
184 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
/* signs tracks where the element sign bits of a and b differ.  */
186 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
187 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
188 if (code == PLUS_EXPR)
189 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
/* Subtraction path (presumably the `else` — a brace line is missing
   from this dump): borrow-proof a by setting its sign bits, and invert
   signs to match the a ^ ~b formula above.  */
192 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
193 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
/* Combine: low 7 bits computed carry-contained, sign bits fixed by XOR.  */
196 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
197 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
198 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
/* Expand vector negation a word at a time using the identity
   -b = (0x80... - (b & 0x7f...)) ^ (~b & 0x80...) (see comment above
   do_plus_minus).  Word-at-a-time sibling of do_plus_minus.  */
202 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
203 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
204 tree bitsize ATTRIBUTE_UNUSED,
205 enum tree_code code ATTRIBUTE_UNUSED)
207 tree inner_type = TREE_TYPE (TREE_TYPE (b));
209 tree low_bits, high_bits, b_low, result_low, signs;
211 max = GET_MODE_MASK (TYPE_MODE (inner_type));
/* 0x7f7f... (non-sign bits) and 0x8080... (sign bits) patterns.  */
212 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
213 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
215 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
217 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
218 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
219 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
220 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
221 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
224 /* Expand a vector operation to scalars, by using many operations
225 whose type is the vector type's inner type. */
/* Builds a CONSTRUCTOR whose elements are produced by calling F once
   per INNER_TYPE-sized chunk of the operands.  NOTE(review): some
   lines (declaration of `i`, the ce->value assignment, closing braces)
   are missing from this dump.  */
227 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
228 tree type, tree inner_type,
229 tree a, tree b, enum tree_code code)
231 VEC(constructor_elt,gc) *v;
232 tree part_width = TYPE_SIZE (inner_type);
233 tree index = bitsize_int (0);
234 int nunits = TYPE_VECTOR_SUBPARTS (type);
/* delta = how many vector elements one INNER_TYPE chunk covers (> 1
   when INNER_TYPE is a whole word covering several elements).  */
235 int delta = tree_low_cst (part_width, 1)
236 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
238 location_t loc = gimple_location (gsi_stmt (*gsi));
/* Warn differently depending on whether we decompose all the way to
   scalars or merely into word-parallel chunks.  */
240 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
241 warning_at (loc, OPT_Wvector_operation_performance,
242 "vector operation will be expanded piecewise")
244 warning_at (loc, OPT_Wvector_operation_performance,
245 "vector operation will be expanded in parallel");
247 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
248 for (i = 0; i < nunits;
249 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
251 tree result = f (gsi, inner_type, a, b, index, part_width, code);
252 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
253 ce->index = NULL_TREE;
257 return build_constructor (type, v);
260 /* Expand a vector operation to scalars with the freedom to use
261 a scalar integer type, or to use a different size for the items
262 in the vector type. */
264 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
268 tree result, compute_type;
269 enum machine_mode mode;
270 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
271 location_t loc = gimple_location (gsi_stmt (*gsi));
273 /* We have three strategies. If the type is already correct, just do
274 the operation an element at a time. Else, if the vector is wider than
275 one word, do it a word at a time; finally, if the vector is smaller
276 than one word, do it as a scalar. */
277 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
278 return expand_vector_piecewise (gsi, f,
279 type, TREE_TYPE (type),
281 else if (n_words > 1)
/* Word-at-a-time: reinterpret as a vector of word_mode units, expand
   piecewise over words, then force the result to a gimple value.  */
283 tree word_type = build_word_mode_vector_type (n_words);
284 result = expand_vector_piecewise (gsi, f,
285 word_type, TREE_TYPE (word_type),
287 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
292 /* Use a single scalar operation with a mode no wider than word_mode. */
293 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
294 compute_type = lang_hooks.types.type_for_mode (mode, 1);
/* NULL bitpos/bitsize: tree_vec_extract will view-convert the whole
   vector rather than slice it.  */
295 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
296 warning_at (loc, OPT_Wvector_operation_performance,
297 "vector operation will be expanded with a "
298 "single scalar operation");
304 /* Expand a vector operation to scalars; for integer types we can use
305 special bit twiddling tricks to do the sums a word at a time, using
306 function F_PARALLEL instead of F. These tricks are done only if
307 they can process at least four items, that is, only if the vector
308 holds at least four items and if a word can hold four items. */
310 expand_vector_addition (gimple_stmt_iterator *gsi,
311 elem_op_func f, elem_op_func f_parallel,
312 tree type, tree a, tree b, enum tree_code code)
314 int parts_per_word = UNITS_PER_WORD
315 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
/* Bit-twiddling pays off only for integer elements with >= 4 items
   both per word and in the vector; otherwise go element-by-element.  */
317 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
318 && parts_per_word >= 4
319 && TYPE_VECTOR_SUBPARTS (type) >= 4)
320 return expand_vector_parallel (gsi, f_parallel,
323 return expand_vector_piecewise (gsi, f,
324 type, TREE_TYPE (type),
328 /* Check if vector VEC consists of all the equal elements and
329 that the number of elements corresponds to the type of VEC.
330 The function returns first element of the vector
331 or NULL_TREE if the vector is not uniform. */
333 uniform_vector_p (tree vec)
338 if (vec == NULL_TREE)
/* VECTOR_CST: walk the TREE_LIST of elements comparing against the first.  */
341 if (TREE_CODE (vec) == VECTOR_CST)
343 els = TREE_VECTOR_CST_ELTS (vec);
344 first = TREE_VALUE (els);
345 els = TREE_CHAIN (els);
347 for (t = els; t; t = TREE_CHAIN (t))
348 if (!operand_equal_p (first, TREE_VALUE (t), 0))
/* CONSTRUCTOR: same idea; also require the element count to match the
   vector type (a short constructor implies trailing zeros).  */
354 else if (TREE_CODE (vec) == CONSTRUCTOR)
356 first = error_mark_node;
358 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
365 if (!operand_equal_p (first, t, 0))
368 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
377 /* Try to expand vector comparison expression OP0 CODE OP1 by
378 querying optab if the following expression:
379 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
382 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
383 tree op1, enum tree_code code)
/* If the target cannot do the VEC_COND_EXPR form, fall back to a
   per-element expansion via do_compare.  */
386 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
387 t = expand_vector_piecewise (gsi, do_compare, type,
388 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
/* Dispatch lowering of one vector assignment STMT with tree code CODE:
   pick the best expansion strategy based on COMPUTE_TYPE's mode and the
   operation.  NOTE(review): several case labels/braces are missing from
   this dump.  */
396 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
397 gimple assign, enum tree_code code)
399 enum machine_mode compute_mode = TYPE_MODE (compute_type);
401 /* If the compute mode is not a vector mode (hence we are not decomposing
402 a BLKmode vector to smaller, hardware-supported vectors), we may want
403 to expand the operations in parallel. */
404 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
405 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
406 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
407 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
408 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
409 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
/* Plus/minus case (presumably — switch labels missing from dump):
   only safe to bit-twiddle when overflow does not trap.  */
414 if (!TYPE_OVERFLOW_TRAPS (type))
415 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
416 gimple_assign_rhs1 (assign),
417 gimple_assign_rhs2 (assign), code);
/* Negation case, same trapping caveat.  */
421 if (!TYPE_OVERFLOW_TRAPS (type))
422 return expand_vector_addition (gsi, do_unop, do_negate, type,
423 gimple_assign_rhs1 (assign),
/* Bitwise binary ops can always be done word-parallel.  */
430 return expand_vector_parallel (gsi, do_binop, type,
431 gimple_assign_rhs1 (assign),
432 gimple_assign_rhs2 (assign), code);
/* Bitwise NOT, likewise.  */
435 return expand_vector_parallel (gsi, do_unop, type,
436 gimple_assign_rhs1 (assign),
/* Comparison codes route through expand_vector_comparison.  */
453 tree rhs1 = gimple_assign_rhs1 (assign);
454 tree rhs2 = gimple_assign_rhs2 (assign);
456 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
/* Default: plain piecewise expansion, unary or binary.  */
462 if (TREE_CODE_CLASS (code) == tcc_unary)
463 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
464 gimple_assign_rhs1 (assign),
467 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
468 gimple_assign_rhs1 (assign),
469 gimple_assign_rhs2 (assign), code);
472 /* Return a type for the widest vector mode whose components are of mode
473 INNER_MODE, or NULL_TREE if none is found.
474 SATP is true for saturating fixed-point types. */
477 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
479 enum machine_mode best_mode = VOIDmode, mode;
/* Choose the starting point of the mode walk from INNER_MODE's class.  */
482 if (SCALAR_FLOAT_MODE_P (inner_mode))
483 mode = MIN_MODE_VECTOR_FLOAT;
484 else if (SCALAR_FRACT_MODE_P (inner_mode))
485 mode = MIN_MODE_VECTOR_FRACT;
486 else if (SCALAR_UFRACT_MODE_P (inner_mode))
487 mode = MIN_MODE_VECTOR_UFRACT;
488 else if (SCALAR_ACCUM_MODE_P (inner_mode))
489 mode = MIN_MODE_VECTOR_ACCUM;
490 else if (SCALAR_UACCUM_MODE_P (inner_mode))
491 mode = MIN_MODE_VECTOR_UACCUM;
493 mode = MIN_MODE_VECTOR_INT;
/* Scan wider and wider vector modes, keeping the widest one with a
   matching inner mode that the target actually implements for OP.  */
495 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
496 if (GET_MODE_INNER (mode) == inner_mode
497 && GET_MODE_NUNITS (mode) > best_nunits
498 && optab_handler (op, mode) != CODE_FOR_nothing)
499 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
501 if (best_mode == VOIDmode)
505 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
506 if (ALL_FIXED_POINT_MODE_P (best_mode))
507 return lang_hooks.types.type_for_mode (best_mode, satp);
509 return lang_hooks.types.type_for_mode (best_mode, 1);
514 /* Build a reference to the element of the vector VECT. Function
515 returns either the element itself, either BIT_FIELD_REF, or an
516 ARRAY_REF expression.
518 GSI is requred to insert temporary variables while building a
519 refernece to the element of the vector VECT.
521 PTMPVEC is a pointer to the temporary variable for caching
522 purposes. In case when PTMPVEC is NULL new temporary variable
525 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
527 tree vect_type, vect_elt_type;
531 bool need_asgn = true;
532 unsigned int elements;
534 vect_type = TREE_TYPE (vect);
535 vect_elt_type = TREE_TYPE (vect_type);
536 elements = TYPE_VECTOR_SUBPARTS (vect_type);
538 if (TREE_CODE (idx) == INTEGER_CST)
540 unsigned HOST_WIDE_INT index;
542 /* Given that we're about to compute a binary modulus,
543 we don't care about the high bits of the value. */
544 index = TREE_INT_CST_LOW (idx);
/* Out-of-range constant indices are reduced modulo the (power-of-two)
   element count.  */
545 if (!host_integerp (idx, 1) || index >= elements)
547 index &= elements - 1;
548 idx = build_int_cst (TREE_TYPE (idx), index);
551 /* When lowering a vector statement sequence do some easy
552 simplification by looking through intermediate vector results. */
553 if (TREE_CODE (vect) == SSA_NAME)
555 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
556 if (is_gimple_assign (def_stmt)
557 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
558 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
559 vect = gimple_assign_rhs1 (def_stmt);
/* Constant vector: walk the element list to the requested slot
   (NOTE(review): the index-comparison line is missing from this dump);
   a too-short list means the element is an implicit zero.  */
562 if (TREE_CODE (vect) == VECTOR_CST)
565 tree vals = TREE_VECTOR_CST_ELTS (vect);
566 for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
568 return TREE_VALUE (vals);
569 return build_zero_cst (vect_elt_type);
/* CONSTRUCTOR: look for an element whose index matches IDX; absent
   entries are implicit zeros.  */
571 else if (TREE_CODE (vect) == CONSTRUCTOR)
576 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
577 if (operand_equal_p (elt_i, idx, 0))
579 return build_zero_cst (vect_elt_type);
/* Constant index into a non-constant vector: slice it directly.  */
583 tree size = TYPE_SIZE (vect_elt_type);
584 tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
585 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
/* Variable index: spill the vector to an addressable temporary (cached
   through *PTMPVEC when provided) and index it as an array.  */
590 tmpvec = create_tmp_var (vect_type, "vectmp");
592 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
601 TREE_ADDRESSABLE (tmpvec) = 1;
602 asgn = gimple_build_assign (tmpvec, vect);
603 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
606 arraytype = build_array_type_nelts (vect_elt_type, elements);
607 return build4 (ARRAY_REF, vect_elt_type,
608 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
609 idx, NULL_TREE, NULL_TREE);
612 /* Check if VEC_PERM_EXPR within the given setting is supported
613 by hardware, or lower it piecewise.
615 When VEC_PERM_EXPR has the same first and second operands:
616 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
617 {v0[mask[0]], v0[mask[1]], ...}
618 MASK and V0 must have the same number of elements.
620 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
621 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
622 V0 and V1 must have the same type. MASK, V0, V1 must have the
623 same number of arguments. */
626 lower_vec_perm (gimple_stmt_iterator *gsi)
628 gimple stmt = gsi_stmt (*gsi);
629 tree mask = gimple_assign_rhs3 (stmt);
630 tree vec0 = gimple_assign_rhs1 (stmt);
631 tree vec1 = gimple_assign_rhs2 (stmt);
632 tree vect_type = TREE_TYPE (vec0);
633 tree mask_type = TREE_TYPE (mask);
634 tree vect_elt_type = TREE_TYPE (vect_type);
635 tree mask_elt_type = TREE_TYPE (mask_type);
636 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
637 VEC(constructor_elt,gc) *v;
638 tree constr, t, si, i_val;
/* Cached addressable temporaries for variable-index element access.  */
639 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
640 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
641 location_t loc = gimple_location (gsi_stmt (*gsi));
/* Hardware supports this permutation as-is: leave the stmt alone
   (the early return is missing from this dump; TODO confirm).  */
644 if (can_vec_perm_expr_p (vect_type, mask))
647 warning_at (loc, OPT_Wvector_operation_performance,
648 "vector shuffling operation will be expanded piecewise");
/* Build the result one element at a time.  */
651 v = VEC_alloc (constructor_elt, gc, elements);
652 for (i = 0; i < elements; i++)
655 i_val = vector_element (gsi, mask, si, &masktmp);
/* Constant mask element: resolve the source vector at compile time.  */
657 if (TREE_CODE (i_val) == INTEGER_CST)
659 unsigned HOST_WIDE_INT index;
661 index = TREE_INT_CST_LOW (i_val);
662 if (!host_integerp (i_val, 1) || index >= elements)
663 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
/* Bit `elements` of the mask selects vec1 over vec0.  */
665 if (two_operand_p && (index & elements) != 0)
666 t = vector_element (gsi, vec1, i_val, &vec1tmp);
668 t = vector_element (gsi, vec0, i_val, &vec0tmp);
670 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
671 true, GSI_SAME_STMT);
/* Non-constant mask element: emit runtime selection code.  */
675 tree cond = NULL_TREE, v0_val;
/* cond isolates the vec0/vec1 selector bit from the mask value.  */
679 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
680 build_int_cst (mask_elt_type, elements));
681 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
682 true, GSI_SAME_STMT);
/* Reduce the index modulo the element count.  */
685 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
686 build_int_cst (mask_elt_type, elements - 1));
687 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
688 true, GSI_SAME_STMT);
690 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
691 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
692 true, GSI_SAME_STMT);
698 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
699 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
700 true, GSI_SAME_STMT);
/* Select v0_val when the selector bit is clear, else v1_val.  */
702 cond = fold_build2 (EQ_EXPR, boolean_type_node,
703 cond, build_zero_cst (mask_elt_type));
704 cond = fold_build3 (COND_EXPR, vect_elt_type,
705 cond, v0_val, v1_val);
706 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
707 true, GSI_SAME_STMT);
713 CONSTRUCTOR_APPEND_ELT (v, si, t);
/* Replace the VEC_PERM_EXPR with the assembled CONSTRUCTOR.  */
716 constr = build_constructor (vect_type, v);
717 gimple_assign_set_rhs_from_tree (gsi, constr);
718 update_stmt (gsi_stmt (*gsi));
721 /* Process one statement. If we identify a vector operation, expand it. */
724 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
726 gimple stmt = gsi_stmt (*gsi);
727 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
729 enum machine_mode compute_mode;
731 enum gimple_rhs_class rhs_class;
/* Only assignments can be vector operations we care about.  */
734 if (gimple_code (stmt) != GIMPLE_ASSIGN)
737 code = gimple_assign_rhs_code (stmt);
738 rhs_class = get_gimple_rhs_class (code);
739 lhs = gimple_assign_lhs (stmt);
/* VEC_PERM_EXPR has its own dedicated lowering path.  */
741 if (code == VEC_PERM_EXPR)
743 lower_vec_perm (gsi);
747 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
750 rhs1 = gimple_assign_rhs1 (stmt);
751 type = gimple_expr_type (stmt);
752 if (rhs_class == GIMPLE_BINARY_RHS)
753 rhs2 = gimple_assign_rhs2 (stmt);
/* Non-vector results and pure conversions are left for later stages
   (NOTE(review): the condition introducing these codes is missing
   from this dump).  */
755 if (TREE_CODE (type) != VECTOR_TYPE)
759 || code == FLOAT_EXPR
760 || code == FIX_TRUNC_EXPR
761 || code == VIEW_CONVERT_EXPR)
764 gcc_assert (code != CONVERT_EXPR);
766 /* The signedness is determined from input argument. */
767 if (code == VEC_UNPACK_FLOAT_HI_EXPR
768 || code == VEC_UNPACK_FLOAT_LO_EXPR)
769 type = TREE_TYPE (rhs1);
771 /* Choose between vector shift/rotate by vector and vector shift/rotate by
773 if (code == LSHIFT_EXPR
774 || code == RSHIFT_EXPR
775 || code == LROTATE_EXPR
776 || code == RROTATE_EXPR)
778 /* Check whether we have vector <op> {x,x,x,x} where x
779 could be a scalar variable or a constant. Transform
780 vector <op> {x,x,x,x} ==> vector <op> scalar. */
781 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
/* Accept either a uniform VECTOR_CST directly, or an SSA name whose
   single defining assignment builds a uniform vector.  */
786 if ((TREE_CODE (rhs2) == VECTOR_CST
787 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
788 || (TREE_CODE (rhs2) == SSA_NAME
789 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
790 && gimple_assign_single_p (def_stmt)
791 && (first = uniform_vector_p
792 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
794 gimple_assign_set_rhs2 (stmt, first);
/* Pick the vector-shift-by-vector optab when the (possibly rewritten)
   shift count is still a vector, else the scalar-count optab.  */
800 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
801 op = optab_for_tree_code (code, type, optab_vector);
804 op = optab_for_tree_code (code, type, optab_scalar);
806 /* The rtl expander will expand vector/scalar as vector/vector
807 if necessary. Don't bother converting the stmt here. */
809 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
810 op = optab_for_tree_code (code, type, optab_vector);
814 op = optab_for_tree_code (code, type, optab_default);
816 /* For widening/narrowing vector operations, the relevant type is of the
817 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
818 calculated in the same way above. */
819 if (code == WIDEN_SUM_EXPR
820 || code == VEC_WIDEN_MULT_HI_EXPR
821 || code == VEC_WIDEN_MULT_LO_EXPR
822 || code == VEC_UNPACK_HI_EXPR
823 || code == VEC_UNPACK_LO_EXPR
824 || code == VEC_PACK_TRUNC_EXPR
825 || code == VEC_PACK_SAT_EXPR
826 || code == VEC_PACK_FIX_TRUNC_EXPR
827 || code == VEC_WIDEN_LSHIFT_HI_EXPR
828 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
829 type = TREE_TYPE (rhs1);
831 /* Optabs will try converting a negation into a subtraction, so
832 look for it as well. TODO: negation of floating-point vectors
833 might be turned into an exclusive OR toggling the sign bit. */
835 && code == NEGATE_EXPR
836 && INTEGRAL_TYPE_P (TREE_TYPE (type))
837 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
839 /* For very wide vectors, try using a smaller vector mode. */
841 if (TYPE_MODE (type) == BLKmode && op)
843 tree vector_compute_type
844 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
845 TYPE_SATURATING (TREE_TYPE (type)));
846 if (vector_compute_type != NULL_TREE
847 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
848 < TYPE_VECTOR_SUBPARTS (compute_type)))
849 compute_type = vector_compute_type;
852 /* If we are breaking a BLKmode vector into smaller pieces,
853 type_for_widest_vector_mode has already looked into the optab,
854 so skip these checks. */
855 if (compute_type == type)
857 compute_mode = TYPE_MODE (compute_type);
858 if (VECTOR_MODE_P (compute_mode)
860 && optab_handler (op, compute_mode) != CODE_FOR_nothing)
863 /* There is no operation in hardware, so fall back to scalars. */
864 compute_type = TREE_TYPE (type);
/* VEC_{L,R}SHIFT_EXPR should have been handled by earlier passes.  */
867 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
868 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
870 /* Leave expression untouched for later expansion. */
871 if (new_rhs == NULL_TREE)
/* Reconcile type mismatches between LHS and the expanded RHS with a
   VIEW_CONVERT_EXPR rather than failing verification.  */
874 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
875 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
878 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
879 way to do it is change expand_vector_operation and its callees to
880 return a tree_code, RHS1 and RHS2 instead of a tree. */
881 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
882 update_stmt (gsi_stmt (*gsi));
885 /* Use this to lower vector operations introduced by the vectorizer,
886 if it may need the bit-twiddling tricks implemented in this file. */
/* Gate for the early (-O0) lowering pass: run only when not optimizing,
   since the SSA pass (veclower2) handles the optimized pipeline.  */
889 gate_expand_vector_operations_ssa (void)
891 return optimize == 0;
/* Pass entry point: walk every statement of every basic block and lower
   the vector operations found; return TODO_cleanup_cfg if purging dead
   EH edges changed the CFG.  */
895 expand_vector_operations (void)
897 gimple_stmt_iterator gsi;
899 bool cfg_changed = false;
903 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
905 expand_vector_operations_1 (&gsi);
906 /* ??? If we do not cleanup EH then we will ICE in
907 verification. But in reality we have created wrong-code
908 as we did not properly transition EH info and edges to
909 the piecewise computations. */
910 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
911 && gimple_purge_dead_eh_edges (bb))
916 return cfg_changed ? TODO_cleanup_cfg : 0;
/* Pass descriptor for the early, -O0-only vector lowering pass
   ("veclower"); gated by gate_expand_vector_operations_ssa.  */
919 struct gimple_opt_pass pass_lower_vector =
923 "veclower", /* name */
924 gate_expand_vector_operations_ssa, /* gate */
925 expand_vector_operations, /* execute */
928 0, /* static_pass_number */
930 PROP_cfg, /* properties_required */
931 0, /* properties_provided */
932 0, /* properties_destroyed */
933 0, /* todo_flags_start */
934 TODO_update_ssa /* todo_flags_finish */
936 | TODO_verify_stmts | TODO_verify_flow
/* Pass descriptor for the SSA-form vector lowering pass ("veclower2"),
   run later in the optimized pipeline; no gate line is visible in this
   dump (presumably always-on or gated elsewhere — TODO confirm).  */
941 struct gimple_opt_pass pass_lower_vector_ssa =
945 "veclower2", /* name */
947 expand_vector_operations, /* execute */
950 0, /* static_pass_number */
952 PROP_cfg, /* properties_required */
953 0, /* properties_provided */
954 0, /* properties_destroyed */
955 0, /* todo_flags_start */
956 TODO_update_ssa /* todo_flags_finish */
958 | TODO_verify_stmts | TODO_verify_flow
963 #include "gt-tree-vect-generic.h"