1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "insn-codes.h"
28 #include "diagnostic.h"
31 #include "langhooks.h"
32 #include "tree-flow.h"
33 #include "tree-gimple.h"
34 #include "tree-iterator.h"
35 #include "tree-pass.h"
40 /* Build a constant of type TYPE, made of VALUE's bits replicated
41 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
43 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
/* Bit width of one element of the vector's inner type.  */
45 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
/* How many WIDTH-bit elements fit in one host wide integer.  */
46 int n = HOST_BITS_PER_WIDE_INT / width;
47 unsigned HOST_WIDE_INT low, high, mask;
/* An element as wide as a HOST_WIDE_INT needs no replication
   (elided branch body not shown in this listing).  */
52 if (width == HOST_BITS_PER_WIDE_INT)
/* MASK selects the low WIDTH bits of VALUE.  Dividing ~0 by MASK
   yields the 0x...010101 lane pattern, so the multiply copies
   (VALUE & MASK) into every WIDTH-bit lane of LOW.  */
56 mask = ((HOST_WIDE_INT)1 << width) - 1;
57 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
/* Fit the replicated pattern into TYPE's precision: truncate LOW
   for narrow types, and populate HIGH for double-wide types.  */
60 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
61 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
62 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
64 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
69 ret = build_int_cst_wide (type, low, high);
/* One-entry cache used by build_word_mode_vector_type: the inner
   word_mode type, plus the last vector type built and its unit count.
   GTY(()) marks them as roots for the garbage collector.  */
73 static GTY(()) tree vector_inner_type;
74 static GTY(()) tree vector_last_type;
75 static GTY(()) int vector_last_nunits;
77 /* Return a suitable vector types made of SUBPARTS units each of mode
78 "word_mode" (the global variable). */
80 build_word_mode_vector_type (int nunits)
/* Lazily create the word_mode inner type on first use; 1 = unsigned.  */
82 if (!vector_inner_type)
83 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
/* Cache hit: reuse the last vector type built for this unit count.  */
84 else if (vector_last_nunits == nunits)
86 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
87 return vector_last_type;
90 /* We build a new type, but we canonicalize it nevertheless,
91 because it still saves some memory. */
92 vector_last_nunits = nunits;
93 vector_last_type = type_hash_canon (nunits,
94 build_vector_type (vector_inner_type,
96 return vector_last_type;
/* Callback type used by the expand_vector_* routines below: builds one
   scalar piece of a lowered vector operation.  Arguments are the
   statement iterator, the element type, the two operands (B may be
   unused for unary ops), the bit position/size of the piece being
   extracted, and the tree code of the operation.  */
99 typedef tree (*elem_op_func) (block_stmt_iterator *,
100 tree, tree, tree, tree, tree, enum tree_code);
/* Extract the piece of T of type TYPE found at BITPOS with width
   BITSIZE, emitting any needed statements through BSI.  */
103 tree_vec_extract (block_stmt_iterator *bsi, tree type,
104 tree t, tree bitsize, tree bitpos)
/* When a bit position is given, pull the piece out with BIT_FIELD_REF.  */
107 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
/* Otherwise reinterpret the whole value as TYPE (the guarding
   condition between these two returns is elided in this listing;
   presumably it tests BITPOS -- confirm against the full source).  */
109 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
/* elem_op_func for a unary operation: extract one INNER_TYPE element
   of A at BITPOS/BITSIZE and apply CODE to it.  B is unused.  */
113 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
114 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
117 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
118 return gimplify_build1 (bsi, code, inner_type, a);
/* elem_op_func for a binary operation: extract the elements of A and
   B at BITPOS/BITSIZE and combine them with CODE.  */
122 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
123 tree bitpos, tree bitsize, enum tree_code code)
125 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
126 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
127 return gimplify_build2 (bsi, code, inner_type, a, b);
130 /* Expand vector addition to scalars. This does bit twiddling
131 in order to increase parallelism:
133 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
136 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
137 (a ^ ~b) & 0x80808080
139 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
141 This optimization should be done only if 4 vector items or more
144 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
145 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
148 tree inner_type = TREE_TYPE (TREE_TYPE (a));
149 unsigned HOST_WIDE_INT max;
150 tree low_bits, high_bits, a_low, b_low, result_low, signs;
/* LOW_BITS replicates the 0x7f... mask (all but each element's sign
   bit); HIGH_BITS replicates the 0x80... mask (each sign bit).  */
152 max = GET_MODE_MASK (TYPE_MODE (inner_type));
153 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
154 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
/* NOTE(review): BITSIZE/BITPOS are marked ATTRIBUTE_UNUSED in the
   signature yet used here -- matches the upstream source, but worth
   confirming the markers are stale rather than the uses wrong.  */
156 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
157 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
/* SIGNS collects a ^ b (inverted below for subtraction), which after
   masking with HIGH_BITS supplies the per-element carry correction.  */
159 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
160 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
/* For PLUS, clear A's sign bits so lanes cannot carry into each
   other; for MINUS, set them so lanes cannot borrow.  */
161 if (code == PLUS_EXPR)
162 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
165 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
166 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
169 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
170 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
/* XOR the true sign bits back into the carry-free result.  */
171 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
/* elem_op_func implementing vector negation one word at a time, using
   the -b identity documented above do_plus_minus.  */
175 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
176 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
177 tree bitsize ATTRIBUTE_UNUSED,
178 enum tree_code code ATTRIBUTE_UNUSED)
180 tree inner_type = TREE_TYPE (TREE_TYPE (b));
182 tree low_bits, high_bits, b_low, result_low, signs;
/* As in do_plus_minus: LOW_BITS = replicated 0x7f... mask,
   HIGH_BITS = replicated 0x80... sign-bit mask.  */
184 max = GET_MODE_MASK (TYPE_MODE (inner_type));
185 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
186 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
/* NOTE(review): BITSIZE/BITPOS are ATTRIBUTE_UNUSED yet used here,
   mirroring do_plus_minus -- presumably stale markers; confirm.  */
188 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
190 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
/* SIGNS = ~b restricted to the sign-bit positions.  */
191 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
192 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
/* 0x80... - (b & 0x7f...) negates each lane without inter-lane
   borrows; XOR-ing SIGNS restores the correct sign bits.  */
193 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
194 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
197 /* Expand a vector operation to scalars, by using many operations
198 whose type is the vector type's inner type. */
200 expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
201 tree type, tree inner_type,
202 tree a, tree b, enum tree_code code)
204 VEC(constructor_elt,gc) *v;
205 tree part_width = TYPE_SIZE (inner_type);
206 tree index = bitsize_int (0);
207 int nunits = TYPE_VECTOR_SUBPARTS (type);
/* DELTA = number of vector elements covered by one INNER_TYPE piece
   (more than 1 when computing a word at a time).  */
208 int delta = tree_low_cst (part_width, 1)
209 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
212 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
/* Walk the vector piece by piece, INDEX advancing by PART_WIDTH bits
   each iteration; F builds the scalar result for each piece.  */
213 for (i = 0; i < nunits;
214 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
216 tree result = f (bsi, inner_type, a, b, index, part_width, code);
217 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
218 ce->index = NULL_TREE;
/* Reassemble the pieces into a vector-valued CONSTRUCTOR.  */
222 return build_constructor (type, v);
225 /* Expand a vector operation to scalars with the freedom to use
226 a scalar integer type, or to use a different size for the items
227 in the vector type. */
229 expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
233 tree result, compute_type;
234 enum machine_mode mode;
235 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
237 /* We have three strategies. If the type is already correct, just do
238 the operation an element at a time. Else, if the vector is wider than
239 one word, do it a word at a time; finally, if the vector is smaller
240 than one word, do it as a scalar. */
241 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
242 return expand_vector_piecewise (bsi, f,
243 type, TREE_TYPE (type),
245 else if (n_words > 1)
/* Reinterpret the vector as N_WORDS word_mode units and operate on
   each word; the result must then be a gimple value so it can be
   converted back (lines elided in this listing).  */
247 tree word_type = build_word_mode_vector_type (n_words);
248 result = expand_vector_piecewise (bsi, f,
249 word_type, TREE_TYPE (word_type),
251 result = gimplify_val (bsi, word_type, result);
255 /* Use a single scalar operation with a mode no wider than word_mode. */
256 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
257 compute_type = lang_hooks.types.type_for_mode (mode, 1);
/* NULL bitpos/bitsize make F view-convert the whole value.  */
258 result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
264 /* Expand a vector operation to scalars; for integer types we can use
265 special bit twiddling tricks to do the sums a word at a time, using
266 function F_PARALLEL instead of F. These tricks are done only if
267 they can process at least four items, that is, only if the vector
268 holds at least four items and if a word can hold four items. */
270 expand_vector_addition (block_stmt_iterator *bsi,
271 elem_op_func f, elem_op_func f_parallel,
272 tree type, tree a, tree b, enum tree_code code)
/* How many vector elements fit in one machine word.  */
274 int parts_per_word = UNITS_PER_WORD
275 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
/* The word-at-a-time trick only pays off with >= 4 lanes per word
   and >= 4 elements total; otherwise fall back to element-wise.  */
277 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
278 && parts_per_word >= 4
279 && TYPE_VECTOR_SUBPARTS (type) >= 4)
280 return expand_vector_parallel (bsi, f_parallel,
283 return expand_vector_piecewise (bsi, f,
284 type, TREE_TYPE (type),
/* Lower one vector operation RHS (with tree code CODE and vector type
   TYPE) into operations on COMPUTE_TYPE, dispatching to the
   piecewise, parallel, or bit-twiddling expanders as appropriate.  */
289 expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
290 tree rhs, enum tree_code code)
292 enum machine_mode compute_mode = TYPE_MODE (compute_type);
294 /* If the compute mode is not a vector mode (hence we are not decomposing
295 a BLKmode vector to smaller, hardware-supported vectors), we may want
296 to expand the operations in parallel. */
297 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
298 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
/* Addition/subtraction (the switch cases are elided in this
   listing): the word-parallel trick is wrong for trapping
   overflow, so require !TYPE_OVERFLOW_TRAPS.  */
303 if (!TYPE_OVERFLOW_TRAPS (type))
304 return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
305 TREE_OPERAND (rhs, 0),
306 TREE_OPERAND (rhs, 1), code);
/* Negation, same trapping-overflow restriction.  */
310 if (!TYPE_OVERFLOW_TRAPS (type))
311 return expand_vector_addition (bsi, do_unop, do_negate, type,
312 TREE_OPERAND (rhs, 0),
/* Bitwise operations are safe to do a whole word at a time.  */
319 return expand_vector_parallel (bsi, do_binop, type,
320 TREE_OPERAND (rhs, 0),
321 TREE_OPERAND (rhs, 1), code);
324 return expand_vector_parallel (bsi, do_unop, type,
325 TREE_OPERAND (rhs, 0),
/* Default: expand element by element in COMPUTE_TYPE.  */
332 if (TREE_CODE_CLASS (code) == tcc_unary)
333 return expand_vector_piecewise (bsi, do_unop, type, compute_type,
334 TREE_OPERAND (rhs, 0),
337 return expand_vector_piecewise (bsi, do_binop, type, compute_type,
338 TREE_OPERAND (rhs, 0),
339 TREE_OPERAND (rhs, 1), code);
342 /* Return a type for the widest vector mode whose components are of mode
343 INNER_MODE, or NULL_TREE if none is found. */
345 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
347 enum machine_mode best_mode = VOIDmode, mode;
/* Scan the float or integer vector-mode class, as appropriate.  */
350 if (SCALAR_FLOAT_MODE_P (inner_mode))
351 mode = MIN_MODE_VECTOR_FLOAT;
353 mode = MIN_MODE_VECTOR_INT;
/* Keep the widest mode (most units) with the right component mode
   for which OP actually has an insn.  */
355 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
356 if (GET_MODE_INNER (mode) == inner_mode
357 && GET_MODE_NUNITS (mode) > best_nunits
358 && op->handlers[mode].insn_code != CODE_FOR_nothing)
359 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
361 if (best_mode == VOIDmode)
364 return lang_hooks.types.type_for_mode (best_mode, 1);
367 /* Process one statement. If we identify a vector operation, expand it. */
370 expand_vector_operations_1 (block_stmt_iterator *bsi)
372 tree stmt = bsi_stmt (*bsi);
373 tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
375 enum machine_mode compute_mode;
/* Only assignments (possibly wrapped, e.g. in a RETURN_EXPR per the
   first case) can contain a vector operation to lower.  */
378 switch (TREE_CODE (stmt))
381 stmt = TREE_OPERAND (stmt, 0);
382 if (!stmt || TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
387 case GIMPLE_MODIFY_STMT:
388 p_lhs = &GIMPLE_STMT_OPERAND (stmt, 0);
389 p_rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
/* Nothing to do unless the RHS computes a vector value with a
   unary or binary operation.  */
398 type = TREE_TYPE (rhs);
399 if (TREE_CODE (type) != VECTOR_TYPE)
402 code = TREE_CODE (rhs);
403 if (TREE_CODE_CLASS (code) != tcc_unary
404 && TREE_CODE_CLASS (code) != tcc_binary)
/* Conversions and view-changes are not lowered here.  */
408 || code == FLOAT_EXPR
409 || code == FIX_TRUNC_EXPR
410 || code == VIEW_CONVERT_EXPR)
413 gcc_assert (code != CONVERT_EXPR);
415 /* The signedness is determined from input argument. */
416 if (code == VEC_UNPACK_FLOAT_HI_EXPR
417 || code == VEC_UNPACK_FLOAT_LO_EXPR)
418 type = TREE_TYPE (TREE_OPERAND (rhs, 0));
420 op = optab_for_tree_code (code, type);
422 /* For widening/narrowing vector operations, the relevant type is of the
423 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
424 calculated in the same way above. */
425 if (code == WIDEN_SUM_EXPR
426 || code == VEC_WIDEN_MULT_HI_EXPR
427 || code == VEC_WIDEN_MULT_LO_EXPR
428 || code == VEC_UNPACK_HI_EXPR
429 || code == VEC_UNPACK_LO_EXPR
430 || code == VEC_PACK_TRUNC_EXPR
431 || code == VEC_PACK_SAT_EXPR
432 || code == VEC_PACK_FIX_TRUNC_EXPR)
433 type = TREE_TYPE (TREE_OPERAND (rhs, 0));
435 /* Optabs will try converting a negation into a subtraction, so
436 look for it as well. TODO: negation of floating-point vectors
437 might be turned into an exclusive OR toggling the sign bit. */
439 && code == NEGATE_EXPR
440 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
441 op = optab_for_tree_code (MINUS_EXPR, type);
443 /* For very wide vectors, try using a smaller vector mode. */
445 if (TYPE_MODE (type) == BLKmode && op)
447 tree vector_compute_type
448 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
449 if (vector_compute_type != NULL_TREE)
450 compute_type = vector_compute_type;
453 /* If we are breaking a BLKmode vector into smaller pieces,
454 type_for_widest_vector_mode has already looked into the optab,
455 so skip these checks. */
456 if (compute_type == type)
458 compute_mode = TYPE_MODE (compute_type);
/* If the hardware supports the operation on the whole vector,
   leave the statement alone (elided body not shown).  */
459 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
460 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
462 && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
465 /* There is no operation in hardware, so fall back to scalars. */
466 compute_type = TREE_TYPE (type);
469 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
470 rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
/* Insert a VIEW_CONVERT_EXPR if the expanded RHS type is not
   trivially compatible with the LHS type.  */
471 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
474 *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
476 mark_stmt_modified (bsi_stmt (*bsi));
479 /* Use this to lower vector operations introduced by the vectorizer,
480 if it may need the bit-twiddling tricks implemented in this file. */
/* Pass gate: run only when vectorization is enabled.  */
483 gate_expand_vector_operations (void)
485 return flag_tree_vectorize != 0;
/* Pass entry point: walk every statement of every basic block and
   lower any vector operation found, updating modified statements.  */
489 expand_vector_operations (void)
491 block_stmt_iterator bsi;
496 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
498 expand_vector_operations_1 (&bsi);
499 update_stmt_if_modified (bsi_stmt (bsi));
/* Unconditional vector-lowering pass (no gate visible in this
   listing), run before SSA-based optimizations.  */
505 struct tree_opt_pass pass_lower_vector =
507 "veclower", /* name */
509 expand_vector_operations, /* execute */
512 0, /* static_pass_number */
514 PROP_cfg, /* properties_required */
515 0, /* properties_provided */
516 0, /* properties_destroyed */
517 0, /* todo_flags_start */
518 TODO_dump_func | TODO_ggc_collect
519 | TODO_verify_stmts, /* todo_flags_finish */
/* SSA-form variant of the lowering pass, gated on -ftree-vectorize;
   rebuilds SSA form for the statements it rewrites.  */
523 struct tree_opt_pass pass_lower_vector_ssa =
525 "veclower2", /* name */
526 gate_expand_vector_operations, /* gate */
527 expand_vector_operations, /* execute */
530 0, /* static_pass_number */
532 PROP_cfg, /* properties_required */
533 0, /* properties_provided */
534 0, /* properties_destroyed */
535 0, /* todo_flags_start */
536 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
538 | TODO_verify_stmts | TODO_verify_flow,
542 #include "gt-tree-vect-generic.h"