1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
46 /* This is a simple global reassociation pass. It is, in part, based
47 on the LLVM pass of the same name (They do some things more/less
48 than we do, in different orders, etc).
50 It consists of five steps:
52 1. Breaking up subtract operations into addition + negate, where
53 it would promote the reassociation of adds.
55 2. Left linearization of the expression trees, so that (A+B)+(C+D)
56 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
57 During linearization, we place the operands of the binary
58 expressions into a vector of operand_entry_t
60 3. Optimization of the operand lists, eliminating things like a +
63 4. Rewrite the expression trees we linearized and optimized so
64 they are in proper rank order.
66 5. Repropagate negates, as nothing else will clean it up ATM.
68 A bit of theory on #4, since nobody seems to write anything down
69 about why it makes sense to do it the way they do it:
71 We could do this much nicer theoretically, but don't (for reasons
72 explained after how to do it theoretically nice :P).
74 In order to promote the most redundancy elimination, you want
75 binary expressions whose operands are the same rank (or
76 preferably, the same value) exposed to the redundancy eliminator,
77 for possible elimination.
79 So the way to do this if we really cared, is to build the new op
80 tree from the leaves to the roots, merging as you go, and putting the
81 new op on the end of the worklist, until you are left with one
82 thing on the worklist.
84 IE if you have to rewrite the following set of operands (listed with
85 rank in parentheses), with opcode PLUS_EXPR:
87 a (1), b (1), c (1), d (2), e (2)
90 We start with our merge worklist empty, and the ops list with all of
93 You want to first merge all leaves of the same rank, as much as
96 So first build a binary op of
98 mergetmp = a + b, and put "mergetmp" on the merge worklist.
100 Because there is no three operand form of PLUS_EXPR, c is not going to
101 be exposed to redundancy elimination as a rank 1 operand.
103 So you might as well throw it on the merge worklist (you could also
104 consider it to now be a rank two operand, and merge it with d and e,
105 but in this case, you then have evicted e from a binary op. So at
106 least in this situation, you can't win.)
108 Then build a binary op of d + e
111 and put mergetmp2 on the merge worklist.
113 so merge worklist = {mergetmp, c, mergetmp2}
115 Continue building binary ops of these operations until you have only
116 one operation left on the worklist.
121 mergetmp3 = mergetmp + c
123 worklist = {mergetmp2, mergetmp3}
125 mergetmp4 = mergetmp2 + mergetmp3
127 worklist = {mergetmp4}
129 because we have one operation left, we can now just set the original
130 statement equal to the result of that operation.
132 This will at least expose a + b and d + e to redundancy elimination
133 as binary operations.
135 For extra points, you can reuse the old statements to build the
136 mergetmps, since you shouldn't run out.
138 So why don't we do this?
140 Because it's expensive, and rarely will help. Most trees we are
141 reassociating have 3 or less ops. If they have 2 ops, they already
142 will be written into a nice single binary op. If you have 3 ops, a
143 single simple check suffices to tell you whether the first two are of the
144 same rank. If so, you know to order it
147 newstmt = mergetmp + op3
151 newstmt = mergetmp + op1
153 If all three are of the same rank, you can't expose them all in a
154 single binary operator anyway, so the above is *still* the best you
157 Thus, this is what we do. When we have three ops left, we check to see
158 what order to put them in, and call it a day. As a nod to vector sum
159 reduction, we check if any of the ops are really a phi node that is a
160 destructive update for the associating op, and keep the destructive
161 update together for vector sum reduction recognition. */
/* NOTE(review): this chunk is a lossy extraction of GCC's reassociation
   pass; the leading number on each line is an extraction artifact and
   interior lines are missing.  Code is kept verbatim; only comments added.  */
/* Statistics counter for constants folded away (part of reassociate_stats
   family used elsewhere in this file).  */
168 int constants_eliminated;
173 /* Operator, rank pair. */
/* NOTE(review): the struct body is missing from this extraction; later
   code accesses oe->op, oe->rank and oe->id, so those are presumably its
   fields — confirm against the full source.  */
174 typedef struct operand_entry
/* Allocation pool from which operand_entry nodes are drawn
   (see add_to_ops_vec).  */
181 static alloc_pool operand_entry_pool;
183 /* This is used to assign a unique ID to each struct operand_entry
184 so that qsort results are identical on different hosts. */
185 static int next_operand_entry_id;
187 /* Starting rank number for a given basic block, so that we can rank
188 operations using unmovable instructions in that BB based on the bb
190 static long *bb_rank;
192 /* Operand->rank hashtable. */
193 static struct pointer_map_t *operand_rank;
/* Forward declaration: get_rank is used by propagate_rank below before
   its definition.  */
196 static long get_rank (tree);
199 /* Bias amount for loop-carried phis. We want this to be larger than
200 the depth of any reassociation tree we can see, but not larger than
201 the rank difference between two blocks. */
202 #define PHI_LOOP_BIAS (1 << 15)
204 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
205 an innermost loop, and the phi has only a single use which is inside
206 the loop, then the rank is the block rank of the loop latch plus an
207 extra bias for the loop-carried dependence. This causes expressions
208 calculated into an accumulator variable to be independent for each
209 iteration of the loop. If STMT is some other phi, the rank is the
210 block rank of its containing block. */
/* NOTE(review): the return-type line, local declarations (res, use,
   use_stmt, i) and braces are missing from this extraction; code kept
   verbatim.  Every early-return path below falls back to the plain
   block rank bb_rank[bb->index].  */
212 phi_rank (gimple stmt)
214 basic_block bb = gimple_bb (stmt);
215 struct loop *father = bb->loop_father;
221 /* We only care about real loops (those with a latch). */
223 return bb_rank[bb->index];
225 /* Interesting phis must be in headers of innermost loops. */
226 if (bb != father->header
228 return bb_rank[bb->index];
230 /* Ignore virtual SSA_NAMEs. */
231 res = gimple_phi_result (stmt);
232 if (!is_gimple_reg (SSA_NAME_VAR (res)))
233 return bb_rank[bb->index];
235 /* The phi definition must have a single use, and that use must be
236 within the loop. Otherwise this isn't an accumulator pattern. */
237 if (!single_imm_use (res, &use, &use_stmt)
238 || gimple_bb (use_stmt)->loop_father != father)
239 return bb_rank[bb->index];
241 /* Look for phi arguments from within the loop. If found, bias this phi. */
242 for (i = 0; i < gimple_phi_num_args (stmt); i++)
244 tree arg = gimple_phi_arg_def (stmt, i);
245 if (TREE_CODE (arg) == SSA_NAME
246 && !SSA_NAME_IS_DEFAULT_DEF (arg))
248 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
249 if (gimple_bb (def_stmt)->loop_father == father)
/* Biased rank: the latch's block rank plus PHI_LOOP_BIAS, making the
   accumulator phi sort last (see the overview comment at top of file).  */
250 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
254 /* Must be an uninteresting phi. */
255 return bb_rank[bb->index];
258 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
259 loop-carried dependence of an innermost loop, return TRUE; else
/* NOTE(review): return-type line, braces, and local declarations
   (phi_stmt, block_rank) are missing from this extraction; code kept
   verbatim.  Returns a boolean used by propagate_rank.  */
262 loop_carried_phi (tree exp)
267 if (TREE_CODE (exp) != SSA_NAME
268 || SSA_NAME_IS_DEFAULT_DEF (exp))
271 phi_stmt = SSA_NAME_DEF_STMT (exp);
273 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
276 /* Non-loop-carried phis have block rank. Loop-carried phis have
277 an additional bias added in. If this phi doesn't have block rank,
278 it's biased and should not be propagated. */
279 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
/* A rank differing from the plain block rank means phi_rank applied
   PHI_LOOP_BIAS, i.e. this is a loop-carried accumulator phi.  */
281 if (phi_rank (phi_stmt) != block_rank)
287 /* Return the maximum of RANK and the rank that should be propagated
288 from expression OP. For most operands, this is just the rank of OP.
289 For loop-carried phis, the value is zero to avoid undoing the bias
290 in favor of the phi. */
/* NOTE(review): return type, braces and the early-return for the
   loop-carried-phi case are missing from this extraction.  */
292 propagate_rank (long rank, tree op)
296 if (loop_carried_phi (op))
299 op_rank = get_rank (op);
301 return MAX (rank, op_rank);
304 /* Look up the operand rank structure for expression E. */
/* Returns the cached rank for E, or -1 when E has no entry yet.
   Ranks are stored directly in the pointer-map slot as an intptr_t.  */
307 find_operand_rank (tree e)
309 void **slot = pointer_map_contains (operand_rank, e);
310 return slot ? (long) (intptr_t) *slot : -1;
313 /* Insert {E,RANK} into the operand rank hashtable. */
/* NOTE(review): the slot declaration and a gcc_assert that the slot is
   empty are presumably on the missing lines — confirm against full source.
   Rank 0 is reserved for constants/globals, hence the assert below.  */
316 insert_operand_rank (tree e, long rank)
319 gcc_assert (rank > 0);
320 slot = pointer_map_insert (operand_rank, e);
322 *slot = (void *) (intptr_t) rank;
325 /* Given an expression E, return the rank of the expression. */
/* NOTE(review): function header line, local declarations (stmt, rank, n,
   i, op), braces, and several control-flow lines are missing from this
   extraction; code kept verbatim.  */
330 /* Constants have rank 0. */
331 if (is_gimple_min_invariant (e))
334 /* SSA_NAME's have the rank of the expression they are the result
336 For globals and uninitialized values, the rank is 0.
337 For function arguments, use the pre-setup rank.
338 For PHI nodes, stores, asm statements, etc, we use the rank of
340 For simple operations, the rank is the maximum rank of any of
341 its operands, or the bb_rank, whichever is less.
342 I make no claims that this is optimal, however, it gives good
345 /* We make an exception to the normal ranking system to break
346 dependences of accumulator variables in loops. Suppose we
347 have a simple one-block loop containing:
354 As shown, each iteration of the calculation into x is fully
355 dependent upon the iteration before it. We would prefer to
356 see this in the form:
363 If the loop is unrolled, the calculations of b and c from
364 different iterations can be interleaved.
366 To obtain this result during reassociation, we bias the rank
367 of the phi definition x_1 upward, when it is recognized as an
368 accumulator pattern. The artificial rank causes it to be
369 added last, providing the desired independence. */
371 if (TREE_CODE (e) == SSA_NAME)
/* Default-def PARM_DECLs get their pre-computed rank from the table.  */
378 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
379 && SSA_NAME_IS_DEFAULT_DEF (e))
380 return find_operand_rank (e);
382 stmt = SSA_NAME_DEF_STMT (e);
383 if (gimple_bb (stmt) == NULL)
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
/* Non-assignments and statements with virtual defs (stores etc.) take
   the containing block's rank.  */
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
402 if (gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
407 rank = propagate_rank (rank, rhs);
410 for (i = 0; i < n; i++)
412 op = TREE_OPERAND (rhs, i);
415 rank = propagate_rank (rank, op);
/* Non-single RHS: walk the gimple operands (slot 0 is the lhs).  */
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
424 op = gimple_op (stmt, i);
426 rank = propagate_rank (rank, op);
430 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
442 /* Globals, etc, are rank 0 */
/* Vector-of-pointer boilerplate for operand_entry_t (pre-C++ GCC VEC API).  */
446 DEF_VEC_P(operand_entry_t);
447 DEF_VEC_ALLOC_P(operand_entry_t, heap);
449 /* We want integer ones to end up last no matter what, since they are
450 the ones we can do the most with. */
/* Distinct bit values so constant_type differences give a stable sort
   order in sort_by_operand_rank (integers sort after floats/others).  */
451 #define INTEGER_CONST_TYPE 1 << 3
452 #define FLOAT_CONST_TYPE 1 << 2
453 #define OTHER_CONST_TYPE 1 << 1
455 /* Classify an invariant tree into integer, float, or other, so that
456 we can sort them to be near other constants of the same type. */
/* NOTE(review): the return-type line, braces and final "else" are
   missing from this extraction; code kept verbatim.  */
458 constant_type (tree t)
460 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
461 return INTEGER_CONST_TYPE;
462 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
463 return FLOAT_CONST_TYPE;
465 return OTHER_CONST_TYPE;
468 /* qsort comparison function to sort operand entries PA and PB by rank
469 so that the sorted array is ordered by rank in decreasing order. */
/* NOTE(review): return type and braces are missing from this extraction;
   code kept verbatim.  All tie-breaks fall through to the unique id so
   qsort output is deterministic across hosts.  */
471 sort_by_operand_rank (const void *pa, const void *pb)
473 const operand_entry_t oea = *(const operand_entry_t *)pa;
474 const operand_entry_t oeb = *(const operand_entry_t *)pb;
476 /* It's nicer for optimize_expression if constants that are likely
477 to fold when added/multiplied//whatever are put next to each
478 other. Since all constants have rank 0, order them by type. */
479 if (oeb->rank == 0 && oea->rank == 0)
481 if (constant_type (oeb->op) != constant_type (oea->op))
482 return constant_type (oeb->op) - constant_type (oea->op);
484 /* To make sorting result stable, we use unique IDs to determine
486 return oeb->id - oea->id;
489 /* Lastly, make sure the versions that are the same go next to each
490 other. We use SSA_NAME_VERSION because it's stable. */
491 if ((oeb->rank - oea->rank == 0)
492 && TREE_CODE (oea->op) == SSA_NAME
493 && TREE_CODE (oeb->op) == SSA_NAME)
495 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
496 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
498 return oeb->id - oea->id;
/* General case: higher rank sorts earlier (descending order).  */
501 if (oeb->rank != oea->rank)
502 return oeb->rank - oea->rank;
504 return oeb->id - oea->id;
507 /* Add an operand entry to *OPS for the tree operand OP. */
/* Allocates an entry from operand_entry_pool, computes its rank, stamps
   it with a fresh unique id (for stable qsort) and appends it to *OPS.
   NOTE(review): the "oe->op = op;" line is presumably among the lines
   missing from this extraction — confirm against full source.  */
510 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
512 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
515 oe->rank = get_rank (op);
516 oe->id = next_operand_entry_id++;
517 VEC_safe_push (operand_entry_t, heap, *ops, oe);
520 /* Return true if STMT is reassociable operation containing a binary
521 operation with tree code CODE, and is inside LOOP. */
/* NOTE(review): return type, braces and the false/true return lines are
   missing from this extraction; code kept verbatim.  The single-use
   check keeps linearization from duplicating work.  */
524 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
526 basic_block bb = gimple_bb (stmt);
528 if (gimple_bb (stmt) == NULL)
531 if (!flow_bb_inside_loop_p (loop, bb))
534 if (is_gimple_assign (stmt)
535 && gimple_assign_rhs_code (stmt) == code
536 && has_single_use (gimple_assign_lhs (stmt)))
543 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
544 operand of the negate operation. Otherwise, return NULL. */
/* NOTE(review): return type, braces and the NULL_TREE fallthrough return
   are missing from this extraction; code kept verbatim.  */
547 get_unary_op (tree name, enum tree_code opcode)
549 gimple stmt = SSA_NAME_DEF_STMT (name);
551 if (!is_gimple_assign (stmt))
554 if (gimple_assign_rhs_code (stmt) == opcode)
555 return gimple_assign_rhs1 (stmt);
559 /* If CURR and LAST are a pair of ops that OPCODE allows us to
560 eliminate through equivalences, do so, remove them from OPS, and
561 return true. Otherwise, return false. */
/* NOTE(review): return type, the switch on OPCODE, several parameters
   (opcode/currindex appear in the full source), braces and return
   statements are missing from this extraction; code kept verbatim.  */
564 eliminate_duplicate_pair (enum tree_code opcode,
565 VEC (operand_entry_t, heap) **ops,
568 operand_entry_t curr,
569 operand_entry_t last)
572 /* If we have two of the same op, and the opcode is & |, min, or max,
573 we can eliminate one of them.
574 If we have two of the same op, and the opcode is ^, we can
575 eliminate both of them. */
577 if (last && last->op == curr->op)
/* Presumably the &,|,min,max case of the (missing) switch: keep one
   copy, drop the other — confirm against full source.  */
585 if (dump_file && (dump_flags & TDF_DETAILS))
587 fprintf (dump_file, "Equivalence: ");
588 print_generic_expr (dump_file, curr->op, 0);
589 fprintf (dump_file, " [&|minmax] ");
590 print_generic_expr (dump_file, last->op, 0);
591 fprintf (dump_file, " -> ");
592 print_generic_stmt (dump_file, last->op, 0);
595 VEC_ordered_remove (operand_entry_t, *ops, i);
596 reassociate_stats.ops_eliminated ++;
/* Presumably the BIT_XOR_EXPR case: x ^ x == 0, so both operands go.  */
601 if (dump_file && (dump_flags & TDF_DETAILS))
603 fprintf (dump_file, "Equivalence: ");
604 print_generic_expr (dump_file, curr->op, 0);
605 fprintf (dump_file, " ^ ");
606 print_generic_expr (dump_file, last->op, 0);
607 fprintf (dump_file, " -> nothing\n");
610 reassociate_stats.ops_eliminated += 2;
/* Only the pair itself left: replace the whole list with a zero.  */
612 if (VEC_length (operand_entry_t, *ops) == 2)
614 VEC_free (operand_entry_t, heap, *ops);
616 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
621 VEC_ordered_remove (operand_entry_t, *ops, i-1);
622 VEC_ordered_remove (operand_entry_t, *ops, i-1);
/* Negate operands seen in PLUS chains, saved for repropagate_negates.  */
634 static VEC(tree, heap) *plus_negates;
636 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
637 expression, look in OPS for a corresponding positive operation to cancel
638 it out. If we find one, remove the other from OPS, replace
639 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
/* NOTE(review): return type, local declarations (i, oe, negateop,
   notop), braces and return statements are missing from this
   extraction; code kept verbatim.  */
643 eliminate_plus_minus_pair (enum tree_code opcode,
644 VEC (operand_entry_t, heap) **ops,
645 unsigned int currindex,
646 operand_entry_t curr)
653 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
656 negateop = get_unary_op (curr->op, NEGATE_EXPR);
657 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
658 if (negateop == NULL_TREE && notop == NULL_TREE)
661 /* Any non-negated version will have a rank that is one less than
662 the current rank. So once we hit those ranks, if we don't find
/* Scan forward in the rank-sorted list, stopping once ranks drop below
   curr->rank - 1 (the match cannot appear later).  */
665 for (i = currindex + 1;
666 VEC_iterate (operand_entry_t, *ops, i, oe)
667 && oe->rank >= curr->rank - 1 ;
/* x + (-x) == 0.  */
670 if (oe->op == negateop)
673 if (dump_file && (dump_flags & TDF_DETAILS))
675 fprintf (dump_file, "Equivalence: ");
676 print_generic_expr (dump_file, negateop, 0);
677 fprintf (dump_file, " + -");
678 print_generic_expr (dump_file, oe->op, 0);
679 fprintf (dump_file, " -> 0\n");
682 VEC_ordered_remove (operand_entry_t, *ops, i);
683 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
684 VEC_ordered_remove (operand_entry_t, *ops, currindex);
685 reassociate_stats.ops_eliminated ++;
/* x + ~x == -1 (all-ones).  */
689 else if (oe->op == notop)
691 tree op_type = TREE_TYPE (oe->op);
693 if (dump_file && (dump_flags & TDF_DETAILS))
695 fprintf (dump_file, "Equivalence: ");
696 print_generic_expr (dump_file, notop, 0);
697 fprintf (dump_file, " + ~");
698 print_generic_expr (dump_file, oe->op, 0);
699 fprintf (dump_file, " -> -1\n");
702 VEC_ordered_remove (operand_entry_t, *ops, i);
703 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
704 VEC_ordered_remove (operand_entry_t, *ops, currindex);
705 reassociate_stats.ops_eliminated ++;
711 /* CURR->OP is a negate expr in a plus expr: save it for later
712 inspection in repropagate_negates(). */
713 if (negateop != NULL_TREE)
714 VEC_safe_push (tree, heap, plus_negates, curr->op);
719 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
720 bitwise not expression, look in OPS for a corresponding operand to
721 cancel it out. If we find one, remove the other from OPS, replace
722 OPS[CURRINDEX] with 0, and return true. Otherwise, return
/* NOTE(review): return type, local declarations (i, oe, notop), braces,
   the "oe->op == notop" loop-body test and return statements are missing
   from this extraction; code kept verbatim.  */
726 eliminate_not_pairs (enum tree_code opcode,
727 VEC (operand_entry_t, heap) **ops,
728 unsigned int currindex,
729 operand_entry_t curr)
735 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
736 || TREE_CODE (curr->op) != SSA_NAME)
739 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
740 if (notop == NULL_TREE)
743 /* Any non-not version will have a rank that is one less than
744 the current rank. So once we hit those ranks, if we don't find
747 for (i = currindex + 1;
748 VEC_iterate (operand_entry_t, *ops, i, oe)
749 && oe->rank >= curr->rank - 1;
754 if (dump_file && (dump_flags & TDF_DETAILS))
756 fprintf (dump_file, "Equivalence: ");
757 print_generic_expr (dump_file, notop, 0);
758 if (opcode == BIT_AND_EXPR)
759 fprintf (dump_file, " & ~");
760 else if (opcode == BIT_IOR_EXPR)
761 fprintf (dump_file, " | ~");
762 print_generic_expr (dump_file, oe->op, 0);
763 if (opcode == BIT_AND_EXPR)
764 fprintf (dump_file, " -> 0\n");
765 else if (opcode == BIT_IOR_EXPR)
766 fprintf (dump_file, " -> -1\n");
/* x & ~x == 0; x | ~x == all-ones.  The whole list collapses to that
   single absorbing constant.  */
769 if (opcode == BIT_AND_EXPR)
770 oe->op = build_zero_cst (TREE_TYPE (oe->op));
771 else if (opcode == BIT_IOR_EXPR)
772 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
773 TYPE_PRECISION (TREE_TYPE (oe->op)));
775 reassociate_stats.ops_eliminated
776 += VEC_length (operand_entry_t, *ops) - 1;
777 VEC_free (operand_entry_t, heap, *ops);
779 VEC_safe_push (operand_entry_t, heap, *ops, oe);
787 /* Use constant value that may be present in OPS to try to eliminate
788 operands. Note that this function is only really used when we've
789 eliminated ops for other reasons, or merged constants. Across
790 single statements, fold already does all of this, plus more. There
791 is little point in duplicating logic, so I've only included the
792 identities that I could ever construct testcases to trigger. */
/* NOTE(review): return type, braces, the switch on OPCODE and its case
   labels are missing from this extraction; code kept verbatim.  The
   constant operand, if any, is last in OPS after rank-sorting.  */
795 eliminate_using_constants (enum tree_code opcode,
796 VEC(operand_entry_t, heap) **ops)
798 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
799 tree type = TREE_TYPE (oelast->op);
801 if (oelast->rank == 0
802 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
/* Presumably the BIT_AND_EXPR case of the missing switch.
   x & 0 == 0: absorbing, drop all other operands.  */
807 if (integer_zerop (oelast->op))
809 if (VEC_length (operand_entry_t, *ops) != 1)
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Found & 0, removing all other ops\n");
814 reassociate_stats.ops_eliminated
815 += VEC_length (operand_entry_t, *ops) - 1;
817 VEC_free (operand_entry_t, heap, *ops);
819 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x & -1 == x: identity, drop the constant.  */
823 else if (integer_all_onesp (oelast->op))
825 if (VEC_length (operand_entry_t, *ops) != 1)
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "Found & -1, removing\n");
829 VEC_pop (operand_entry_t, *ops);
830 reassociate_stats.ops_eliminated++;
/* Presumably the BIT_IOR_EXPR case.  x | -1 == -1: absorbing.  */
835 if (integer_all_onesp (oelast->op))
837 if (VEC_length (operand_entry_t, *ops) != 1)
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found | -1, removing all other ops\n");
842 reassociate_stats.ops_eliminated
843 += VEC_length (operand_entry_t, *ops) - 1;
845 VEC_free (operand_entry_t, heap, *ops);
847 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x | 0 == x: identity.  */
851 else if (integer_zerop (oelast->op))
853 if (VEC_length (operand_entry_t, *ops) != 1)
855 if (dump_file && (dump_flags & TDF_DETAILS))
856 fprintf (dump_file, "Found | 0, removing\n");
857 VEC_pop (operand_entry_t, *ops);
858 reassociate_stats.ops_eliminated++;
/* Presumably the MULT_EXPR case.  x * 0 == 0 only when float NaN and
   signed-zero semantics permit.  */
863 if (integer_zerop (oelast->op)
864 || (FLOAT_TYPE_P (type)
865 && !HONOR_NANS (TYPE_MODE (type))
866 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
867 && real_zerop (oelast->op)))
869 if (VEC_length (operand_entry_t, *ops) != 1)
871 if (dump_file && (dump_flags & TDF_DETAILS))
872 fprintf (dump_file, "Found * 0, removing all other ops\n");
874 reassociate_stats.ops_eliminated
875 += VEC_length (operand_entry_t, *ops) - 1;
876 VEC_free (operand_entry_t, heap, *ops);
878 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x * 1 == x; for floats only when signaling NaNs need not be honored.  */
882 else if (integer_onep (oelast->op)
883 || (FLOAT_TYPE_P (type)
884 && !HONOR_SNANS (TYPE_MODE (type))
885 && real_onep (oelast->op)))
887 if (VEC_length (operand_entry_t, *ops) != 1)
889 if (dump_file && (dump_flags & TDF_DETAILS))
890 fprintf (dump_file, "Found * 1, removing\n");
891 VEC_pop (operand_entry_t, *ops);
892 reassociate_stats.ops_eliminated++;
/* Presumably the PLUS/MINUS/XOR cases.  x OP 0 == x, with the usual
   caveats for floating-point zero addition.  */
900 if (integer_zerop (oelast->op)
901 || (FLOAT_TYPE_P (type)
902 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
903 && fold_real_zero_addition_p (type, oelast->op,
904 opcode == MINUS_EXPR)))
906 if (VEC_length (operand_entry_t, *ops) != 1)
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, "Found [|^+] 0, removing\n");
910 VEC_pop (operand_entry_t, *ops);
911 reassociate_stats.ops_eliminated++;
/* Forward declaration; linearize_expr_tree is defined later in the file.  */
923 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
926 /* Structure for tracking and counting operands. */
/* NOTE(review): the cnt/id/op fields of oecount_s are missing from this
   extraction (they are referenced below); code kept verbatim.  */
927 typedef struct oecount_s {
930 enum tree_code oecode;
935 DEF_VEC_ALLOC_O(oecount,heap);
937 /* The heap for the oecount hashtable and the sorted list of operands. */
938 static VEC (oecount, heap) *cvec;
940 /* Hash function for oecount. */
/* Hash-table entries are (index + 42) encoded in the void* key; the -42
   below recovers the cvec index.  Hash mixes operand pointer and code.  */
943 oecount_hash (const void *p)
945 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
946 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
949 /* Comparison function for oecount. */
/* Two entries are equal when both the operation code and the operand
   tree match.  */
952 oecount_eq (const void *p1, const void *p2)
954 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
955 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
956 return (c1->oecode == c2->oecode
957 && c1->op == c2->op);
960 /* Comparison function for qsort sorting oecount elements by count. */
/* Ascending by cnt, so after VEC_qsort the most-used pair is at the end
   of cvec (undistribute_ops_list pops from the back).  */
963 oecount_cmp (const void *p1, const void *p2)
965 const oecount *c1 = (const oecount *)p1;
966 const oecount *c2 = (const oecount *)p2;
967 if (c1->cnt != c2->cnt)
968 return c1->cnt - c2->cnt;
970 /* If counts are identical, use unique IDs to stabilize qsort. */
971 return c1->id - c2->id;
974 /* Walks the linear chain with result *DEF searching for an operation
975 with operand OP and code OPCODE removing that from the chain. *DEF
976 is updated if there is only one operand but no operation left. */
/* NOTE(review): the enclosing do/while loop, braces, and several lines
   (including the *def update and the rhs1==op branch) are missing from
   this extraction; code kept verbatim.  */
979 zero_one_operation (tree *def, enum tree_code opcode, tree op)
981 gimple stmt = SSA_NAME_DEF_STMT (*def);
985 tree name = gimple_assign_rhs1 (stmt);
987 /* If this is the operation we look for and one of the operands
988 is ours simply propagate the other operand into the stmts
990 if (gimple_assign_rhs_code (stmt) == opcode
992 || gimple_assign_rhs2 (stmt) == op))
996 gimple_stmt_iterator gsi;
998 name = gimple_assign_rhs2 (stmt);
999 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1000 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1001 if (gimple_assign_lhs (stmt) == *def)
1003 SET_USE (use, name);
1004 if (TREE_CODE (name) != SSA_NAME)
1005 update_stmt (use_stmt);
/* The matched statement is now dead: unlink and release its defs.  */
1006 gsi = gsi_for_stmt (stmt);
1007 gsi_remove (&gsi, true);
1008 release_defs (stmt);
1012 /* Continue walking the chain. */
1013 gcc_assert (name != op
1014 && TREE_CODE (name) == SSA_NAME);
1015 stmt = SSA_NAME_DEF_STMT (name);
1020 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1021 the result. Places the statement after the definition of either
1022 OP1 or OP2. Returns the new statement. */
/* NOTE(review): return type, local declarations (sum, op, edge/edge
   iterator), braces, the else branches and the final "return sum;" are
   missing from this extraction; code kept verbatim.  */
1025 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1027 gimple op1def = NULL, op2def = NULL;
1028 gimple_stmt_iterator gsi;
1032 /* Create the addition statement. */
1033 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1034 op = make_ssa_name (tmpvar, sum);
1035 gimple_assign_set_lhs (sum, op);
1037 /* Find an insertion place and insert. */
1038 if (TREE_CODE (op1) == SSA_NAME)
1039 op1def = SSA_NAME_DEF_STMT (op1);
1040 if (TREE_CODE (op2) == SSA_NAME)
1041 op2def = SSA_NAME_DEF_STMT (op2);
/* Neither operand has a real defining statement: insert at the start of
   the function (after labels in the entry block's successor).  */
1042 if ((!op1def || gimple_nop_p (op1def))
1043 && (!op2def || gimple_nop_p (op2def)))
1045 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1046 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
/* Insert relative to op2's definition when op1 has none or dominates it.  */
1048 else if ((!op1def || gimple_nop_p (op1def))
1049 || (op2def && !gimple_nop_p (op2def)
1050 && stmt_dominates_stmt_p (op1def, op2def)))
1052 if (gimple_code (op2def) == GIMPLE_PHI)
1054 gsi = gsi_after_labels (gimple_bb (op2def));
1055 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1059 if (!stmt_ends_bb_p (op2def))
1061 gsi = gsi_for_stmt (op2def);
1062 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
/* op2def ends its block: insert on the fallthru edge instead.  */
1069 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1070 if (e->flags & EDGE_FALLTHRU)
1071 gsi_insert_on_edge_immediate (e, sum);
/* Otherwise insert relative to op1's definition, mirroring the above.  */
1077 if (gimple_code (op1def) == GIMPLE_PHI)
1079 gsi = gsi_after_labels (gimple_bb (op1def));
1080 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1084 if (!stmt_ends_bb_p (op1def))
1086 gsi = gsi_for_stmt (op1def);
1087 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1094 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1095 if (e->flags & EDGE_FALLTHRU)
1096 gsi_insert_on_edge_immediate (e, sum);
1105 /* Perform un-distribution of divisions and multiplications.
1106 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1107 to (A + B) / X for real X.
1109 The algorithm is organized as follows.
1111 - First we walk the addition chain *OPS looking for summands that
1112 are defined by a multiplication or a real division. This results
1113 in the candidates bitmap with relevant indices into *OPS.
1115 - Second we build the chains of multiplications or divisions for
1116 these candidates, counting the number of occurrences of (operand, code)
1117 pairs in all of the candidates chains.
1119 - Third we sort the (operand, code) pairs by number of occurrence and
1120 process them starting with the pair with the most uses.
1122 * For each such pair we walk the candidates again to build a
1123 second candidate bitmap noting all multiplication/division chains
1124 that have at least one occurrence of (operand, code).
1126 * We build an alternate addition chain only covering these
1127 candidates with one (operand, code) operation removed from their
1128 multiplication/division chain.
1130 * The first candidate gets replaced by the alternate addition chain
1131 multiplied/divided by the operand.
1133 * All candidate chains get disabled for further processing and
1134 processing of (operand, code) pairs continues.
1136 The alternate addition chains built are re-processed by the main
1137 reassociation algorithm which allows optimizing a * x * y + b * y * x
1138 to (a + b ) * x * y in one invocation of the reassociation pass. */
1141 undistribute_ops_list (enum tree_code opcode,
1142 VEC (operand_entry_t, heap) **ops, struct loop *loop)
1144 unsigned int length = VEC_length (operand_entry_t, *ops);
1145 operand_entry_t oe1;
1147 sbitmap candidates, candidates2;
1148 unsigned nr_candidates, nr_candidates2;
1149 sbitmap_iterator sbi0;
1150 VEC (operand_entry_t, heap) **subops;
1152 bool changed = false;
1153 int next_oecount_id = 0;
1156 || opcode != PLUS_EXPR)
1159 /* Build a list of candidates to process. */
1160 candidates = sbitmap_alloc (length);
1161 sbitmap_zero (candidates);
1163 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1165 enum tree_code dcode;
1168 if (TREE_CODE (oe1->op) != SSA_NAME)
1170 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1171 if (!is_gimple_assign (oe1def))
1173 dcode = gimple_assign_rhs_code (oe1def);
1174 if ((dcode != MULT_EXPR
1175 && dcode != RDIV_EXPR)
1176 || !is_reassociable_op (oe1def, dcode, loop))
1179 SET_BIT (candidates, i);
1183 if (nr_candidates < 2)
1185 sbitmap_free (candidates);
1189 if (dump_file && (dump_flags & TDF_DETAILS))
1191 fprintf (dump_file, "searching for un-distribute opportunities ");
1192 print_generic_expr (dump_file,
1193 VEC_index (operand_entry_t, *ops,
1194 sbitmap_first_set_bit (candidates))->op, 0);
1195 fprintf (dump_file, " %d\n", nr_candidates);
1198 /* Build linearized sub-operand lists and the counting table. */
1200 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1201 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1202 VEC_length (operand_entry_t, *ops));
1203 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1206 enum tree_code oecode;
1209 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1210 oecode = gimple_assign_rhs_code (oedef);
1211 linearize_expr_tree (&subops[i], oedef,
1212 associative_tree_code (oecode), false);
1214 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1221 c.id = next_oecount_id++;
1223 VEC_safe_push (oecount, heap, cvec, &c);
1224 idx = VEC_length (oecount, cvec) + 41;
1225 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1228 *slot = (void *)idx;
1232 VEC_pop (oecount, cvec);
1233 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1237 htab_delete (ctable);
1239 /* Sort the counting table. */
1240 VEC_qsort (oecount, cvec, oecount_cmp);
1242 if (dump_file && (dump_flags & TDF_DETAILS))
1245 fprintf (dump_file, "Candidates:\n");
1246 FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1248 fprintf (dump_file, " %u %s: ", c->cnt,
1249 c->oecode == MULT_EXPR
1250 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1251 print_generic_expr (dump_file, c->op, 0);
1252 fprintf (dump_file, "\n");
1256 /* Process the (operand, code) pairs in order of most occurence. */
1257 candidates2 = sbitmap_alloc (length);
1258 while (!VEC_empty (oecount, cvec))
1260 oecount *c = VEC_last (oecount, cvec);
1264 /* Now collect the operands in the outer chain that contain
1265 the common operand in their inner chain. */
1266 sbitmap_zero (candidates2);
1268 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1271 enum tree_code oecode;
1273 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1275 /* If we undistributed in this chain already this may be
1277 if (TREE_CODE (op) != SSA_NAME)
1280 oedef = SSA_NAME_DEF_STMT (op);
1281 oecode = gimple_assign_rhs_code (oedef);
1282 if (oecode != c->oecode)
1285 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1287 if (oe1->op == c->op)
1289 SET_BIT (candidates2, i);
1296 if (nr_candidates2 >= 2)
1298 operand_entry_t oe1, oe2;
1301 int first = sbitmap_first_set_bit (candidates2);
1303 /* Build the new addition chain. */
1304 oe1 = VEC_index (operand_entry_t, *ops, first);
1305 if (dump_file && (dump_flags & TDF_DETAILS))
1307 fprintf (dump_file, "Building (");
1308 print_generic_expr (dump_file, oe1->op, 0);
1310 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1311 add_referenced_var (tmpvar);
1312 zero_one_operation (&oe1->op, c->oecode, c->op);
1313 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1316 oe2 = VEC_index (operand_entry_t, *ops, i);
1317 if (dump_file && (dump_flags & TDF_DETAILS))
1319 fprintf (dump_file, " + ");
1320 print_generic_expr (dump_file, oe2->op, 0);
1322 zero_one_operation (&oe2->op, c->oecode, c->op);
1323 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1324 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1326 oe1->op = gimple_get_lhs (sum);
1329 /* Apply the multiplication/division. */
1330 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1331 if (dump_file && (dump_flags & TDF_DETAILS))
1333 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1334 print_generic_expr (dump_file, c->op, 0);
1335 fprintf (dump_file, "\n");
1338 /* Record it in the addition chain and disable further
1339 undistribution with this op. */
1340 oe1->op = gimple_assign_lhs (prod);
1341 oe1->rank = get_rank (oe1->op);
1342 VEC_free (operand_entry_t, heap, subops[first]);
1347 VEC_pop (oecount, cvec);
1350 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1351 VEC_free (operand_entry_t, heap, subops[i]);
1353 VEC_free (oecount, heap, cvec);
1354 sbitmap_free (candidates);
1355 sbitmap_free (candidates2);
1360 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1361 expression, examine the other OPS to see if any of them are comparisons
1362 of the same values, which we may be able to combine or eliminate.
1363 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1366 eliminate_redundant_comparison (enum tree_code opcode,
1367 VEC (operand_entry_t, heap) **ops,
1368 unsigned int currindex,
1369 operand_entry_t curr)
1372 enum tree_code lcode, rcode;
/* Only bitwise-or and bitwise-and chains can merge comparisons.  */
1377 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1380 /* Check that CURR is a comparison. */
1381 if (TREE_CODE (curr->op) != SSA_NAME)
1383 def1 = SSA_NAME_DEF_STMT (curr->op);
1384 if (!is_gimple_assign (def1))
1386 lcode = gimple_assign_rhs_code (def1);
1387 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1389 op1 = gimple_assign_rhs1 (def1);
1390 op2 = gimple_assign_rhs2 (def1);
1392 /* Now look for a similar comparison in the remaining OPS. */
1393 for (i = currindex + 1;
1394 VEC_iterate (operand_entry_t, *ops, i, oe);
/* Skip entries that are not themselves SSA-defined comparisons.  */
1399 if (TREE_CODE (oe->op) != SSA_NAME)
1401 def2 = SSA_NAME_DEF_STMT (oe->op);
1402 if (!is_gimple_assign (def2))
1404 rcode = gimple_assign_rhs_code (def2);
1405 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1408 /* If we got here, we have a match. See if we can combine the
/* Fold the two comparisons into one; | uses the "or" folder,
   everything else (here only &) uses the "and" folder.  */
1410 if (opcode == BIT_IOR_EXPR)
1411 t = maybe_fold_or_comparisons (lcode, op1, op2,
1412 rcode, gimple_assign_rhs1 (def2),
1413 gimple_assign_rhs2 (def2));
1415 t = maybe_fold_and_comparisons (lcode, op1, op2,
1416 rcode, gimple_assign_rhs1 (def2),
1417 gimple_assign_rhs2 (def2));
1421 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1422 always give us a boolean_type_node value back. If the original
1423 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1424 we need to convert. */
1425 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1426 t = fold_convert (TREE_TYPE (curr->op), t);
/* If the folded result is a new comparison, make sure its
   operands are valid gimple values before committing.  */
1428 if (TREE_CODE (t) != INTEGER_CST
1429 && !operand_equal_p (t, curr->op, 0))
1431 enum tree_code subcode;
1432 tree newop1, newop2;
1433 if (!COMPARISON_CLASS_P (t))
1435 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1436 STRIP_USELESS_TYPE_CONVERSION (newop1);
1437 STRIP_USELESS_TYPE_CONVERSION (newop2);
1438 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1442 if (dump_file && (dump_flags & TDF_DETAILS))
1444 fprintf (dump_file, "Equivalence: ");
1445 print_generic_expr (dump_file, curr->op, 0);
1446 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1447 print_generic_expr (dump_file, oe->op, 0);
1448 fprintf (dump_file, " -> ");
1449 print_generic_expr (dump_file, t, 0);
1450 fprintf (dump_file, "\n");
1453 /* Now we can delete oe, as it has been subsumed by the new combined
1455 VEC_ordered_remove (operand_entry_t, *ops, i);
1456 reassociate_stats.ops_eliminated ++;
1458 /* If t is the same as curr->op, we're done. Otherwise we must
1459 replace curr->op with t. Special case is if we got a constant
1460 back, in which case we add it to the end instead of in place of
1461 the current entry. */
1462 if (TREE_CODE (t) == INTEGER_CST)
1464 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1465 add_to_ops_vec (ops, t);
1467 else if (!operand_equal_p (t, curr->op, 0))
1471 enum tree_code subcode;
/* Materialize the combined comparison as a new statement and
   let CURR refer to its result.  */
1474 gcc_assert (COMPARISON_CLASS_P (t));
1475 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1476 add_referenced_var (tmpvar);
1477 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1478 STRIP_USELESS_TYPE_CONVERSION (newop1);
1479 STRIP_USELESS_TYPE_CONVERSION (newop2);
1480 gcc_checking_assert (is_gimple_val (newop1)
1481 && is_gimple_val (newop2));
1482 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1483 curr->op = gimple_get_lhs (sum);
1491 /* Perform various identities and other optimizations on the list of
1492 operand entries, stored in OPS. The tree code for the binary
1493 operation between all the operands is OPCODE. */
1496 optimize_ops_list (enum tree_code opcode,
1497 VEC (operand_entry_t, heap) **ops)
1499 unsigned int length = VEC_length (operand_entry_t, *ops);
1502 operand_entry_t oelast = NULL;
1503 bool iterate = false;
1508 oelast = VEC_last (operand_entry_t, *ops);
1510 /* If the last two are constants, pop the constants off, merge them
1511 and try the next two. */
1512 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1514 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1516 if (oelm1->rank == 0
1517 && is_gimple_min_invariant (oelm1->op)
1518 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1519 TREE_TYPE (oelast->op)))
1521 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1522 oelm1->op, oelast->op);
1524 if (folded && is_gimple_min_invariant (folded))
1526 if (dump_file && (dump_flags & TDF_DETAILS))
1527 fprintf (dump_file, "Merging constants\n");
1529 VEC_pop (operand_entry_t, *ops);
1530 VEC_pop (operand_entry_t, *ops);
1532 add_to_ops_vec (ops, folded);
1533 reassociate_stats.constants_eliminated++;
/* The merged constant may enable further merging; restart.  */
1535 optimize_ops_list (opcode, ops);
1541 eliminate_using_constants (opcode, ops);
/* Walk the list applying pairwise eliminations; the helpers
   remove entries, so the iterator is only advanced on no-change.  */
1544 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1548 if (eliminate_not_pairs (opcode, ops, i, oe))
1550 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1551 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1552 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1564 length = VEC_length (operand_entry_t, *ops);
1565 oelast = VEC_last (operand_entry_t, *ops);
/* If anything was eliminated, run the whole list again.  */
1568 optimize_ops_list (opcode, ops);
1571 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1572 of STMT in its operands. This is also known as a "destructive
1573 update" operation. */
1576 is_phi_for_stmt (gimple stmt, tree operand)
1580 use_operand_p arg_p;
/* Only SSA names can be PHI-defined.  */
1583 if (TREE_CODE (operand) != SSA_NAME)
1586 lhs = gimple_assign_lhs (stmt);
1588 def_stmt = SSA_NAME_DEF_STMT (operand);
1589 if (gimple_code (def_stmt) != GIMPLE_PHI)
/* Look for STMT's LHS among the PHI arguments.  */
1592 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
1593 if (lhs == USE_FROM_PTR (arg_p))
1598 /* Remove def stmt of VAR if VAR has zero uses and recurse
1599 on rhs1 operand if so. */
1602 remove_visited_stmt_chain (tree var)
1605 gimple_stmt_iterator gsi;
/* Stop at non-SSA operands or names that still have uses.  */
1609 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
1611 stmt = SSA_NAME_DEF_STMT (var);
/* Only delete statements this pass has already visited/rewritten.  */
1612 if (!is_gimple_assign (stmt)
1613 || !gimple_visited_p (stmt))
/* Continue the walk with the first operand before removing STMT.  */
1615 var = gimple_assign_rhs1 (stmt);
1616 gsi = gsi_for_stmt (stmt);
1617 gsi_remove (&gsi, true);
1618 release_defs (stmt);
1622 /* This function checks three consecutive operands in
1623 passed operands vector OPS starting from OPINDEX and
1624 swaps two operands if it is profitable for binary operation
1625 consuming OPINDEX + 1 and OPINDEX + 2 operands.
1627 We pair ops with the same rank if possible.
1629 The alternative we try is to see if STMT is a destructive
1630 update style statement, which is like:
1633 In that case, we want to use the destructive update form to
1634 expose the possible vectorizer sum reduction opportunity.
1635 In that case, the third operand will be the phi node. This
1636 check is not performed if STMT is null.
1638 We could, of course, try to be better as noted above, and do a
1639 lot of work to try to find these opportunities in >3 operand
1640 cases, but it is unlikely to be worth it. */
1643 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
1644 unsigned int opindex, gimple stmt)
1646 operand_entry_t oe1, oe2, oe3;
1648 oe1 = VEC_index (operand_entry_t, ops, opindex);
1649 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1650 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
/* Swap oe1 and oe3 when oe1/oe2 pair by rank, or when oe3 is the
   PHI-defined (destructive-update) operand.  */
1652 if ((oe1->rank == oe2->rank
1653 && oe2->rank != oe3->rank)
1654 || (stmt && is_phi_for_stmt (stmt, oe3->op)
1655 && !is_phi_for_stmt (stmt, oe1->op)
1656 && !is_phi_for_stmt (stmt, oe2->op)))
1658 struct operand_entry temp = *oe3;
1660 oe3->rank = oe1->rank;
1662 oe1->rank= temp.rank;
/* Otherwise swap oe1 and oe2 under the symmetric conditions.  */
1664 else if ((oe1->rank == oe3->rank
1665 && oe2->rank != oe3->rank)
1666 || (stmt && is_phi_for_stmt (stmt, oe2->op)
1667 && !is_phi_for_stmt (stmt, oe1->op)
1668 && !is_phi_for_stmt (stmt, oe3->op)))
1670 struct operand_entry temp = *oe2;
1672 oe2->rank = oe1->rank;
1674 oe1->rank= temp.rank;
1678 /* Recursively rewrite our linearized statements so that the operators
1679 match those in OPS[OPINDEX], putting the computation in rank
1683 rewrite_expr_tree (gimple stmt, unsigned int opindex,
1684 VEC(operand_entry_t, heap) * ops, bool moved)
1686 tree rhs1 = gimple_assign_rhs1 (stmt);
1687 tree rhs2 = gimple_assign_rhs2 (stmt);
1690 /* If we have three operands left, then we want to make sure the ones
1691 that get the double binary op are chosen wisely. */
1692 if (opindex + 3 == VEC_length (operand_entry_t, ops))
1693 swap_ops_for_binary_stmt (ops, opindex, stmt);
1695 /* The final recursion case for this function is that you have
1696 exactly two operations left.
1697 If we had one exactly one op in the entire list to start with, we
1698 would have never called this function, and the tail recursion
1699 rewrites them one at a time. */
1700 if (opindex + 2 == VEC_length (operand_entry_t, ops))
1702 operand_entry_t oe1, oe2;
1704 oe1 = VEC_index (operand_entry_t, ops, opindex);
1705 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
/* Only rewrite when the operands actually change.  */
1707 if (rhs1 != oe1->op || rhs2 != oe2->op)
1709 if (dump_file && (dump_flags & TDF_DETAILS))
1711 fprintf (dump_file, "Transforming ");
1712 print_gimple_stmt (dump_file, stmt, 0, 0);
1715 gimple_assign_set_rhs1 (stmt, oe1->op);
1716 gimple_assign_set_rhs2 (stmt, oe2->op);
/* The old rhs1 chain may now be dead; clean it up.  */
1718 if (rhs1 != oe1->op && rhs1 != oe2->op)
1719 remove_visited_stmt_chain (rhs1);
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, " into ");
1724 print_gimple_stmt (dump_file, stmt, 0, 0);
1731 /* If we hit here, we should have 3 or more ops left. */
1732 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
1734 /* Rewrite the next operator. */
1735 oe = VEC_index (operand_entry_t, ops, opindex);
/* On the first rewrite, move the whole rhs1 definition chain in
   front of STMT so the new operand order has valid dominance.  */
1741 gimple_stmt_iterator gsinow, gsirhs1;
1742 gimple stmt1 = stmt, stmt2;
1745 gsinow = gsi_for_stmt (stmt);
1746 count = VEC_length (operand_entry_t, ops) - opindex - 2;
1747 while (count-- != 0)
1749 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
1750 gsirhs1 = gsi_for_stmt (stmt2);
1751 gsi_move_before (&gsirhs1, &gsinow);
1758 if (dump_file && (dump_flags & TDF_DETAILS))
1760 fprintf (dump_file, "Transforming ");
1761 print_gimple_stmt (dump_file, stmt, 0, 0);
1764 gimple_assign_set_rhs2 (stmt, oe->op);
1767 if (dump_file && (dump_flags & TDF_DETAILS))
1769 fprintf (dump_file, " into ");
1770 print_gimple_stmt (dump_file, stmt, 0, 0);
1773 /* Recurse on the LHS of the binary operator, which is guaranteed to
1774 be the non-leaf side. */
1775 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
1778 /* Find out how many cycles we need to compute statements chain.
1779 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
1780 maximum number of independent statements we may execute per cycle. */
1783 get_required_cycles (int ops_num, int cpu_width)
1789 /* While we have more than 2 * cpu_width operands
1790 we may reduce number of operands by cpu_width
1792 res = ops_num / (2 * cpu_width);
1794 /* Remained operands count may be reduced twice per cycle
1795 until we have only one operand. */
1796 rest = (unsigned)(ops_num - res * cpu_width);
1797 elog = exact_log2 (rest);
/* Non-power-of-two remainders need an extra rounding-up cycle.  */
1801 res += floor_log2 (rest) + 1;
1806 /* Returns an optimal number of registers to use for computation of
1807 given statements. */
1810 get_reassociation_width (int ops_num, enum tree_code opc,
1811 enum machine_mode mode)
1813 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
/* An explicit --param overrides the target hook.  */
1818 if (param_width > 0)
1819 width = param_width;
1821 width = targetm.sched.reassociation_width (opc, mode);
1826 /* Get the minimal time required for sequence computation. */
1827 cycles_best = get_required_cycles (ops_num, width);
1829 /* Check if we may use less width and still compute sequence for
1830 the same time. It will allow us to reduce registers usage.
1831 get_required_cycles is monotonically increasing with lower width
1832 so we can perform a binary search for the minimal width that still
1833 results in the optimal cycle count. */
1835 while (width > width_min)
1837 int width_mid = (width + width_min) / 2;
1839 if (get_required_cycles (ops_num, width_mid) == cycles_best)
1841 else if (width_min < width_mid)
1842 width_min = width_mid;
1850 /* Recursively rewrite our linearized statements so that the operators
1851 match those in OPS[OPINDEX], putting the computation in rank
1852 order and trying to allow operations to be executed in
1856 rewrite_expr_tree_parallel (gimple stmt, int width,
1857 VEC(operand_entry_t, heap) * ops)
1859 enum tree_code opcode = gimple_assign_rhs_code (stmt);
1860 int op_num = VEC_length (operand_entry_t, ops);
/* A chain of N operands needs N-1 binary statements.  */
1861 int stmt_num = op_num - 1;
1862 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
1863 int op_index = op_num - 1;
1865 int ready_stmts_end = 0;
1867 tree last_rhs1 = gimple_assign_rhs1 (stmt);
1870 /* We start expression rewriting from the top statements.
1871 So, in this loop we create a full list of statements
1872 we will work with. */
1873 stmts[stmt_num - 1] = stmt;
1874 for (i = stmt_num - 2; i >= 0; i--)
1875 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
1877 lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
1878 add_referenced_var (lhs_var);
1880 for (i = 0; i < stmt_num; i++)
1884 /* Determine whether we should use results of
1885 already handled statements or not. */
1886 if (ready_stmts_end == 0
1887 && (i - stmt_index >= width || op_index < 1))
1888 ready_stmts_end = i;
1890 /* Now we choose operands for the next statement. Non zero
1891 value in ready_stmts_end means here that we should use
1892 the result of already generated statements as new operand. */
1893 if (ready_stmts_end > 0)
1895 op1 = gimple_assign_lhs (stmts[stmt_index++]);
1896 if (ready_stmts_end > stmt_index)
1897 op2 = gimple_assign_lhs (stmts[stmt_index++]);
1898 else if (op_index >= 0)
1899 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
1902 gcc_assert (stmt_index < i);
1903 op2 = gimple_assign_lhs (stmts[stmt_index++]);
1906 if (stmt_index >= ready_stmts_end)
1907 ready_stmts_end = 0;
/* Otherwise take two fresh operands from the ops vector,
   first making the last three well-paired.  */
1912 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
1913 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
1914 op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
1917 /* If we emit the last statement then we should put
1918 operands into the last statement. It will also
1920 if (op_index < 0 && stmt_index == i)
1923 if (dump_file && (dump_flags & TDF_DETAILS))
1925 fprintf (dump_file, "Transforming ");
1926 print_gimple_stmt (dump_file, stmts[i], 0, 0);
1929 /* We keep original statement only for the last one. All
1930 others are recreated. */
1931 if (i == stmt_num - 1)
1933 gimple_assign_set_rhs1 (stmts[i], op1);
1934 gimple_assign_set_rhs2 (stmts[i], op2);
1935 update_stmt (stmts[i]);
1938 stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
1940 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file, " into ");
1943 print_gimple_stmt (dump_file, stmts[i], 0, 0);
/* Delete the now-dead original statement chain.  */
1947 remove_visited_stmt_chain (last_rhs1);
1950 /* Transform STMT, which is really (A +B) + (C + D) into the left
1951 linear form, ((A+B)+C)+D.
1952 Recurse on D if necessary. */
1955 linearize_expr (gimple stmt)
1957 gimple_stmt_iterator gsinow, gsirhs;
1958 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1959 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1960 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1961 gimple newbinrhs = NULL;
1962 struct loop *loop = loop_containing_stmt (stmt);
1964 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
1965 && is_reassociable_op (binrhs, rhscode, loop));
/* Move BINRHS before STMT so its LHS dominates the new use.  */
1967 gsinow = gsi_for_stmt (stmt);
1968 gsirhs = gsi_for_stmt (binrhs);
1969 gsi_move_before (&gsirhs, &gsinow);
/* Rotate: STMT = (BINLHS op C) op D where BINRHS was (C op D).  */
1971 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
1972 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
1973 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
1975 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1976 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1980 fprintf (dump_file, "Linearized: ");
1981 print_gimple_stmt (dump_file, stmt, 0, 0);
1984 reassociate_stats.linearized++;
1985 update_stmt (binrhs);
1986 update_stmt (binlhs);
1989 gimple_set_visited (stmt, true);
1990 gimple_set_visited (binlhs, true);
1991 gimple_set_visited (binrhs, true);
1993 /* Tail recurse on the new rhs if it still needs reassociation. */
1994 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
1995 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1996 want to change the algorithm while converting to tuples. */
1997 linearize_expr (stmt);
2000 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2001 it. Otherwise, return NULL. */
2004 get_single_immediate_use (tree lhs)
2006 use_operand_p immuse;
/* Require an SSA name with exactly one use, and that use an assign.  */
2009 if (TREE_CODE (lhs) == SSA_NAME
2010 && single_imm_use (lhs, &immuse, &immusestmt)
2011 && is_gimple_assign (immusestmt))
2017 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2018 representing the negated value. Insertions of any necessary
2019 instructions go before GSI.
2020 This function is recursive in that, if you hand it "a_5" as the
2021 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2022 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2025 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2027 gimple negatedefstmt= NULL;
2028 tree resultofnegate;
2030 /* If we are trying to negate a name, defined by an add, negate the
2031 add operands instead. */
2032 if (TREE_CODE (tonegate) == SSA_NAME)
2033 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
/* Only rewrite a single-use PLUS_EXPR definition in place; a
   multi-use def must keep its original value.  */
2034 if (TREE_CODE (tonegate) == SSA_NAME
2035 && is_gimple_assign (negatedefstmt)
2036 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2037 && has_single_use (gimple_assign_lhs (negatedefstmt))
2038 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2040 gimple_stmt_iterator gsi;
2041 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2042 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2044 gsi = gsi_for_stmt (negatedefstmt);
2045 rhs1 = negate_value (rhs1, &gsi);
2046 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
/* Re-fetch the iterator; the first recursion may have inserted
   statements before NEGATEDEFSTMT.  */
2048 gsi = gsi_for_stmt (negatedefstmt);
2049 rhs2 = negate_value (rhs2, &gsi);
2050 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2052 update_stmt (negatedefstmt);
2053 return gimple_assign_lhs (negatedefstmt);
/* Fallback: emit an explicit NEGATE_EXPR before GSI.  */
2056 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2057 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2058 NULL_TREE, true, GSI_SAME_STMT);
2059 return resultofnegate;
2062 /* Return true if we should break up the subtract in STMT into an add
2063 with negate. This is true when the subtract operands are really
2064 adds, or the subtract itself is used in an add expression. In
2065 either case, breaking up the subtract into an add with negate
2066 exposes the adds to reassociation. */
2069 should_break_up_subtract (gimple stmt)
2071 tree lhs = gimple_assign_lhs (stmt);
2072 tree binlhs = gimple_assign_rhs1 (stmt);
2073 tree binrhs = gimple_assign_rhs2 (stmt);
2075 struct loop *loop = loop_containing_stmt (stmt);
/* Either operand being fed by a reassociable PLUS qualifies.  */
2077 if (TREE_CODE (binlhs) == SSA_NAME
2078 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2081 if (TREE_CODE (binrhs) == SSA_NAME
2082 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
/* Or the subtract's sole use is itself an add or multiply.  */
2085 if (TREE_CODE (lhs) == SSA_NAME
2086 && (immusestmt = get_single_immediate_use (lhs))
2087 && is_gimple_assign (immusestmt)
2088 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2089 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2094 /* Transform STMT from A - B into A + -B. */
2097 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2099 tree rhs1 = gimple_assign_rhs1 (stmt);
2100 tree rhs2 = gimple_assign_rhs2 (stmt);
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2104 fprintf (dump_file, "Breaking up subtract ");
2105 print_gimple_stmt (dump_file, stmt, 0, 0);
/* Negate the second operand and turn the MINUS into a PLUS.  */
2108 rhs2 = negate_value (rhs2, gsip);
2109 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2113 /* Recursively linearize a binary expression that is the RHS of STMT.
2114 Place the operands of the expression tree in the vector named OPS. */
2117 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2118 bool is_associative, bool set_visited)
2120 tree binlhs = gimple_assign_rhs1 (stmt);
2121 tree binrhs = gimple_assign_rhs2 (stmt);
2122 gimple binlhsdef, binrhsdef;
2123 bool binlhsisreassoc = false;
2124 bool binrhsisreassoc = false;
2125 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2126 struct loop *loop = loop_containing_stmt (stmt);
2129 gimple_set_visited (stmt, true);
/* Determine which operands are themselves reassociable chains;
   statements that could throw must not be moved.  */
2131 if (TREE_CODE (binlhs) == SSA_NAME)
2133 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2134 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2135 && !stmt_could_throw_p (binlhsdef));
2138 if (TREE_CODE (binrhs) == SSA_NAME)
2140 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2141 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2142 && !stmt_could_throw_p (binrhsdef));
2145 /* If the LHS is not reassociable, but the RHS is, we need to swap
2146 them. If neither is reassociable, there is nothing we can do, so
2147 just put them in the ops vector. If the LHS is reassociable,
2148 linearize it. If both are reassociable, then linearize the RHS
2151 if (!binlhsisreassoc)
2155 /* If this is not a associative operation like division, give up. */
2156 if (!is_associative)
2158 add_to_ops_vec (ops, binrhs);
2162 if (!binrhsisreassoc)
/* Neither side is a chain: both operands are leaves.  */
2164 add_to_ops_vec (ops, binrhs);
2165 add_to_ops_vec (ops, binlhs);
2169 if (dump_file && (dump_flags & TDF_DETAILS))
2171 fprintf (dump_file, "swapping operands of ");
2172 print_gimple_stmt (dump_file, stmt, 0, 0);
2175 swap_tree_operands (stmt,
2176 gimple_assign_rhs1_ptr (stmt),
2177 gimple_assign_rhs2_ptr (stmt));
2180 if (dump_file && (dump_flags & TDF_DETAILS))
2182 fprintf (dump_file, " is now ");
2183 print_gimple_stmt (dump_file, stmt, 0, 0);
2186 /* We want to make it so the lhs is always the reassociative op,
/* Both sides reassociable: left-linearize first via linearize_expr.  */
2192 else if (binrhsisreassoc)
2194 linearize_expr (stmt);
2195 binlhs = gimple_assign_rhs1 (stmt);
2196 binrhs = gimple_assign_rhs2 (stmt);
2199 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2200 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
/* Recurse down the (reassociable) left side, then record the leaf.  */
2202 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2203 is_associative, set_visited);
2204 add_to_ops_vec (ops, binrhs);
2207 /* Repropagate the negates back into subtracts, since no other pass
2208 currently does it. */
2211 repropagate_negates (void)
2216 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2218 gimple user = get_single_immediate_use (negate);
2220 if (!user || !is_gimple_assign (user))
2223 /* The negate operand can be either operand of a PLUS_EXPR
2224 (it can be the LHS if the RHS is a constant for example).
2226 Force the negate operand to the RHS of the PLUS_EXPR, then
2227 transform the PLUS_EXPR into a MINUS_EXPR. */
2228 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2230 /* If the negated operand appears on the LHS of the
2231 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2232 to force the negated operand to the RHS of the PLUS_EXPR. */
2233 if (gimple_assign_rhs1 (user) == negate)
2235 swap_tree_operands (user,
2236 gimple_assign_rhs1_ptr (user),
2237 gimple_assign_rhs2_ptr (user));
2240 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2241 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2242 if (gimple_assign_rhs2 (user) == negate)
2244 tree rhs1 = gimple_assign_rhs1 (user);
2245 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2246 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2247 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2251 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2253 if (gimple_assign_rhs1 (user) == negate)
2258 which we transform into
2261 This pushes down the negate which we possibly can merge
2262 into some other operation, hence insert it into the
2263 plus_negates vector. */
2264 gimple feed = SSA_NAME_DEF_STMT (negate);
2265 tree a = gimple_assign_rhs1 (feed);
2266 tree rhs2 = gimple_assign_rhs2 (user);
2267 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
/* Rewrite FEED as the addition and USER as the negation,
   then keep the new negate for another round.  */
2268 gimple_replace_lhs (feed, negate);
2269 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2270 update_stmt (gsi_stmt (gsi));
2271 gsi2 = gsi_for_stmt (user);
2272 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2273 update_stmt (gsi_stmt (gsi2));
2274 gsi_move_before (&gsi, &gsi2);
2275 VEC_safe_push (tree, heap, plus_negates,
2276 gimple_assign_lhs (gsi_stmt (gsi2)));
2280 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2281 rid of one operation. */
2282 gimple feed = SSA_NAME_DEF_STMT (negate);
2283 tree a = gimple_assign_rhs1 (feed);
2284 tree rhs1 = gimple_assign_rhs1 (user);
2285 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2286 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2287 update_stmt (gsi_stmt (gsi));
2293 /* Returns true if OP is of a type for which we can do reassociation.
2294 That is for integral or non-saturating fixed-point types, and for
2295 floating point type when associative-math is enabled. */
2298 can_reassociate_p (tree op)
2300 tree type = TREE_TYPE (op);
/* Integral types must have wrapping overflow; trapping/undefined
   overflow makes reassociation unsafe.  */
2301 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2302 || NON_SAT_FIXED_POINT_TYPE_P (type)
2303 || (flag_associative_math && FLOAT_TYPE_P (type)))
2308 /* Break up subtract operations in block BB.
2310 We do this top down because we don't know whether the subtract is
2311 part of a possible chain of reassociation except at the top.
2320 we want to break up k = t - q, but we won't until we've transformed q
2321 = b - r, which won't be broken up until we transform b = c - d.
2323 En passant, clear the GIMPLE visited flag on every statement. */
2326 break_up_subtract_bb (basic_block bb)
2328 gimple_stmt_iterator gsi;
2331 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2333 gimple stmt = gsi_stmt (gsi);
/* Clear the visited flag for the later rewrite phase.  */
2334 gimple_set_visited (stmt, false);
2336 if (!is_gimple_assign (stmt)
2337 || !can_reassociate_p (gimple_assign_lhs (stmt)))
2340 /* Look for simple gimple subtract operations. */
2341 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2343 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2344 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2347 /* Check for a subtract used only in an addition. If this
2348 is the case, transform it into add of a negate for better
2349 reassociation. IE transform C = A-B into C = A + -B if C
2350 is only used in an addition. */
2351 if (should_break_up_subtract (stmt))
2352 break_up_subtract (stmt, &gsi);
/* Remember plain negates for repropagate_negates.  */
2354 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2355 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2356 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
/* Recurse over the dominator tree so defs are seen before uses.  */
2358 for (son = first_dom_son (CDI_DOMINATORS, bb);
2360 son = next_dom_son (CDI_DOMINATORS, son))
2361 break_up_subtract_bb (son);
2364 /* Reassociate expressions in basic block BB and its post-dominator as
2368 reassociate_bb (basic_block bb)
2370 gimple_stmt_iterator gsi;
/* Walk backwards so uses are processed before their defs.  */
2373 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2375 gimple stmt = gsi_stmt (gsi);
2377 if (is_gimple_assign (stmt)
2378 && !stmt_could_throw_p (stmt))
2380 tree lhs, rhs1, rhs2;
2381 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2383 /* If this is not a gimple binary expression, there is
2384 nothing for us to do with it. */
2385 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2388 /* If this was part of an already processed statement,
2389 we don't need to touch it again. */
2390 if (gimple_visited_p (stmt))
2392 /* This statement might have become dead because of previous
2394 if (has_zero_uses (gimple_get_lhs (stmt)))
2396 gsi_remove (&gsi, true);
2397 release_defs (stmt);
2398 /* We might end up removing the last stmt above which
2399 places the iterator to the end of the sequence.
2400 Reset it to the last stmt in this case which might
2401 be the end of the sequence as well if we removed
2402 the last statement of the sequence. In which case
2403 we need to bail out. */
2404 if (gsi_end_p (gsi))
2406 gsi = gsi_last_bb (bb);
2407 if (gsi_end_p (gsi))
2414 lhs = gimple_assign_lhs (stmt);
2415 rhs1 = gimple_assign_rhs1 (stmt);
2416 rhs2 = gimple_assign_rhs2 (stmt);
2418 /* For non-bit or min/max operations we can't associate
2419 all types. Verify that here. */
2420 if (rhs_code != BIT_IOR_EXPR
2421 && rhs_code != BIT_AND_EXPR
2422 && rhs_code != BIT_XOR_EXPR
2423 && rhs_code != MIN_EXPR
2424 && rhs_code != MAX_EXPR
2425 && (!can_reassociate_p (lhs)
2426 || !can_reassociate_p (rhs1)
2427 || !can_reassociate_p (rhs2)))
2430 if (associative_tree_code (rhs_code))
2432 VEC(operand_entry_t, heap) *ops = NULL;
2434 /* There may be no immediate uses left by the time we
2435 get here because we may have eliminated them all. */
2436 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
/* Linearize, sort by rank, optimize, and possibly undistribute;
   undistribution changes the list, so re-sort and re-optimize.  */
2439 gimple_set_visited (stmt, true);
2440 linearize_expr_tree (&ops, stmt, true, true);
2441 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2442 optimize_ops_list (rhs_code, &ops);
2443 if (undistribute_ops_list (rhs_code, &ops,
2444 loop_containing_stmt (stmt)))
2446 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2447 optimize_ops_list (rhs_code, &ops);
/* Everything folded to a single operand: copy it into STMT.  */
2450 if (VEC_length (operand_entry_t, ops) == 1)
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2454 fprintf (dump_file, "Transforming ");
2455 print_gimple_stmt (dump_file, stmt, 0, 0);
2458 rhs1 = gimple_assign_rhs1 (stmt);
2459 gimple_assign_set_rhs_from_tree (&gsi,
2460 VEC_last (operand_entry_t,
2463 remove_visited_stmt_chain (rhs1);
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2467 fprintf (dump_file, " into ");
2468 print_gimple_stmt (dump_file, stmt, 0, 0);
/* Otherwise rewrite the chain, in parallel form when the target
   width allows it and there are enough operands.  */
2473 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2474 int ops_num = VEC_length (operand_entry_t, ops);
2475 int width = get_reassociation_width (ops_num, rhs_code, mode);
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2479 "Width = %d was chosen for reassociation\n", width);
2482 && VEC_length (operand_entry_t, ops) > 3)
2483 rewrite_expr_tree_parallel (stmt, width, ops);
2485 rewrite_expr_tree (stmt, 0, ops, false);
2488 VEC_free (operand_entry_t, heap, ops);
/* Recurse over the post-dominator tree.  */
2492 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2494 son = next_dom_son (CDI_POST_DOMINATORS, son))
2495 reassociate_bb (son);
/* Forward declarations of the debug helpers defined below.  */
2498 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2499 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2501 /* Dump the operand entry vector OPS to FILE. */
/* NOTE(review): the return-type line, opening brace and local
   declarations (`i', `oe') of this definition are elided in this
   excerpt.  Prints one line per operand: its index, rank and tree.  */
2504 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2509 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2511 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2512 print_generic_expr (file, oe->op, 0);
2516 /* Dump the operand entry vector OPS to STDERR. */
/* Convenience wrapper over dump_ops_vector for use from a debugger.
   NOTE(review): declaration specifiers and braces are elided here.  */
2519 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2521 dump_ops_vector (stderr, ops);
/* NOTE(review): body of do_reassoc (its header is elided): first break
   up subtracts into add+negate over the dominator tree from the entry
   block (pass step 1), then reassociate in post-dominator order
   starting from the exit block (steps 2-4).  */
2527 break_up_subtract_bb (ENTRY_BLOCK_PTR);
2528 reassociate_bb (EXIT_BLOCK_PTR);
2531 /* Initialize the reassociation pass. */
/* NOTE(review): the function header and several locals (`rank', `i',
   `param') of init_reassoc are elided in this excerpt.  Sets up loop
   info, the operand-entry allocation pool, the rank map keyed on
   SSA names, per-BB base ranks in reverse-RPO order, and post-dominator
   info used by reassociate_bb.  */
2539 int *bbs = XNEWVEC (int, last_basic_block + 1);
2541 /* Find the loops, so that we can prevent moving calculations in
2543 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2545 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
2547 operand_entry_pool = create_alloc_pool ("operand entry pool",
2548 sizeof (struct operand_entry), 30);
2549 next_operand_entry_id = 0;
2551 /* Reverse RPO (Reverse Post Order) will give us something where
2552 deeper loops come later. */
2553 pre_and_rev_post_order_compute (NULL, bbs, false);
2554 bb_rank = XCNEWVEC (long, last_basic_block + 1);
2555 operand_rank = pointer_map_create ();
2557 /* Give each argument a distinct rank. */
2558 for (param = DECL_ARGUMENTS (current_function_decl);
2560 param = DECL_CHAIN (param))
2562 if (gimple_default_def (cfun, param) != NULL)
2564 tree def = gimple_default_def (cfun, param);
2565 insert_operand_rank (def, ++rank);
2569 /* Give the chain decl a distinct rank. */
2570 if (cfun->static_chain_decl != NULL)
2572 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
2574 insert_operand_rank (def, ++rank);
2577 /* Set up rank for each BB */
/* Block ranks live in the high bits (<< 16) so per-statement ranks
   within a block can occupy the low bits.  */
2578 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
2579 bb_rank[bbs[i]] = ++rank << 16;
2582 calculate_dominance_info (CDI_POST_DOMINATORS);
2583 plus_negates = NULL;
2586 /* Cleanup after the reassociation pass, and print stats if
/* NOTE(review): header of fini_reassoc is elided.  Reports the pass
   statistics gathered in reassociate_stats, then releases every
   resource init_reassoc acquired (rank map, alloc pool, negate list,
   post-dominator info, loop info).  */
2592 statistics_counter_event (cfun, "Linearized",
2593 reassociate_stats.linearized);
2594 statistics_counter_event (cfun, "Constants eliminated",
2595 reassociate_stats.constants_eliminated);
2596 statistics_counter_event (cfun, "Ops eliminated",
2597 reassociate_stats.ops_eliminated);
2598 statistics_counter_event (cfun, "Statements rewritten",
2599 reassociate_stats.rewritten);
2601 pointer_map_destroy (operand_rank);
2602 free_alloc_pool (operand_entry_pool);
2604 VEC_free (tree, heap, plus_negates);
2605 free_dominance_info (CDI_POST_DOMINATORS);
2606 loop_optimizer_finalize ();
2609 /* Gate and execute functions for Reassociation. */
/* NOTE(review): only a fragment of execute_reassoc is visible; the
   calls to init_reassoc/do_reassoc/fini_reassoc and the return value
   are elided.  repropagate_negates is pass step 5: push negates back
   into the rewritten trees.  */
2612 execute_reassoc (void)
2617 repropagate_negates ();
/* Pass gate: run only when -ftree-reassoc is enabled.
   NOTE(review): declaration specifiers and braces are elided here.  */
2624 gate_tree_ssa_reassoc (void)
2626 return flag_tree_reassoc != 0;
/* Pass descriptor registering the reassociation pass with the pass
   manager.  NOTE(review): several initializer lines (pass type,
   sub/next pointers, properties, and the closing brace) are elided in
   this excerpt.  */
2629 struct gimple_opt_pass pass_reassoc =
2633 "reassoc", /* name */
2634 gate_tree_ssa_reassoc, /* gate */
2635 execute_reassoc, /* execute */
2638 0, /* static_pass_number */
2639 TV_TREE_REASSOC, /* tv_id */
2640 PROP_cfg | PROP_ssa, /* properties_required */
2641 0, /* properties_provided */
2642 0, /* properties_destroyed */
2643 0, /* todo_flags_start */
2646 | TODO_ggc_collect /* todo_flags_finish */