/* Reassociation for trees.
   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-gimple.h"
#include "tree-dump.h"
#include "timevar.h"
#include "tree-iterator.h"
#include "real.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "vec.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "flags.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.
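
   As an illustrative sketch (an assumed example, not from the
   original sources), given user code like

     e = a - b;
     f = e + c;

   step 1 rewrites the subtract as e = a + -b, step 2 collects the
   resulting add chain into the operand vector {a, -b, c}, step 3
   would cancel -b against a matching b if one were present, step 4
   emits the chain back as binary adds in rank order, and step 5
   turns any surviving "x + -y" back into "x - y".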
   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e

   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of

   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
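
/* Illustrative sketch (an assumed example, not from the original
   sources): in a sum-reduction loop such as

     sum_1 = PHI <sum_0, sum_2>
     sum_2 = sum_1 + a_3;

   sum_1 is the "destructive update" operand; the three-operand check
   in rewrite_expr_tree keeps it paired directly with the accumulating
   add so the vectorizer can still recognize the reduction.  */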
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
} reassociate_stats;
/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned rank;
  tree op;
} *operand_entry_t;

static alloc_pool operand_entry_pool;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;
/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;


/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */
  if (TREE_CODE (e) == SSA_NAME)
    {
      tree stmt;
      tree rhs;
      long rank, maxrank;
      int i, n;

      if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
          && SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (bb_for_stmt (stmt) == NULL)
        return 0;

      if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT
          || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
        return bb_rank[bb_for_stmt (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands, or the bb
         rank, whichever is less.  */
      rank = 0;
      maxrank = bb_rank[bb_for_stmt (stmt)->index];
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);
      n = TREE_OPERAND_LENGTH (rhs);
      if (n == 0)
        rank = MAX (rank, get_rank (rhs));
      else
        {
          for (i = 0;
               i < n && TREE_OPERAND (rhs, i) && rank != maxrank;
               i++)
            rank = MAX (rank, get_rank (TREE_OPERAND (rhs, i)));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
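
/* Illustrative sketch (an assumed example): for

     b_1 = parm_1 + parm_2;
     c_2 = b_1 + parm_1;

   init_reassoc pre-ranks each argument distinctly, so if parm_1 and
   parm_2 carry ranks 3 and 4, b_1 gets rank 5 (max operand rank + 1)
   and c_2 gets rank 6: names defined later in a dependence chain
   always rank strictly higher.  */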
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1
/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_ops_list if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    return constant_type (oeb->op) - constant_type (oea->op);

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);

  return oeb->rank - oea->rank;
}
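
/* Illustrative sketch (assumed operand vector): sorting
   {c_3 (rank 2), 4 (rank 0), a_1 (rank 1), 8 (rank 0)} with
   sort_by_operand_rank yields {c_3, a_1, 8, 4}: decreasing rank, with
   the rank-0 integer constants grouped at the tail, where
   optimize_ops_list can fold them into a single constant.  */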
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  VEC_safe_push (operand_entry_t, heap, *ops, oe);
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE.  */

static bool
is_reassociable_op (tree stmt, enum tree_code code)
{
  if (!IS_EMPTY_STMT (stmt)
      && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == code
      && has_single_use (GIMPLE_STMT_OPERAND (stmt, 0)))
    return true;
  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return
   the operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  tree stmt = SSA_NAME_DEF_STMT (name);
  tree rhs;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return NULL_TREE;

  rhs = GIMPLE_STMT_OPERAND (stmt, 1);
  if (TREE_CODE (rhs) == opcode)
    return TREE_OPERAND (rhs, 0);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          VEC (operand_entry_t, heap) **ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          VEC_ordered_remove (operand_entry_t, *ops, i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (VEC_length (operand_entry_t, *ops) == 2)
            {
              VEC_free (operand_entry_t, heap, *ops);
              *ops = NULL;
              add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
                                                 integer_zero_node));
              *all_done = true;
            }
          else
            {
              VEC_ordered_remove (operand_entry_t, *ops, i-1);
              VEC_ordered_remove (operand_entry_t, *ops, i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
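
/* Illustrative sketch: for "x & x & y" the sorted vector contains x
   twice, and one copy is removed (x & x == x); for "x ^ x ^ y" both
   copies are removed (x ^ x == 0), leaving only y.  */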
/* If OPCODE is PLUS_EXPR and CURR->OP is really a negate expression,
   look in OPS for a corresponding positive operation to cancel it
   out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           VEC (operand_entry_t, heap) **ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  if (negateop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          VEC_ordered_remove (operand_entry_t, *ops, i);
          add_to_ops_vec (ops, fold_convert (TREE_TYPE (oe->op),
                                             integer_zero_node));
          VEC_ordered_remove (operand_entry_t, *ops, currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  return false;
}
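
/* Illustrative sketch: for the chain "t_2 = -b_1; x_3 = a_4 + t_2 + b_1",
   t_2 cancels against b_1 here: both entries are removed, a 0 is
   pushed in their place, and eliminate_using_constants later drops
   the "+ 0".  */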
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, collapse OPS to the single constant
   result (0 for AND, all ones for IOR) and return true.  Otherwise,
   return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     VEC (operand_entry_t, heap) **ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
                                          TYPE_PRECISION (TREE_TYPE (oe->op)));

          reassociate_stats.ops_eliminated
            += VEC_length (operand_entry_t, *ops) - 1;
          VEC_free (operand_entry_t, heap, *ops);
          *ops = NULL;
          VEC_safe_push (operand_entry_t, heap, *ops, oe);
          return true;
        }
    }

  return false;
}
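
/* Illustrative sketch: "a & ~a & b" collapses the whole operand list
   to the constant 0, and "a | ~a | b" collapses it to the all-ones
   value, so every other operand is discarded in one step.  */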
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           VEC(operand_entry_t, heap) **ops)
{
  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);

  if (oelast->rank == 0 && INTEGRAL_TYPE_P (TREE_TYPE (oelast->op)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;

                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;

                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;
                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
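
/* Illustrative sketch: an operand list for MULT_EXPR whose trailing
   constant is 0 collapses to just {0}, while a trailing 1 is simply
   popped (x * 1 == x); the AND/IOR/XOR/PLUS cases above are the
   analogous identities for their opcodes.  */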
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = VEC_last (operand_entry_t, *ops);

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              VEC_pop (operand_entry_t, *ops);
              VEC_pop (operand_entry_t, *ops);

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = VEC_length (operand_entry_t, *ops);
  oelast = VEC_last (operand_entry_t, *ops);

  if (iterate)
    optimize_ops_list (opcode, ops);
}
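
/* Illustrative sketch: for "a + 3 + b + 5" the sorted vector is
   {a, b, 5, 3}; the two trailing constants fold to 8 and the list is
   re-optimized as {a, b, 8}.  */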
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (tree stmt, tree operand)
{
  tree def_stmt;
  tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (TREE_CODE (def_stmt) != PHI_NODE)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  */

static void
rewrite_expr_tree (tree stmt, unsigned int opindex,
                   VEC(operand_entry_t, heap) * ops)
{
  tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
  operand_entry_t oe;

  /* If we have three operands left, then we want to make sure the ones
     that get the double binary op are the ones with the same rank.

     The alternative we try is to see if this is a destructive
     update style statement, which is like:
     b = phi (a, ...)
     a = c + b;
     In that case, we want to use the destructive update form to
     expose the possible vectorizer sum reduction opportunity.
     In that case, the third operand will be the phi node.

     We could, of course, try to be better as noted above, and do a
     lot of work to try to find these opportunities in >3 operand
     cases, but it is unlikely to be worth it.  */
  if (opindex + 3 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2, oe3;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);

      if ((oe1->rank == oe2->rank
           && oe2->rank != oe3->rank)
          || (is_phi_for_stmt (stmt, oe3->op)
              && !is_phi_for_stmt (stmt, oe1->op)
              && !is_phi_for_stmt (stmt, oe2->op)))
        {
          struct operand_entry temp = *oe3;
          oe3->op = oe1->op;
          oe3->rank = oe1->rank;
          oe1->op = temp.op;
          oe1->rank = temp.rank;
        }
    }

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);

      if (TREE_OPERAND (rhs, 0) != oe1->op
          || TREE_OPERAND (rhs, 1) != oe2->op)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_generic_expr (dump_file, rhs, 0);
            }

          TREE_OPERAND (rhs, 0) = oe1->op;
          TREE_OPERAND (rhs, 1) = oe2->op;
          update_stmt (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_generic_stmt (dump_file, rhs, 0);
            }
        }
      return;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));

  /* Rewrite the next operator.  */
  oe = VEC_index (operand_entry_t, ops, opindex);

  if (oe->op != TREE_OPERAND (rhs, 1))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_generic_expr (dump_file, rhs, 0);
        }

      TREE_OPERAND (rhs, 1) = oe->op;
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_generic_stmt (dump_file, rhs, 0);
        }
    }
  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  rewrite_expr_tree (SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)),
                     opindex + 1, ops);
}
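
/* Illustrative sketch (assumed ranks): with ops {d (2), e (2), c (1)}
   for PLUS_EXPR, the three-operand check above swaps c to the front
   so that the equal-rank d and e end up together in the innermost
   statement: t_1 = d + e; t_2 = t_1 + c.  */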
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (tree stmt)
{
  block_stmt_iterator bsinow, bsirhs;
  tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
  enum tree_code rhscode = TREE_CODE (rhs);
  tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
  tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
  tree newbinrhs = NULL_TREE;

  gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs))
              && is_reassociable_op (binrhs, TREE_CODE (rhs)));

  bsinow = bsi_for_stmt (stmt);
  bsirhs = bsi_for_stmt (binrhs);
  bsi_move_before (&bsirhs, &bsinow);

  TREE_OPERAND (rhs, 1) = TREE_OPERAND (GIMPLE_STMT_OPERAND (binrhs, 1), 0);
  if (TREE_CODE (TREE_OPERAND (rhs, 1)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1));
  TREE_OPERAND (GIMPLE_STMT_OPERAND (binrhs, 1), 0)
    = GIMPLE_STMT_OPERAND (binlhs, 0);
  TREE_OPERAND (rhs, 0) = GIMPLE_STMT_OPERAND (binrhs, 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_generic_stmt (dump_file, rhs, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (binrhs);
  update_stmt (binlhs);
  update_stmt (stmt);
  TREE_VISITED (binrhs) = 1;
  TREE_VISITED (binlhs) = 1;
  TREE_VISITED (stmt) = 1;

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode))
    linearize_expr (stmt);
}
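
/* Illustrative sketch: for t1 = a + b; t2 = c + d; t3 = t1 + t2, one
   step of linearize_expr rewrites t2's statement as t2 = t1 + d and
   t3 as t3 = t2 + c, i.e. ((a + b) + d) + c, reusing the existing
   statements rather than allocating new ones.  */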
/* If LHS has a single immediate use that is a GIMPLE_MODIFY_STMT, return
   it.  Otherwise, return NULL.  */

static tree
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  tree immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt))
    {
      if (TREE_CODE (immusestmt) == RETURN_EXPR)
        immusestmt = TREE_OPERAND (immusestmt, 0);
      if (TREE_CODE (immusestmt) == GIMPLE_MODIFY_STMT)
        return immusestmt;
    }
  return NULL_TREE;
}

static VEC(tree, heap) *broken_up_subtracts;
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before BSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, block_stmt_iterator *bsi)
{
  tree negatedef = tonegate;
  tree resultofnegate;

  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedef = SSA_NAME_DEF_STMT (tonegate);

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME
      && TREE_CODE (negatedef) == GIMPLE_MODIFY_STMT
      && TREE_CODE (GIMPLE_STMT_OPERAND (negatedef, 0)) == SSA_NAME
      && has_single_use (GIMPLE_STMT_OPERAND (negatedef, 0))
      && TREE_CODE (GIMPLE_STMT_OPERAND (negatedef, 1)) == PLUS_EXPR)
    {
      block_stmt_iterator bsi;
      tree binop = GIMPLE_STMT_OPERAND (negatedef, 1);

      bsi = bsi_for_stmt (negatedef);
      TREE_OPERAND (binop, 0) = negate_value (TREE_OPERAND (binop, 0),
                                              &bsi);
      bsi = bsi_for_stmt (negatedef);
      TREE_OPERAND (binop, 1) = negate_value (TREE_OPERAND (binop, 1),
                                              &bsi);
      update_stmt (negatedef);
      return GIMPLE_STMT_OPERAND (negatedef, 0);
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_bsi (bsi, tonegate, true,
                                             NULL_TREE, true, BSI_SAME_STMT);
  VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (tree stmt)
{
  tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
  tree binlhs = TREE_OPERAND (rhs, 0);
  tree binrhs = TREE_OPERAND (rhs, 1);
  tree immusestmt;

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR)
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (tree stmt, block_stmt_iterator *bsi)
{
  tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_generic_stmt (dump_file, stmt, 0);
    }

  TREE_SET_CODE (GIMPLE_STMT_OPERAND (stmt, 1), PLUS_EXPR);
  TREE_OPERAND (rhs, 1) = negate_value (TREE_OPERAND (rhs, 1), bsi);
  update_stmt (stmt);
}
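
/* Illustrative sketch: "x = a - b" whose only use is "y = x + c"
   becomes "t = -b; x = a + t", so the whole computation is a single
   PLUS_EXPR chain {a, -b, c} that linearize_expr_tree can collect.  */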
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, tree stmt)
{
  block_stmt_iterator bsinow, bsilhs;
  tree rhs = GENERIC_TREE_OPERAND (stmt, 1);
  tree binrhs = TREE_OPERAND (rhs, 1);
  tree binlhs = TREE_OPERAND (rhs, 0);
  tree binlhsdef, binrhsdef;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = TREE_CODE (rhs);

  TREE_VISITED (stmt) = 1;

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode);
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode);
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
  if (!binlhsisreassoc)
    {
      tree temp;

      if (!binrhsisreassoc)
        {
          add_to_ops_vec (ops, binrhs);
          add_to_ops_vec (ops, binlhs);
          return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_generic_expr (dump_file, stmt, 0);
        }

      swap_tree_operands (stmt, &TREE_OPERAND (rhs, 0),
                          &TREE_OPERAND (rhs, 1));

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_generic_stmt (dump_file, stmt, 0);
        }

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      gcc_assert (rhs == GIMPLE_STMT_OPERAND (stmt, 1));
      binlhs = TREE_OPERAND (rhs, 0);
      binrhs = TREE_OPERAND (rhs, 1);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode));
  bsinow = bsi_for_stmt (stmt);
  bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
  bsi_move_before (&bsilhs, &bsinow);
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
  add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
    {
      tree user = get_single_immediate_use (negate);

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (user
          && TREE_CODE (user) == GIMPLE_MODIFY_STMT
          && TREE_CODE (GIMPLE_STMT_OPERAND (user, 1)) == PLUS_EXPR)
        {
          tree rhs = GIMPLE_STMT_OPERAND (user, 1);

          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (TREE_OPERAND (GIMPLE_STMT_OPERAND (user, 1), 0) == negate)
            {
              tree temp = TREE_OPERAND (rhs, 0);
              TREE_OPERAND (rhs, 0) = TREE_OPERAND (rhs, 1);
              TREE_OPERAND (rhs, 1) = temp;
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (TREE_OPERAND (GIMPLE_STMT_OPERAND (user, 1), 1) == negate)
            {
              TREE_SET_CODE (rhs, MINUS_EXPR);
              TREE_OPERAND (rhs, 1) = get_unary_op (negate, NEGATE_EXPR);
              update_stmt (user);
            }
        }
    }
}
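
/* Illustrative sketch: if step 1 produced "t_1 = -b_2; x_3 = a_4 + t_1"
   and no cancellation consumed t_1, the loop above rewrites the add
   back into "x_3 = a_4 - b_2".  */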
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.  */
static void
break_up_subtract_bb (basic_block bb)
{
  block_stmt_iterator bsi;
  basic_block son;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      tree stmt = bsi_stmt (bsi);

      if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
        {
          tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
          tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);

          TREE_VISITED (stmt) = 0;
          /* If associative-math we can do reassociation for
             non-integral types.  Or, we can do reassociation for
             non-saturating fixed-point types.  */
          if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
               || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
                  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
                  || !flag_associative_math)
              && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (rhs))
                  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (TREE_CODE (rhs) == MINUS_EXPR)
            if (should_break_up_subtract (stmt))
              break_up_subtract (stmt, &bsi);
        }
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  block_stmt_iterator bsi;
  basic_block son;

  for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi))
    {
      tree stmt = bsi_stmt (bsi);

      if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
        {
          tree lhs = GIMPLE_STMT_OPERAND (stmt, 0);
          tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);

          /* If this was part of an already processed tree, we don't
             need to touch it again.  */
          if (TREE_VISITED (stmt))
            continue;

          /* If associative-math we can do reassociation for
             non-integral types.  Or, we can do reassociation for
             non-saturating fixed-point types.  */
          if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
               || !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
                  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
                  || !flag_associative_math)
              && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (rhs))
                  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))))
            continue;

          if (associative_tree_code (TREE_CODE (rhs)))
            {
              VEC(operand_entry_t, heap) *ops = NULL;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              TREE_VISITED (stmt) = 1;
              linearize_expr_tree (&ops, stmt);
              qsort (VEC_address (operand_entry_t, ops),
                     VEC_length (operand_entry_t, ops),
                     sizeof (operand_entry_t),
                     sort_by_operand_rank);
              optimize_ops_list (TREE_CODE (rhs), &ops);

              if (VEC_length (operand_entry_t, ops) == 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Transforming ");
                      print_generic_expr (dump_file, rhs, 0);
                    }
                  GIMPLE_STMT_OPERAND (stmt, 1)
                    = VEC_last (operand_entry_t, ops)->op;
                  update_stmt (stmt);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, " into ");
                      print_generic_stmt (dump_file,
                                          GIMPLE_STMT_OPERAND (stmt, 1), 0);
                    }
                }
              else
                {
                  rewrite_expr_tree (stmt, 0, ops);
                }

              VEC_free (operand_entry_t, heap, ops);
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
void debug_ops_vector (VEC (operand_entry_t, heap) *ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
{
  operand_entry_t oe;
  unsigned int i;

  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_stmt (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (VEC (operand_entry_t, heap) *ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  tree param;
  int *bbs = XNEWVEC (int, last_basic_block + 1);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block + 1);
  operand_rank = pointer_map_create ();

  /* Give each argument a distinct rank.  */
  for (param = DECL_ARGUMENTS (current_function_decl);
       param;
       param = TREE_CHAIN (param))
    {
      if (gimple_default_def (cfun, param) != NULL)
        {
          tree def = gimple_default_def (cfun, param);
          insert_operand_rank (def, ++rank);
        }
    }

  /* Give the chain decl a distinct rank.  */
  if (cfun->static_chain_decl != NULL)
    {
      tree def = gimple_default_def (cfun, cfun->static_chain_decl);
      if (def != NULL)
        insert_operand_rank (def, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;
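
  /* Illustrative note: shifting each block's fresh counter value left
     by 16 bits leaves room below it, so every rank computed inside a
     block (a small increment over an operand rank or the bb_rank)
     still compares below the base rank of any later block in the
     reverse post order.  */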
  free (bbs);
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  broken_up_subtracts = NULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Reassociation stats:\n");
      fprintf (dump_file, "Linearized: %d\n",
               reassociate_stats.linearized);
      fprintf (dump_file, "Constants eliminated: %d\n",
               reassociate_stats.constants_eliminated);
      fprintf (dump_file, "Ops eliminated: %d\n",
               reassociate_stats.ops_eliminated);
      fprintf (dump_file, "Statements rewritten: %d\n",
               reassociate_stats.rewritten);
    }

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  VEC_free (tree, heap, broken_up_subtracts);
  free_dominance_info (CDI_POST_DOMINATORS);
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
struct tree_opt_pass pass_reassoc =
{
  "reassoc",				/* name */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
  0					/* letter */
};