1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 4. Rewrite the expression trees we linearized and optimized so
62 they are in proper rank order.
64 5. Repropagate negates, as nothing else will clean it up ATM.
66 A bit of theory on #4, since nobody seems to write anything down
67 about why it makes sense to do it the way they do it:
69 We could do this much nicer theoretically, but don't (for reasons
70 explained after how to do it theoretically nice :P).
72 In order to promote the most redundancy elimination, you want
73 binary expressions whose operands are the same rank (or
74 preferably, the same value) exposed to the redundancy eliminator,
75 for possible elimination.
77 So the way to do this if we really cared, is to build the new op
78 tree from the leaves to the roots, merging as you go, and putting the
79 new op on the end of the worklist, until you are left with one
80 thing on the worklist.
82 IE if you have to rewrite the following set of operands (listed with
83 rank in parentheses), with opcode PLUS_EXPR:
85 a (1), b (1), c (1), d (2), e (2)
88 We start with our merge worklist empty, and the ops list with all of
91 You want to first merge all leaves of the same rank, as much as
94 So first build a binary op of
96 mergetmp = a + b, and put "mergetmp" on the merge worklist.
98 Because there is no three operand form of PLUS_EXPR, c is not going to
99 be exposed to redundancy elimination as a rank 1 operand.
101 So you might as well throw it on the merge worklist (you could also
102 consider it to now be a rank two operand, and merge it with d and e,
103 but in this case, you then have evicted e from a binary op. So at
104 least in this situation, you can't win.)
106 Then build a binary op of d + e
109 and put mergetmp2 on the merge worklist.
111 so merge worklist = {mergetmp, c, mergetmp2}
113 Continue building binary ops of these operations until you have only
114 one operation left on the worklist.
119 mergetmp3 = mergetmp + c
121 worklist = {mergetmp2, mergetmp3}
123 mergetmp4 = mergetmp2 + mergetmp3
125 worklist = {mergetmp4}
127 because we have one operation left, we can now just set the original
128 statement equal to the result of that operation.
130 This will at least expose a + b and d + e to redundancy elimination
131 as binary operations.
133 For extra points, you can reuse the old statements to build the
134 mergetmps, since you shouldn't run out.
136 So why don't we do this?
138 Because it's expensive, and rarely will help. Most trees we are
139 reassociating have 3 or less ops. If they have 2 ops, they already
140 will be written into a nice single binary op. If you have 3 ops, a
141 single simple check suffices to tell you whether the first two are of the
142 same rank. If so, you know to order it
145 newstmt = mergetmp + op3
149 newstmt = mergetmp + op1
151 If all three are of the same rank, you can't expose them all in a
152 single binary operator anyway, so the above is *still* the best you
155 Thus, this is what we do. When we have three ops left, we check to see
156 what order to put them in, and call it a day. As a nod to vector sum
157 reduction, we check if any of the ops are really a phi node that is a
158 destructive update for the associating op, and keep the destructive
159 update together for vector sum reduction recognition. */
/* NOTE(review): this excerpt is truncated — the reassociate_stats struct
   and the body of struct operand_entry are missing; code kept verbatim,
   comments only added.  */
166 int constants_eliminated;
171 /* Operator, rank pair. */
/* NOTE(review): struct body (op, rank, id fields — inferred from uses
   below) is missing from this excerpt; verify against upstream.  */
172 typedef struct operand_entry
/* Allocation pool from which operand_entry structs are drawn.  */
179 static alloc_pool operand_entry_pool;
181 /* This is used to assign a unique ID to each struct operand_entry
182 so that qsort results are identical on different hosts. */
183 static int next_operand_entry_id;
185 /* Starting rank number for a given basic block, so that we can rank
186 operations using unmovable instructions in that BB based on the bb
188 static long *bb_rank;
190 /* Operand->rank hashtable. */
191 static struct pointer_map_t *operand_rank;
194 /* Look up the operand rank structure for expression E. */
197 find_operand_rank (tree e)
199 void **slot = pointer_map_contains (operand_rank, e);
200 return slot ? (long) (intptr_t) *slot : -1;
203 /* Insert {E,RANK} into the operand rank hashtable. */
206 insert_operand_rank (tree e, long rank)
209 gcc_assert (rank > 0);
210 slot = pointer_map_insert (operand_rank, e);
212 *slot = (void *) (intptr_t) rank;
/* NOTE(review): excerpt is missing lines here (function signature/return
   type, braces, early returns); code kept verbatim, comments only added.  */
215 /* Given an expression E, return the rank of the expression. */
220 /* Constants have rank 0. */
221 if (is_gimple_min_invariant (e))
224 /* SSA_NAME's have the rank of the expression they are the result
226 For globals and uninitialized values, the rank is 0.
227 For function arguments, use the pre-setup rank.
228 For PHI nodes, stores, asm statements, etc, we use the rank of
230 For simple operations, the rank is the maximum rank of any of
231 its operands, or the bb_rank, whichever is less.
232 I make no claims that this is optimal, however, it gives good
235 if (TREE_CODE (e) == SSA_NAME)
/* Default-def PARM_DECLs got their rank pre-computed at pass setup.  */
241 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
242 && SSA_NAME_IS_DEFAULT_DEF (e))
243 return find_operand_rank (e);
245 stmt = SSA_NAME_DEF_STMT (e);
246 if (gimple_bb (stmt) == NULL)
/* Non-assignments and statements with virtual defs (stores, calls)
   take the rank of their defining basic block.  */
249 if (!is_gimple_assign (stmt)
250 || gimple_vdef (stmt))
251 return bb_rank[gimple_bb (stmt)->index];
253 /* If we already have a rank for this expression, use that. */
254 rank = find_operand_rank (e);
258 /* Otherwise, find the maximum rank for the operands, or the bb
259 rank, whichever is less. */
261 if (gimple_assign_single_p (stmt))
263 tree rhs = gimple_assign_rhs1 (stmt);
264 n = TREE_OPERAND_LENGTH (rhs);
/* A zero-operand RHS (e.g. a bare SSA name) contributes its own rank.  */
266 rank = MAX (rank, get_rank (rhs));
269 for (i = 0; i < n; i++)
270 if (TREE_OPERAND (rhs, i))
271 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
/* Non-single RHS: recurse on each gimple operand (op 0 is the LHS).  */
276 n = gimple_num_ops (stmt);
277 for (i = 1; i < n; i++)
279 gcc_assert (gimple_op (stmt, i));
280 rank = MAX(rank, get_rank (gimple_op (stmt, i)));
284 if (dump_file && (dump_flags & TDF_DETAILS))
286 fprintf (dump_file, "Rank for ");
287 print_generic_expr (dump_file, e, 0);
288 fprintf (dump_file, " is %ld\n", (rank + 1));
291 /* Note the rank in the hashtable so we don't recompute it. */
292 insert_operand_rank (e, (rank + 1));
296 /* Globals, etc, are rank 0 */
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  The values are parenthesized so
   the macros expand safely inside any surrounding expression.  */
#define INTEGER_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
309 /* Classify an invariant tree into integer, float, or other, so that
310 we can sort them to be near other constants of the same type. */
312 constant_type (tree t)
314 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
315 return INTEGER_CONST_TYPE;
316 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
317 return FLOAT_CONST_TYPE;
319 return OTHER_CONST_TYPE;
322 /* qsort comparison function to sort operand entries PA and PB by rank
323 so that the sorted array is ordered by rank in decreasing order. */
325 sort_by_operand_rank (const void *pa, const void *pb)
327 const operand_entry_t oea = *(const operand_entry_t *)pa;
328 const operand_entry_t oeb = *(const operand_entry_t *)pb;
330 /* It's nicer for optimize_expression if constants that are likely
331 to fold when added/multiplied//whatever are put next to each
332 other. Since all constants have rank 0, order them by type. */
333 if (oeb->rank == 0 && oea->rank == 0)
335 if (constant_type (oeb->op) != constant_type (oea->op))
336 return constant_type (oeb->op) - constant_type (oea->op);
338 /* To make sorting result stable, we use unique IDs to determine
340 return oeb->id - oea->id;
343 /* Lastly, make sure the versions that are the same go next to each
344 other. We use SSA_NAME_VERSION because it's stable. */
345 if ((oeb->rank - oea->rank == 0)
346 && TREE_CODE (oea->op) == SSA_NAME
347 && TREE_CODE (oeb->op) == SSA_NAME)
349 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
350 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
352 return oeb->id - oea->id;
355 if (oeb->rank != oea->rank)
356 return oeb->rank - oea->rank;
358 return oeb->id - oea->id;
361 /* Add an operand entry to *OPS for the tree operand OP. */
364 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
366 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
369 oe->rank = get_rank (op);
370 oe->id = next_operand_entry_id++;
371 VEC_safe_push (operand_entry_t, heap, *ops, oe);
374 /* Return true if STMT is reassociable operation containing a binary
375 operation with tree code CODE, and is inside LOOP. */
378 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
380 basic_block bb = gimple_bb (stmt);
382 if (gimple_bb (stmt) == NULL)
385 if (!flow_bb_inside_loop_p (loop, bb))
388 if (is_gimple_assign (stmt)
389 && gimple_assign_rhs_code (stmt) == code
390 && has_single_use (gimple_assign_lhs (stmt)))
397 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
398 operand of the negate operation. Otherwise, return NULL. */
401 get_unary_op (tree name, enum tree_code opcode)
403 gimple stmt = SSA_NAME_DEF_STMT (name);
405 if (!is_gimple_assign (stmt))
408 if (gimple_assign_rhs_code (stmt) == opcode)
409 return gimple_assign_rhs1 (stmt);
/* NOTE(review): excerpt is missing lines here (return type, braces, the
   switch on OPCODE and its case labels, return statements); code kept
   verbatim, comments only added.  */
413 /* If CURR and LAST are a pair of ops that OPCODE allows us to
414 eliminate through equivalences, do so, remove them from OPS, and
415 return true. Otherwise, return false. */
418 eliminate_duplicate_pair (enum tree_code opcode,
419 VEC (operand_entry_t, heap) **ops,
422 operand_entry_t curr,
423 operand_entry_t last)
426 /* If we have two of the same op, and the opcode is & |, min, or max,
427 we can eliminate one of them.
428 If we have two of the same op, and the opcode is ^, we can
429 eliminate both of them. */
431 if (last && last->op == curr->op)
/* Presumably the idempotent case (&, |, min, max): drop one copy.
   TODO confirm against upstream switch.  */
439 if (dump_file && (dump_flags & TDF_DETAILS))
441 fprintf (dump_file, "Equivalence: ");
442 print_generic_expr (dump_file, curr->op, 0);
443 fprintf (dump_file, " [&|minmax] ");
444 print_generic_expr (dump_file, last->op, 0);
445 fprintf (dump_file, " -> ");
446 print_generic_stmt (dump_file, last->op, 0);
449 VEC_ordered_remove (operand_entry_t, *ops, i);
450 reassociate_stats.ops_eliminated ++;
/* Presumably the XOR case: x ^ x cancels, so both copies go away.  */
455 if (dump_file && (dump_flags & TDF_DETAILS))
457 fprintf (dump_file, "Equivalence: ");
458 print_generic_expr (dump_file, curr->op, 0);
459 fprintf (dump_file, " ^ ");
460 print_generic_expr (dump_file, last->op, 0);
461 fprintf (dump_file, " -> nothing\n");
464 reassociate_stats.ops_eliminated += 2;
/* If the whole list was just this pair, replace it with a zero
   constant so the caller still has an operand to work with.  */
466 if (VEC_length (operand_entry_t, *ops) == 2)
468 VEC_free (operand_entry_t, heap, *ops);
470 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
475 VEC_ordered_remove (operand_entry_t, *ops, i-1);
476 VEC_ordered_remove (operand_entry_t, *ops, i-1);
/* Negate SSA names seen as operands of PLUS_EXPR chains, saved by
   eliminate_plus_minus_pair for later repropagation.  */
488 static VEC(tree, heap) *plus_negates;
/* NOTE(review): excerpt is missing lines here (return type, braces, local
   declarations, early returns); code kept verbatim, comments only added.  */
490 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
491 expression, look in OPS for a corresponding positive operation to cancel
492 it out. If we find one, remove the other from OPS, replace
493 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
497 eliminate_plus_minus_pair (enum tree_code opcode,
498 VEC (operand_entry_t, heap) **ops,
499 unsigned int currindex,
500 operand_entry_t curr)
507 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
510 negateop = get_unary_op (curr->op, NEGATE_EXPR);
511 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
512 if (negateop == NULL_TREE && notop == NULL_TREE)
515 /* Any non-negated version will have a rank that is one less than
516 the current rank. So once we hit those ranks, if we don't find
519 for (i = currindex + 1;
520 VEC_iterate (operand_entry_t, *ops, i, oe)
521 && oe->rank >= curr->rank - 1 ;
/* Case 1: found X for -X, so X + -X folds to 0.  */
524 if (oe->op == negateop)
527 if (dump_file && (dump_flags & TDF_DETAILS))
529 fprintf (dump_file, "Equivalence: ");
530 print_generic_expr (dump_file, negateop, 0);
531 fprintf (dump_file, " + -");
532 print_generic_expr (dump_file, oe->op, 0);
533 fprintf (dump_file, " -> 0\n");
536 VEC_ordered_remove (operand_entry_t, *ops, i);
537 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
538 VEC_ordered_remove (operand_entry_t, *ops, currindex);
539 reassociate_stats.ops_eliminated ++;
/* Case 2: found X for ~X, so X + ~X folds to -1.  */
543 else if (oe->op == notop)
545 tree op_type = TREE_TYPE (oe->op);
547 if (dump_file && (dump_flags & TDF_DETAILS))
549 fprintf (dump_file, "Equivalence: ");
550 print_generic_expr (dump_file, notop, 0);
551 fprintf (dump_file, " + ~");
552 print_generic_expr (dump_file, oe->op, 0);
553 fprintf (dump_file, " -> -1\n");
556 VEC_ordered_remove (operand_entry_t, *ops, i);
557 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
558 VEC_ordered_remove (operand_entry_t, *ops, currindex);
559 reassociate_stats.ops_eliminated ++;
565 /* CURR->OP is a negate expr in a plus expr: save it for later
566 inspection in repropagate_negates(). */
567 if (negateop != NULL_TREE)
568 VEC_safe_push (tree, heap, plus_negates, curr->op)
/* NOTE(review): excerpt is missing lines here (return type, braces, the
   oe->op == notop match test, returns); code kept verbatim, comments only
   added.  */
573 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
574 bitwise not expression, look in OPS for a corresponding operand to
575 cancel it out. If we find one, remove the other from OPS, replace
576 OPS[CURRINDEX] with 0, and return true. Otherwise, return
580 eliminate_not_pairs (enum tree_code opcode,
581 VEC (operand_entry_t, heap) **ops,
582 unsigned int currindex,
583 operand_entry_t curr)
589 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
590 || TREE_CODE (curr->op) != SSA_NAME)
593 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
594 if (notop == NULL_TREE)
597 /* Any non-not version will have a rank that is one less than
598 the current rank. So once we hit those ranks, if we don't find
601 for (i = currindex + 1;
602 VEC_iterate (operand_entry_t, *ops, i, oe)
603 && oe->rank >= curr->rank - 1;
608 if (dump_file && (dump_flags & TDF_DETAILS))
610 fprintf (dump_file, "Equivalence: ");
611 print_generic_expr (dump_file, notop, 0);
612 if (opcode == BIT_AND_EXPR)
613 fprintf (dump_file, " & ~");
614 else if (opcode == BIT_IOR_EXPR)
615 fprintf (dump_file, " | ~");
616 print_generic_expr (dump_file, oe->op, 0);
617 if (opcode == BIT_AND_EXPR)
618 fprintf (dump_file, " -> 0\n");
619 else if (opcode == BIT_IOR_EXPR)
620 fprintf (dump_file, " -> -1\n");
/* X & ~X == 0; X | ~X == all-ones.  The whole operand list collapses
   to that single constant.  */
623 if (opcode == BIT_AND_EXPR)
624 oe->op = build_zero_cst (TREE_TYPE (oe->op));
625 else if (opcode == BIT_IOR_EXPR)
626 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
627 TYPE_PRECISION (TREE_TYPE (oe->op)));
629 reassociate_stats.ops_eliminated
630 += VEC_length (operand_entry_t, *ops) - 1;
631 VEC_free (operand_entry_t, heap, *ops);
633 VEC_safe_push (operand_entry_t, heap, *ops, oe);
/* NOTE(review): excerpt is missing lines here (return type, braces, the
   switch on OPCODE with its case labels/breaks); the groups below
   presumably correspond to the BIT_AND_EXPR, BIT_IOR_EXPR, MULT_EXPR and
   default (PLUS/MINUS/XOR) cases — verify against upstream.  Code kept
   verbatim, comments only added.  */
641 /* Use constant value that may be present in OPS to try to eliminate
642 operands. Note that this function is only really used when we've
643 eliminated ops for other reasons, or merged constants. Across
644 single statements, fold already does all of this, plus more. There
645 is little point in duplicating logic, so I've only included the
646 identities that I could ever construct testcases to trigger. */
649 eliminate_using_constants (enum tree_code opcode,
650 VEC(operand_entry_t, heap) **ops)
/* The constant, if any, sits last after sorting (rank 0 sorts last).  */
652 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
653 tree type = TREE_TYPE (oelast->op);
655 if (oelast->rank == 0
656 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
/* x & 0 == 0: everything else is dead.  */
661 if (integer_zerop (oelast->op))
663 if (VEC_length (operand_entry_t, *ops) != 1)
665 if (dump_file && (dump_flags & TDF_DETAILS))
666 fprintf (dump_file, "Found & 0, removing all other ops\n");
668 reassociate_stats.ops_eliminated
669 += VEC_length (operand_entry_t, *ops) - 1;
671 VEC_free (operand_entry_t, heap, *ops);
673 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x & -1 == x: the constant itself is dead.  */
677 else if (integer_all_onesp (oelast->op))
679 if (VEC_length (operand_entry_t, *ops) != 1)
681 if (dump_file && (dump_flags & TDF_DETAILS))
682 fprintf (dump_file, "Found & -1, removing\n");
683 VEC_pop (operand_entry_t, *ops);
684 reassociate_stats.ops_eliminated++;
/* x | -1 == -1: everything else is dead.  */
689 if (integer_all_onesp (oelast->op))
691 if (VEC_length (operand_entry_t, *ops) != 1)
693 if (dump_file && (dump_flags & TDF_DETAILS))
694 fprintf (dump_file, "Found | -1, removing all other ops\n");
696 reassociate_stats.ops_eliminated
697 += VEC_length (operand_entry_t, *ops) - 1;
699 VEC_free (operand_entry_t, heap, *ops);
701 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x | 0 == x: drop the constant.  */
705 else if (integer_zerop (oelast->op))
707 if (VEC_length (operand_entry_t, *ops) != 1)
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 fprintf (dump_file, "Found | 0, removing\n");
711 VEC_pop (operand_entry_t, *ops);
712 reassociate_stats.ops_eliminated++;
/* x * 0 == 0 (for FP only when NaNs/signed zeros don't matter).  */
717 if (integer_zerop (oelast->op)
718 || (FLOAT_TYPE_P (type)
719 && !HONOR_NANS (TYPE_MODE (type))
720 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
721 && real_zerop (oelast->op)))
723 if (VEC_length (operand_entry_t, *ops) != 1)
725 if (dump_file && (dump_flags & TDF_DETAILS))
726 fprintf (dump_file, "Found * 0, removing all other ops\n");
728 reassociate_stats.ops_eliminated
729 += VEC_length (operand_entry_t, *ops) - 1;
730 VEC_free (operand_entry_t, heap, *ops);
732 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
/* x * 1 == x (for FP only without signaling NaNs).  */
736 else if (integer_onep (oelast->op)
737 || (FLOAT_TYPE_P (type)
738 && !HONOR_SNANS (TYPE_MODE (type))
739 && real_onep (oelast->op)))
741 if (VEC_length (operand_entry_t, *ops) != 1)
743 if (dump_file && (dump_flags & TDF_DETAILS))
744 fprintf (dump_file, "Found * 1, removing\n");
745 VEC_pop (operand_entry_t, *ops);
746 reassociate_stats.ops_eliminated++;
/* x [|^+] 0 == x; for FP addition only when a zero add really folds.  */
754 if (integer_zerop (oelast->op)
755 || (FLOAT_TYPE_P (type)
756 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
757 && fold_real_zero_addition_p (type, oelast->op,
758 opcode == MINUS_EXPR)))
760 if (VEC_length (operand_entry_t, *ops) != 1)
762 if (dump_file && (dump_flags & TDF_DETAILS))
763 fprintf (dump_file, "Found [|^+] 0, removing\n");
764 VEC_pop (operand_entry_t, *ops);
765 reassociate_stats.ops_eliminated++;
/* Forward declaration; NOTE(review): remainder of the parameter list is
   missing from this excerpt.  */
777 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
780 /* Structure for tracking and counting operands. */
/* NOTE(review): struct fields cnt, id, op are missing from this excerpt
   (they are used by the functions below); verify against upstream.  */
781 typedef struct oecount_s {
784 enum tree_code oecode;
789 DEF_VEC_ALLOC_O(oecount,heap);
791 /* The heap for the oecount hashtable and the sorted list of operands. */
792 static VEC (oecount, heap) *cvec;
794 /* Hash function for oecount. */
797 oecount_hash (const void *p)
799 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
800 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
803 /* Comparison function for oecount. */
806 oecount_eq (const void *p1, const void *p2)
808 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
809 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
810 return (c1->oecode == c2->oecode
811 && c1->op == c2->op);
814 /* Comparison function for qsort sorting oecount elements by count. */
817 oecount_cmp (const void *p1, const void *p2)
819 const oecount *c1 = (const oecount *)p1;
820 const oecount *c2 = (const oecount *)p2;
821 if (c1->cnt != c2->cnt)
822 return c1->cnt - c2->cnt;
824 /* If counts are identical, use unique IDs to stabilize qsort. */
825 return c1->id - c2->id;
/* NOTE(review): excerpt is missing lines here (return type, the do/while
   loop structure, braces, local declarations); code kept verbatim,
   comments only added.  */
828 /* Walks the linear chain with result *DEF searching for an operation
829 with operand OP and code OPCODE removing that from the chain. *DEF
830 is updated if there is only one operand but no operation left. */
833 zero_one_operation (tree *def, enum tree_code opcode, tree op)
835 gimple stmt = SSA_NAME_DEF_STMT (*def);
839 tree name = gimple_assign_rhs1 (stmt);
841 /* If this is the operation we look for and one of the operands
842 is ours simply propagate the other operand into the stmts
844 if (gimple_assign_rhs_code (stmt) == opcode
846 || gimple_assign_rhs2 (stmt) == op)
850 gimple_stmt_iterator gsi;
/* Presumably: if rhs1 was the matched operand, propagate rhs2 instead —
   the condition lines are missing from the excerpt.  */
852 name = gimple_assign_rhs2 (stmt);
853 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
854 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
855 if (gimple_assign_lhs (stmt) == *def)
858 if (TREE_CODE (name) != SSA_NAME)
859 update_stmt (use_stmt);
/* Remove the now-redundant statement from the IL.  */
860 gsi = gsi_for_stmt (stmt);
861 gsi_remove (&gsi, true);
866 /* Continue walking the chain. */
867 gcc_assert (name != op
868 && TREE_CODE (name) == SSA_NAME);
869 stmt = SSA_NAME_DEF_STMT (name);
/* NOTE(review): excerpt is missing lines here (return type, braces, the
   else-branches, local declarations of sum/op/e/ei, return statement);
   code kept verbatim, comments only added.  */
874 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
875 the result. Places the statement after the definition of either
876 OP1 or OP2. Returns the new statement. */
879 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
881 gimple op1def = NULL, op2def = NULL;
882 gimple_stmt_iterator gsi;
886 /* Create the addition statement. */
887 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
888 op = make_ssa_name (tmpvar, sum);
889 gimple_assign_set_lhs (sum, op);
891 /* Find an insertion place and insert. */
892 if (TREE_CODE (op1) == SSA_NAME)
893 op1def = SSA_NAME_DEF_STMT (op1);
894 if (TREE_CODE (op2) == SSA_NAME)
895 op2def = SSA_NAME_DEF_STMT (op2);
/* Neither operand has a real defining statement: insert at the start
   of the function.  */
896 if ((!op1def || gimple_nop_p (op1def))
897 && (!op2def || gimple_nop_p (op2def)))
899 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
900 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
/* OP2's definition dominates OP1's (or OP1 has none): place after
   OP2's definition.  */
902 else if ((!op1def || gimple_nop_p (op1def))
903 || (op2def && !gimple_nop_p (op2def)
904 && stmt_dominates_stmt_p (op1def, op2def)))
906 if (gimple_code (op2def) == GIMPLE_PHI)
908 gsi = gsi_after_labels (gimple_bb (op2def));
909 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
913 if (!stmt_ends_bb_p (op2def))
915 gsi = gsi_for_stmt (op2def);
916 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
/* Defining stmt ends its block: insert on the fallthru edge instead.  */
923 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
924 if (e->flags & EDGE_FALLTHRU)
925 gsi_insert_on_edge_immediate (e, sum);
/* Symmetric case: place after OP1's definition.  */
931 if (gimple_code (op1def) == GIMPLE_PHI)
933 gsi = gsi_after_labels (gimple_bb (op1def));
934 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
938 if (!stmt_ends_bb_p (op1def))
940 gsi = gsi_for_stmt (op1def);
941 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
948 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
949 if (e->flags & EDGE_FALLTHRU)
950 gsi_insert_on_edge_immediate (e, sum);
/* NOTE(review): excerpt is missing many lines here (return type, braces,
   early-return conditions, several local declarations, continue/break
   statements, the final return); code kept verbatim, comments only
   added.  */
959 /* Perform un-distribution of divisions and multiplications.
960 A * X + B * X is transformed into (A + B) * X and A / X + B / X
961 to (A + B) / X for real X.
963 The algorithm is organized as follows.
965 - First we walk the addition chain *OPS looking for summands that
966 are defined by a multiplication or a real division. This results
967 in the candidates bitmap with relevant indices into *OPS.
969 - Second we build the chains of multiplications or divisions for
970 these candidates, counting the number of occurences of (operand, code)
971 pairs in all of the candidates chains.
973 - Third we sort the (operand, code) pairs by number of occurence and
974 process them starting with the pair with the most uses.
976 * For each such pair we walk the candidates again to build a
977 second candidate bitmap noting all multiplication/division chains
978 that have at least one occurence of (operand, code).
980 * We build an alternate addition chain only covering these
981 candidates with one (operand, code) operation removed from their
982 multiplication/division chain.
984 * The first candidate gets replaced by the alternate addition chain
985 multiplied/divided by the operand.
987 * All candidate chains get disabled for further processing and
988 processing of (operand, code) pairs continues.
990 The alternate addition chains built are re-processed by the main
991 reassociation algorithm which allows optimizing a * x * y + b * y * x
992 to (a + b ) * x * y in one invocation of the reassociation pass. */
995 undistribute_ops_list (enum tree_code opcode,
996 VEC (operand_entry_t, heap) **ops, struct loop *loop)
998 unsigned int length = VEC_length (operand_entry_t, *ops);
1001 sbitmap candidates, candidates2;
1002 unsigned nr_candidates, nr_candidates2;
1003 sbitmap_iterator sbi0;
1004 VEC (operand_entry_t, heap) **subops;
1006 bool changed = false;
1007 int next_oecount_id = 0;
/* Only PLUS_EXPR chains are handled; the first half of this early-exit
   condition is missing from the excerpt.  */
1010 || opcode != PLUS_EXPR)
1013 /* Build a list of candidates to process. */
1014 candidates = sbitmap_alloc (length);
1015 sbitmap_zero (candidates);
1017 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1019 enum tree_code dcode;
1022 if (TREE_CODE (oe1->op) != SSA_NAME)
1024 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1025 if (!is_gimple_assign (oe1def))
1027 dcode = gimple_assign_rhs_code (oe1def);
1028 if ((dcode != MULT_EXPR
1029 && dcode != RDIV_EXPR)
1030 || !is_reassociable_op (oe1def, dcode, loop))
1033 SET_BIT (candidates, i);
/* Nothing to undistribute with fewer than two candidates.  */
1037 if (nr_candidates < 2)
1039 sbitmap_free (candidates);
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1045 fprintf (dump_file, "searching for un-distribute opportunities ");
1046 print_generic_expr (dump_file,
1047 VEC_index (operand_entry_t, *ops,
1048 sbitmap_first_set_bit (candidates))->op, 0);
1049 fprintf (dump_file, " %d\n", nr_candidates);
1051 /* Build linearized sub-operand lists and the counting table. */
1054 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1055 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1056 VEC_length (operand_entry_t, *ops));
1057 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1060 enum tree_code oecode;
1063 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1064 oecode = gimple_assign_rhs_code (oedef);
1065 linearize_expr_tree (&subops[i], oedef,
1066 associative_tree_code (oecode), false);
1068 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1075 c.id = next_oecount_id++;
/* Hashtable entries are biased CVEC indices (index + 42, see
   oecount_hash) so 0/1 remain free for empty/deleted markers.  */
1077 VEC_safe_push (oecount, heap, cvec, &c);
1078 idx = VEC_length (oecount, cvec) + 41;
1079 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1082 *slot = (void *)idx;
/* Pair already counted: drop the provisional entry, bump its count.  */
1086 VEC_pop (oecount, cvec);
1087 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1091 htab_delete (ctable);
1093 /* Sort the counting table. */
1094 VEC_qsort (oecount, cvec, oecount_cmp);
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1099 fprintf (dump_file, "Candidates:\n");
1100 FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1102 fprintf (dump_file, " %u %s: ", c->cnt,
1103 c->oecode == MULT_EXPR
1104 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1105 print_generic_expr (dump_file, c->op, 0);
1106 fprintf (dump_file, "\n");
1110 /* Process the (operand, code) pairs in order of most occurence. */
1111 candidates2 = sbitmap_alloc (length);
1112 while (!VEC_empty (oecount, cvec))
1114 oecount *c = VEC_last (oecount, cvec);
1118 /* Now collect the operands in the outer chain that contain
1119 the common operand in their inner chain. */
1120 sbitmap_zero (candidates2);
1122 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1125 enum tree_code oecode;
1127 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1129 /* If we undistributed in this chain already this may be
1131 if (TREE_CODE (op) != SSA_NAME)
1134 oedef = SSA_NAME_DEF_STMT (op);
1135 oecode = gimple_assign_rhs_code (oedef);
1136 if (oecode != c->oecode)
1139 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1141 if (oe1->op == c->op)
1143 SET_BIT (candidates2, i);
1150 if (nr_candidates2 >= 2)
1152 operand_entry_t oe1, oe2;
1155 int first = sbitmap_first_set_bit (candidates2);
1157 /* Build the new addition chain. */
1158 oe1 = VEC_index (operand_entry_t, *ops, first);
1159 if (dump_file && (dump_flags & TDF_DETAILS))
1161 fprintf (dump_file, "Building (");
1162 print_generic_expr (dump_file, oe1->op, 0);
1164 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1165 add_referenced_var (tmpvar);
/* Strip one (op, code) factor from the first candidate's chain.  */
1166 zero_one_operation (&oe1->op, c->oecode, c->op);
1167 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1170 oe2 = VEC_index (operand_entry_t, *ops, i);
1171 if (dump_file && (dump_flags & TDF_DETAILS))
1173 fprintf (dump_file, " + ");
1174 print_generic_expr (dump_file, oe2->op, 0);
/* Strip the factor from this candidate too, accumulate into the sum,
   and neutralize its slot with a zero (identity for PLUS_EXPR).  */
1176 zero_one_operation (&oe2->op, c->oecode, c->op);
1177 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1178 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1180 oe1->op = gimple_get_lhs (sum);
1183 /* Apply the multiplication/division. */
1184 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1185 if (dump_file && (dump_flags & TDF_DETAILS))
1187 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1188 print_generic_expr (dump_file, c->op, 0);
1189 fprintf (dump_file, "\n");
1192 /* Record it in the addition chain and disable further
1193 undistribution with this op. */
1194 oe1->op = gimple_assign_lhs (prod);
1195 oe1->rank = get_rank (oe1->op);
1196 VEC_free (operand_entry_t, heap, subops[first]);
1201 VEC_pop (oecount, cvec);
1204 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1205 VEC_free (operand_entry_t, heap, subops[i]);
1207 VEC_free (oecount, heap, cvec);
1208 sbitmap_free (candidates);
1209 sbitmap_free (candidates2);
1214 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1215 expression, examine the other OPS to see if any of them are comparisons
1216 of the same values, which we may be able to combine or eliminate.
1217 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1220 eliminate_redundant_comparison (enum tree_code opcode,
1221 VEC (operand_entry_t, heap) **ops,
1222 unsigned int currindex,
1223 operand_entry_t curr)
1226 enum tree_code lcode, rcode;
/* Only bitwise OR / AND chains of comparisons can be merged here.  */
1231 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1234 /* Check that CURR is a comparison. */
1235 if (TREE_CODE (curr->op) != SSA_NAME)
1237 def1 = SSA_NAME_DEF_STMT (curr->op);
1238 if (!is_gimple_assign (def1))
1240 lcode = gimple_assign_rhs_code (def1);
1241 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1243 op1 = gimple_assign_rhs1 (def1);
1244 op2 = gimple_assign_rhs2 (def1);
1246 /* Now look for a similar comparison in the remaining OPS. */
1247 for (i = currindex + 1;
1248 VEC_iterate (operand_entry_t, *ops, i, oe);
/* Skip operands that are not themselves SSA comparisons.  */
1253 if (TREE_CODE (oe->op) != SSA_NAME)
1255 def2 = SSA_NAME_DEF_STMT (oe->op);
1256 if (!is_gimple_assign (def2))
1258 rcode = gimple_assign_rhs_code (def2);
1259 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1262 /* If we got here, we have a match. See if we can combine the
   two comparisons into a single one.  */
1264 if (opcode == BIT_IOR_EXPR)
1265 t = maybe_fold_or_comparisons (lcode, op1, op2,
1266 rcode, gimple_assign_rhs1 (def2),
1267 gimple_assign_rhs2 (def2));
1269 t = maybe_fold_and_comparisons (lcode, op1, op2,
1270 rcode, gimple_assign_rhs1 (def2),
1271 gimple_assign_rhs2 (def2));
1275 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1276 always give us a boolean_type_node value back. If the original
1277 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1278 we need to convert. */
1279 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1280 t = fold_convert (TREE_TYPE (curr->op), t);
/* If the folded result is a fresh comparison (neither a constant nor
   CURR itself), its operands must be gimple values or we cannot use it.  */
1282 if (TREE_CODE (t) != INTEGER_CST
1283 && !operand_equal_p (t, curr->op, 0))
1285 enum tree_code subcode;
1286 tree newop1, newop2;
1287 if (!COMPARISON_CLASS_P (t))
1289 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1290 STRIP_USELESS_TYPE_CONVERSION (newop1);
1291 STRIP_USELESS_TYPE_CONVERSION (newop2);
1292 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1296 if (dump_file && (dump_flags & TDF_DETAILS))
1298 fprintf (dump_file, "Equivalence: ");
1299 print_generic_expr (dump_file, curr->op, 0);
1300 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1301 print_generic_expr (dump_file, oe->op, 0);
1302 fprintf (dump_file, " -> ");
1303 print_generic_expr (dump_file, t, 0);
1304 fprintf (dump_file, "\n");
1307 /* Now we can delete oe, as it has been subsumed by the new combined
   expression T.  */
1309 VEC_ordered_remove (operand_entry_t, *ops, i);
1310 reassociate_stats.ops_eliminated ++;
1312 /* If t is the same as curr->op, we're done. Otherwise we must
1313 replace curr->op with t. Special case is if we got a constant
1314 back, in which case we add it to the end instead of in place of
1315 the current entry. */
1316 if (TREE_CODE (t) == INTEGER_CST)
1318 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1319 add_to_ops_vec (ops, t);
1321 else if (!operand_equal_p (t, curr->op, 0))
1325 enum tree_code subcode;
1328 gcc_assert (COMPARISON_CLASS_P (t));
/* Materialize the combined comparison as a new statement and make
   CURR refer to its result.  */
1329 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1330 add_referenced_var (tmpvar);
1331 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1332 STRIP_USELESS_TYPE_CONVERSION (newop1);
1333 STRIP_USELESS_TYPE_CONVERSION (newop2);
1334 gcc_checking_assert (is_gimple_val (newop1)
1335 && is_gimple_val (newop2));
1336 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1337 curr->op = gimple_get_lhs (sum);
1345 /* Perform various identities and other optimizations on the list of
1346 operand entries, stored in OPS. The tree code for the binary
1347 operation between all the operands is OPCODE. */
1350 optimize_ops_list (enum tree_code opcode,
1351 VEC (operand_entry_t, heap) **ops)
1353 unsigned int length = VEC_length (operand_entry_t, *ops);
1356 operand_entry_t oelast = NULL;
1357 bool iterate = false;
1362 oelast = VEC_last (operand_entry_t, *ops);
1364 /* If the last two are constants, pop the constants off, merge them
1365 and try the next two. */
1366 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1368 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
/* Only merge when both are invariants of compatible type.  */
1370 if (oelm1->rank == 0
1371 && is_gimple_min_invariant (oelm1->op)
1372 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1373 TREE_TYPE (oelast->op)))
1375 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1376 oelm1->op, oelast->op);
1378 if (folded && is_gimple_min_invariant (folded))
1380 if (dump_file && (dump_flags & TDF_DETAILS))
1381 fprintf (dump_file, "Merging constants\n");
/* Replace the two trailing constants by their folded value, then
   restart so further constants can be merged as well.  */
1383 VEC_pop (operand_entry_t, *ops);
1384 VEC_pop (operand_entry_t, *ops);
1386 add_to_ops_vec (ops, folded);
1387 reassociate_stats.constants_eliminated++;
1389 optimize_ops_list (opcode, ops);
1395 eliminate_using_constants (opcode, ops);
/* Run the pairwise eliminations over the whole operand list.  */
1398 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1402 if (eliminate_not_pairs (opcode, ops, i, oe))
1404 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1405 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1406 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
/* If anything was eliminated, the list may expose new opportunities;
   recompute and iterate.  */
1418 length = VEC_length (operand_entry_t, *ops);
1419 oelast = VEC_last (operand_entry_t, *ops);
1422 optimize_ops_list (opcode, ops);
1425 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1426 of STMT in it's operands. This is also known as a "destructive
1427 update" operation. */
1430 is_phi_for_stmt (gimple stmt, tree operand)
1434 use_operand_p arg_p;
/* Only SSA names can be PHI results.  */
1437 if (TREE_CODE (operand) != SSA_NAME)
1440 lhs = gimple_assign_lhs (stmt);
1442 def_stmt = SSA_NAME_DEF_STMT (operand);
1443 if (gimple_code (def_stmt) != GIMPLE_PHI)
/* Check whether any PHI argument is exactly STMT's LHS.  */
1446 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
1447 if (lhs == USE_FROM_PTR (arg_p))
1452 /* Remove def stmt of VAR if VAR has zero uses and recurse
1453 on rhs1 operand if so. */
1456 remove_visited_stmt_chain (tree var)
1459 gimple_stmt_iterator gsi;
/* Stop when VAR is not a dead SSA name, or its def was not part of a
   chain we already visited/rewrote.  */
1463 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
1465 stmt = SSA_NAME_DEF_STMT (var);
1466 if (!is_gimple_assign (stmt)
1467 || !gimple_visited_p (stmt))
/* Walk up through rhs1 so the whole dead chain gets removed.  */
1469 var = gimple_assign_rhs1 (stmt);
1470 gsi = gsi_for_stmt (stmt);
1471 gsi_remove (&gsi, true);
1472 release_defs (stmt);
1476 /* Recursively rewrite our linearized statements so that the operators
1477 match those in OPS[OPINDEX], putting the computation in rank
1481 rewrite_expr_tree (gimple stmt, unsigned int opindex,
1482 VEC(operand_entry_t, heap) * ops, bool moved)
1484 tree rhs1 = gimple_assign_rhs1 (stmt);
1485 tree rhs2 = gimple_assign_rhs2 (stmt);
1488 /* If we have three operands left, then we want to make sure the one
1489 that gets the double binary op are the ones with the same rank.
1491 The alternative we try is to see if this is a destructive
1492 update style statement, which is like:
1495 In that case, we want to use the destructive update form to
1496 expose the possible vectorizer sum reduction opportunity.
1497 In that case, the third operand will be the phi node.
1499 We could, of course, try to be better as noted above, and do a
1500 lot of work to try to find these opportunities in >3 operand
1501 cases, but it is unlikely to be worth it. */
1502 if (opindex + 3 == VEC_length (operand_entry_t, ops))
1504 operand_entry_t oe1, oe2, oe3;
1506 oe1 = VEC_index (operand_entry_t, ops, opindex);
1507 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1508 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
/* Swap oe3 to the front when oe1/oe2 share a rank that oe3 does not,
   or when oe3 (alone) is a destructive-update PHI operand.  */
1510 if ((oe1->rank == oe2->rank
1511 && oe2->rank != oe3->rank)
1512 || (is_phi_for_stmt (stmt, oe3->op)
1513 && !is_phi_for_stmt (stmt, oe1->op)
1514 && !is_phi_for_stmt (stmt, oe2->op)))
1516 struct operand_entry temp = *oe3;
1518 oe3->rank = oe1->rank;
1520 oe1->rank= temp.rank;
/* Likewise swap oe2 to the front in the symmetric case.  */
1522 else if ((oe1->rank == oe3->rank
1523 && oe2->rank != oe3->rank)
1524 || (is_phi_for_stmt (stmt, oe2->op)
1525 && !is_phi_for_stmt (stmt, oe1->op)
1526 && !is_phi_for_stmt (stmt, oe3->op)))
1528 struct operand_entry temp = *oe2;
1530 oe2->rank = oe1->rank;
1532 oe1->rank= temp.rank;
1536 /* The final recursion case for this function is that you have
1537 exactly two operations left.
1538 If we had one exactly one op in the entire list to start with, we
1539 would have never called this function, and the tail recursion
1540 rewrites them one at a time. */
1541 if (opindex + 2 == VEC_length (operand_entry_t, ops))
1543 operand_entry_t oe1, oe2;
1545 oe1 = VEC_index (operand_entry_t, ops, opindex);
1546 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
/* Only touch the statement if its operands actually change.  */
1548 if (rhs1 != oe1->op || rhs2 != oe2->op)
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1552 fprintf (dump_file, "Transforming ");
1553 print_gimple_stmt (dump_file, stmt, 0, 0);
1556 gimple_assign_set_rhs1 (stmt, oe1->op);
1557 gimple_assign_set_rhs2 (stmt, oe2->op);
/* The old rhs1 chain may now be dead; clean it up.  */
1559 if (rhs1 != oe1->op && rhs1 != oe2->op)
1560 remove_visited_stmt_chain (rhs1);
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, " into ");
1565 print_gimple_stmt (dump_file, stmt, 0, 0);
1572 /* If we hit here, we should have 3 or more ops left. */
1573 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
1575 /* Rewrite the next operator. */
1576 oe = VEC_index (operand_entry_t, ops, opindex);
/* Move the whole chain of defining statements up to STMT once, so
   later rewrites do not break dominance (done only on first change;
   guard condition elided in this excerpt).  */
1582 gimple_stmt_iterator gsinow, gsirhs1;
1583 gimple stmt1 = stmt, stmt2;
1586 gsinow = gsi_for_stmt (stmt);
1587 count = VEC_length (operand_entry_t, ops) - opindex - 2;
1588 while (count-- != 0)
1590 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
1591 gsirhs1 = gsi_for_stmt (stmt2);
1592 gsi_move_before (&gsirhs1, &gsinow);
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "Transforming ");
1602 print_gimple_stmt (dump_file, stmt, 0, 0);
1605 gimple_assign_set_rhs2 (stmt, oe->op);
1608 if (dump_file && (dump_flags & TDF_DETAILS))
1610 fprintf (dump_file, " into ");
1611 print_gimple_stmt (dump_file, stmt, 0, 0);
1614 /* Recurse on the LHS of the binary operator, which is guaranteed to
1615 be the non-leaf side. */
1616 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
1619 /* Transform STMT, which is really (A +B) + (C + D) into the left
1620 linear form, ((A+B)+C)+D.
1621 Recurse on D if necessary. */
1624 linearize_expr (gimple stmt)
1626 gimple_stmt_iterator gsinow, gsirhs;
1627 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1628 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1629 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1630 gimple newbinrhs = NULL;
1631 struct loop *loop = loop_containing_stmt (stmt);
/* Caller guarantees both sides are reassociable ops of the same code.  */
1633 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
1634 && is_reassociable_op (binrhs, rhscode, loop));
/* Move BINRHS before STMT so the rewritten operands dominate their uses.  */
1636 gsinow = gsi_for_stmt (stmt);
1637 gsirhs = gsi_for_stmt (binrhs);
1638 gsi_move_before (&gsirhs, &gsinow);
/* Rotate: stmt = (binrhs') + D where binrhs' = (A+B) + C.  */
1640 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
1641 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
1642 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
1644 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1645 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1647 if (dump_file && (dump_flags & TDF_DETAILS))
1649 fprintf (dump_file, "Linearized: ");
1650 print_gimple_stmt (dump_file, stmt, 0, 0);
1653 reassociate_stats.linearized++;
1654 update_stmt (binrhs);
1655 update_stmt (binlhs);
/* Mark all three as handled so linearize_expr_tree skips them.  */
1658 gimple_set_visited (stmt, true);
1659 gimple_set_visited (binlhs, true);
1660 gimple_set_visited (binrhs, true);
1662 /* Tail recurse on the new rhs if it still needs reassociation. */
1663 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
1664 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1665 want to change the algorithm while converting to tuples. */
1666 linearize_expr (stmt);
1669 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
1670 it. Otherwise, return NULL. */
1673 get_single_immediate_use (tree lhs)
1675 use_operand_p immuse;
/* Only SSA names have immediate-use chains to inspect.  */
1678 if (TREE_CODE (lhs) == SSA_NAME
1679 && single_imm_use (lhs, &immuse, &immusestmt)
1680 && is_gimple_assign (immusestmt))
1686 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1687 representing the negated value. Insertions of any necessary
1688 instructions go before GSI.
1689 This function is recursive in that, if you hand it "a_5" as the
1690 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1691 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1694 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
1696 gimple negatedefstmt= NULL;
1697 tree resultofnegate;
1699 /* If we are trying to negate a name, defined by an add, negate the
1700 add operands instead. */
1701 if (TREE_CODE (tonegate) == SSA_NAME)
1702 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
/* Only rewrite the defining add in place when its result has a single
   use — otherwise other uses would see the negated value too.  */
1703 if (TREE_CODE (tonegate) == SSA_NAME
1704 && is_gimple_assign (negatedefstmt)
1705 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
1706 && has_single_use (gimple_assign_lhs (negatedefstmt))
1707 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
1709 gimple_stmt_iterator gsi;
1710 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
1711 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
/* Re-fetch the iterator before each recursive negate: the recursion
   may insert statements and invalidate the old iterator.  */
1713 gsi = gsi_for_stmt (negatedefstmt);
1714 rhs1 = negate_value (rhs1, &gsi);
1715 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
1717 gsi = gsi_for_stmt (negatedefstmt);
1718 rhs2 = negate_value (rhs2, &gsi);
1719 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
1721 update_stmt (negatedefstmt);
1722 return gimple_assign_lhs (negatedefstmt);
/* Base case: emit an explicit NEGATE_EXPR before GSI.  */
1725 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
1726 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
1727 NULL_TREE, true, GSI_SAME_STMT);
1728 return resultofnegate;
1731 /* Return true if we should break up the subtract in STMT into an add
1732 with negate. This is true when we the subtract operands are really
1733 adds, or the subtract itself is used in an add expression. In
1734 either case, breaking up the subtract into an add with negate
1735 exposes the adds to reassociation. */
1738 should_break_up_subtract (gimple stmt)
1740 tree lhs = gimple_assign_lhs (stmt);
1741 tree binlhs = gimple_assign_rhs1 (stmt);
1742 tree binrhs = gimple_assign_rhs2 (stmt);
1744 struct loop *loop = loop_containing_stmt (stmt);
/* Either operand being a reassociable PLUS makes breaking up useful.  */
1746 if (TREE_CODE (binlhs) == SSA_NAME
1747 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
1750 if (TREE_CODE (binrhs) == SSA_NAME
1751 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
/* Or the subtract feeds exactly one addition/multiplication.  */
1754 if (TREE_CODE (lhs) == SSA_NAME
1755 && (immusestmt = get_single_immediate_use (lhs))
1756 && is_gimple_assign (immusestmt)
1757 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
1758 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
1763 /* Transform STMT from A - B into A + -B. */
1766 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
1768 tree rhs1 = gimple_assign_rhs1 (stmt);
1769 tree rhs2 = gimple_assign_rhs2 (stmt);
1771 if (dump_file && (dump_flags & TDF_DETAILS))
1773 fprintf (dump_file, "Breaking up subtract ");
1774 print_gimple_stmt (dump_file, stmt, 0, 0);
/* Negate B (inserting any needed statements before GSIP) and turn
   the MINUS into a PLUS so reassociation can see through it.  */
1777 rhs2 = negate_value (rhs2, gsip);
1778 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
1782 /* Recursively linearize a binary expression that is the RHS of STMT.
1783 Place the operands of the expression tree in the vector named OPS. */
1786 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
1787 bool is_associative, bool set_visited)
1789 tree binlhs = gimple_assign_rhs1 (stmt);
1790 tree binrhs = gimple_assign_rhs2 (stmt);
1791 gimple binlhsdef, binrhsdef;
1792 bool binlhsisreassoc = false;
1793 bool binrhsisreassoc = false;
1794 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1795 struct loop *loop = loop_containing_stmt (stmt);
1798 gimple_set_visited (stmt, true);
/* Determine which operands are themselves reassociable defs of the
   same operation; throwing statements must not be moved.  */
1800 if (TREE_CODE (binlhs) == SSA_NAME)
1802 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
1803 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
1804 && !stmt_could_throw_p (binlhsdef));
1807 if (TREE_CODE (binrhs) == SSA_NAME)
1809 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
1810 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
1811 && !stmt_could_throw_p (binrhsdef));
1814 /* If the LHS is not reassociable, but the RHS is, we need to swap
1815 them. If neither is reassociable, there is nothing we can do, so
1816 just put them in the ops vector. If the LHS is reassociable,
1817 linearize it. If both are reassociable, then linearize the RHS
1820 if (!binlhsisreassoc)
1824 /* If this is not a associative operation like division, give up. */
1825 if (!is_associative)
1827 add_to_ops_vec (ops, binrhs);
/* Neither side extends the chain: record both leaves and stop.  */
1831 if (!binrhsisreassoc)
1833 add_to_ops_vec (ops, binrhs);
1834 add_to_ops_vec (ops, binlhs);
1838 if (dump_file && (dump_flags & TDF_DETAILS))
1840 fprintf (dump_file, "swapping operands of ");
1841 print_gimple_stmt (dump_file, stmt, 0, 0);
1844 swap_tree_operands (stmt,
1845 gimple_assign_rhs1_ptr (stmt),
1846 gimple_assign_rhs2_ptr (stmt));
1849 if (dump_file && (dump_flags & TDF_DETAILS))
1851 fprintf (dump_file, " is now ");
1852 print_gimple_stmt (dump_file, stmt, 0, 0);
1855 /* We want to make it so the lhs is always the reassociative op,
1861 else if (binrhsisreassoc)
1863 linearize_expr (stmt);
1864 binlhs = gimple_assign_rhs1 (stmt);
1865 binrhs = gimple_assign_rhs2 (stmt);
/* After linearization the RHS must be a leaf; recurse down the LHS
   chain and record the RHS leaf.  */
1868 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
1869 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
1871 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
1872 is_associative, set_visited);
1873 add_to_ops_vec (ops, binrhs);
1876 /* Repropagate the negates back into subtracts, since no other pass
1877 currently does it. */
1880 repropagate_negates (void)
1885 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
1887 gimple user = get_single_immediate_use (negate);
/* Only handle negates consumed by exactly one assignment.  */
1889 if (!user || !is_gimple_assign (user))
1892 /* The negate operand can be either operand of a PLUS_EXPR
1893 (it can be the LHS if the RHS is a constant for example).
1895 Force the negate operand to the RHS of the PLUS_EXPR, then
1896 transform the PLUS_EXPR into a MINUS_EXPR. */
1897 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
1899 /* If the negated operand appears on the LHS of the
1900 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1901 to force the negated operand to the RHS of the PLUS_EXPR. */
1902 if (gimple_assign_rhs1 (user) == negate)
1904 swap_tree_operands (user,
1905 gimple_assign_rhs1_ptr (user),
1906 gimple_assign_rhs2_ptr (user));
1909 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1910 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1911 if (gimple_assign_rhs2 (user) == negate)
1913 tree rhs1 = gimple_assign_rhs1 (user);
1914 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
1915 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1916 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
1920 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
1922 if (gimple_assign_rhs1 (user) == negate)
1927 which we transform into
1930 This pushes down the negate which we possibly can merge
1931 into some other operation, hence insert it into the
1932 plus_negates vector. */
1933 gimple feed = SSA_NAME_DEF_STMT (negate);
1934 tree a = gimple_assign_rhs1 (feed);
1935 tree rhs2 = gimple_assign_rhs2 (user);
1936 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
/* Rewrite FEED to compute a + b and USER to negate it, then move
   the new add before the negate so defs dominate uses.  */
1937 gimple_replace_lhs (feed, negate);
1938 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
1939 update_stmt (gsi_stmt (gsi));
1940 gsi2 = gsi_for_stmt (user);
1941 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
1942 update_stmt (gsi_stmt (gsi2));
1943 gsi_move_before (&gsi, &gsi2);
1944 VEC_safe_push (tree, heap, plus_negates,
1945 gimple_assign_lhs (gsi_stmt (gsi2)));
1949 /* Transform "x = -a; y = b - x" into "y = b + a", getting
1950 rid of one operation. */
1951 gimple feed = SSA_NAME_DEF_STMT (negate);
1952 tree a = gimple_assign_rhs1 (feed);
1953 tree rhs1 = gimple_assign_rhs1 (user);
1954 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1955 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
1956 update_stmt (gsi_stmt (gsi));
1962 /* Returns true if OP is of a type for which we can do reassociation.
1963 That is for integral or non-saturating fixed-point types, and for
1964 floating point type when associative-math is enabled. */
1967 can_reassociate_p (tree op)
1969 tree type = TREE_TYPE (op);
/* Integral types must wrap on overflow: reassociating a type that
   traps or is undefined on overflow could introduce new overflows.  */
1970 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
1971 || NON_SAT_FIXED_POINT_TYPE_P (type)
1972 || (flag_associative_math && FLOAT_TYPE_P (type)))
1977 /* Break up subtract operations in block BB.
1979 We do this top down because we don't know whether the subtract is
1980 part of a possible chain of reassociation except at the top.
1989 we want to break up k = t - q, but we won't until we've transformed q
1990 = b - r, which won't be broken up until we transform b = c - d.
1992 En passant, clear the GIMPLE visited flag on every statement. */
1995 break_up_subtract_bb (basic_block bb)
1997 gimple_stmt_iterator gsi;
2000 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2002 gimple stmt = gsi_stmt (gsi);
/* Reset the visited flag for the later reassociation walk.  */
2003 gimple_set_visited (stmt, false);
2005 if (!is_gimple_assign (stmt)
2006 || !can_reassociate_p (gimple_assign_lhs (stmt)))
2009 /* Look for simple gimple subtract operations. */
2010 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2012 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2013 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2016 /* Check for a subtract used only in an addition. If this
2017 is the case, transform it into add of a negate for better
2018 reassociation. IE transform C = A-B into C = A + -B if C
2019 is only used in an addition. */
2020 if (should_break_up_subtract (stmt))
2021 break_up_subtract (stmt, &gsi);
/* Remember plain negates for later repropagation.  */
2023 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2024 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2025 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
/* Recurse over the dominator tree so defs are processed before uses.  */
2027 for (son = first_dom_son (CDI_DOMINATORS, bb);
2029 son = next_dom_son (CDI_DOMINATORS, son))
2030 break_up_subtract_bb (son);
2033 /* Reassociate expressions in basic block BB and its post-dominator as
2037 reassociate_bb (basic_block bb)
2039 gimple_stmt_iterator gsi;
/* Walk the block backwards so uses are seen before their defs.  */
2042 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2044 gimple stmt = gsi_stmt (gsi);
2046 if (is_gimple_assign (stmt)
2047 && !stmt_could_throw_p (stmt))
2049 tree lhs, rhs1, rhs2;
2050 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2052 /* If this is not a gimple binary expression, there is
2053 nothing for us to do with it. */
2054 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2057 /* If this was part of an already processed statement,
2058 we don't need to touch it again. */
2059 if (gimple_visited_p (stmt))
2061 /* This statement might have become dead because of previous
2063 if (has_zero_uses (gimple_get_lhs (stmt)))
2065 gsi_remove (&gsi, true);
2066 release_defs (stmt);
2067 /* We might end up removing the last stmt above which
2068 places the iterator to the end of the sequence.
2069 Reset it to the last stmt in this case which might
2070 be the end of the sequence as well if we removed
2071 the last statement of the sequence. In which case
2072 we need to bail out. */
2073 if (gsi_end_p (gsi))
2075 gsi = gsi_last_bb (bb);
2076 if (gsi_end_p (gsi))
2083 lhs = gimple_assign_lhs (stmt);
2084 rhs1 = gimple_assign_rhs1 (stmt);
2085 rhs2 = gimple_assign_rhs2 (stmt);
2087 /* For non-bit or min/max operations we can't associate
2088 all types. Verify that here. */
2089 if (rhs_code != BIT_IOR_EXPR
2090 && rhs_code != BIT_AND_EXPR
2091 && rhs_code != BIT_XOR_EXPR
2092 && rhs_code != MIN_EXPR
2093 && rhs_code != MAX_EXPR
2094 && (!can_reassociate_p (lhs)
2095 || !can_reassociate_p (rhs1)
2096 || !can_reassociate_p (rhs2)))
2099 if (associative_tree_code (rhs_code))
2101 VEC(operand_entry_t, heap) *ops = NULL;
2103 /* There may be no immediate uses left by the time we
2104 get here because we may have eliminated them all. */
2105 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
/* Flatten the expression tree into OPS, sort by rank, then run
   the list optimizations (and undistribution, which may expose
   more opportunities requiring a second pass).  */
2108 gimple_set_visited (stmt, true);
2109 linearize_expr_tree (&ops, stmt, true, true);
2110 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2111 optimize_ops_list (rhs_code, &ops);
2112 if (undistribute_ops_list (rhs_code, &ops,
2113 loop_containing_stmt (stmt)))
2115 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2116 optimize_ops_list (rhs_code, &ops);
/* A single remaining operand means the whole tree folded to one
   value: replace STMT's RHS with it directly.  */
2119 if (VEC_length (operand_entry_t, ops) == 1)
2121 if (dump_file && (dump_flags & TDF_DETAILS))
2123 fprintf (dump_file, "Transforming ");
2124 print_gimple_stmt (dump_file, stmt, 0, 0);
2127 rhs1 = gimple_assign_rhs1 (stmt);
2128 gimple_assign_set_rhs_from_tree (&gsi,
2129 VEC_last (operand_entry_t,
2132 remove_visited_stmt_chain (rhs1);
2134 if (dump_file && (dump_flags & TDF_DETAILS))
2136 fprintf (dump_file, " into ");
2137 print_gimple_stmt (dump_file, stmt, 0, 0);
2141 rewrite_expr_tree (stmt, 0, ops, false);
2143 VEC_free (operand_entry_t, heap, ops);
/* Recurse over the post-dominator tree.  */
2147 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2149 son = next_dom_son (CDI_POST_DOMINATORS, son))
2150 reassociate_bb (son);
/* Forward declarations: these dump helpers are extern so they can be
   called from a debugger.  */
2153 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2154 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2156 /* Dump the operand entry vector OPS to FILE. */
2159 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2164 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2166 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2167 print_generic_expr (file, oe->op, 0);
2171 /* Dump the operand entry vector OPS to STDERR. */
2174 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2176 dump_ops_vector (stderr, ops);
/* Driver: break up subtracts top-down from the entry block, then
   reassociate bottom-up from the exit block.  */
2182 break_up_subtract_bb (ENTRY_BLOCK_PTR);
2183 reassociate_bb (EXIT_BLOCK_PTR);
2186 /* Initialize the reassociation pass. */
2194 int *bbs = XNEWVEC (int, last_basic_block + 1);
2196 /* Find the loops, so that we can prevent moving calculations in
2198 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2200 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
/* Pool for operand_entry records; freed in the pass cleanup.  */
2202 operand_entry_pool = create_alloc_pool ("operand entry pool",
2203 sizeof (struct operand_entry), 30);
2204 next_operand_entry_id = 0;
2206 /* Reverse RPO (Reverse Post Order) will give us something where
2207 deeper loops come later. */
2208 pre_and_rev_post_order_compute (NULL, bbs, false);
2209 bb_rank = XCNEWVEC (long, last_basic_block + 1);
2210 operand_rank = pointer_map_create ();
2212 /* Give each argument a distinct rank. */
2213 for (param = DECL_ARGUMENTS (current_function_decl);
2215 param = DECL_CHAIN (param))
2217 if (gimple_default_def (cfun, param) != NULL)
2219 tree def = gimple_default_def (cfun, param);
2220 insert_operand_rank (def, ++rank);
2224 /* Give the chain decl a distinct rank. */
2225 if (cfun->static_chain_decl != NULL)
2227 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
2229 insert_operand_rank (def, ++rank);
2232 /* Set up rank for each BB */
/* BB rank occupies the high bits so any statement in a later block
   outranks everything in an earlier one.  */
2233 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
2234 bb_rank[bbs[i]] = ++rank << 16;
2237 calculate_dominance_info (CDI_POST_DOMINATORS);
2238 plus_negates = NULL;
2241 /* Cleanup after the reassociation pass, and print stats if
2247 statistics_counter_event (cfun, "Linearized",
2248 reassociate_stats.linearized);
2249 statistics_counter_event (cfun, "Constants eliminated",
2250 reassociate_stats.constants_eliminated);
2251 statistics_counter_event (cfun, "Ops eliminated",
2252 reassociate_stats.ops_eliminated);
2253 statistics_counter_event (cfun, "Statements rewritten",
2254 reassociate_stats.rewritten);
/* Release everything allocated in init_reassoc.  */
2256 pointer_map_destroy (operand_rank);
2257 free_alloc_pool (operand_entry_pool);
2259 VEC_free (tree, heap, plus_negates);
2260 free_dominance_info (CDI_POST_DOMINATORS);
2261 loop_optimizer_finalize ();
2264 /* Gate and execute functions for Reassociation. */
2267 execute_reassoc (void)
/* Negates are pushed back into subtracts as the final step.  */
2272 repropagate_negates ();
/* Pass gate: run only when -ftree-reassoc is enabled.  */
2279 gate_tree_ssa_reassoc (void)
2281 return flag_tree_reassoc != 0;
/* Pass descriptor wiring the gate and execute functions into the
   GIMPLE pass manager.  */
2284 struct gimple_opt_pass pass_reassoc =
2288 "reassoc", /* name */
2289 gate_tree_ssa_reassoc, /* gate */
2290 execute_reassoc, /* execute */
2293 0, /* static_pass_number */
2294 TV_TREE_REASSOC, /* tv_id */
2295 PROP_cfg | PROP_ssa, /* properties_required */
2296 0, /* properties_provided */
2297 0, /* properties_destroyed */
2298 0, /* todo_flags_start */
2301 | TODO_ggc_collect /* todo_flags_finish */