1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "diagnostic.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
32 #include "tree-dump.h"
34 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 4. Rewrite the expression trees we linearized and optimized so
62 they are in proper rank order.
64 5. Repropagate negates, as nothing else will clean it up ATM.
66 A bit of theory on #4, since nobody seems to write anything down
67 about why it makes sense to do it the way they do it:
69 We could do this much nicer theoretically, but don't (for reasons
70 explained after how to do it theoretically nice :P).
72 In order to promote the most redundancy elimination, you want
73 binary expressions whose operands are the same rank (or
74 preferably, the same value) exposed to the redundancy eliminator,
75 for possible elimination.
77 So the way to do this if we really cared, is to build the new op
78 tree from the leaves to the roots, merging as you go, and putting the
79 new op on the end of the worklist, until you are left with one
80 thing on the worklist.
82 IE if you have to rewrite the following set of operands (listed with
83 rank in parentheses), with opcode PLUS_EXPR:
85 a (1), b (1), c (1), d (2), e (2)
88 We start with our merge worklist empty, and the ops list with all of
91 You want to first merge all leaves of the same rank, as much as
94 So first build a binary op of
96 mergetmp = a + b, and put "mergetmp" on the merge worklist.
98 Because there is no three operand form of PLUS_EXPR, c is not going to
99 be exposed to redundancy elimination as a rank 1 operand.
101 So you might as well throw it on the merge worklist (you could also
102 consider it to now be a rank two operand, and merge it with d and e,
103 but in this case, you then have evicted e from a binary op. So at
104 least in this situation, you can't win.)
106 Then build a binary op of d + e
109 and put mergetmp2 on the merge worklist.
111 so merge worklist = {mergetmp, c, mergetmp2}
113 Continue building binary ops of these operations until you have only
114 one operation left on the worklist.
119 mergetmp3 = mergetmp + c
121 worklist = {mergetmp2, mergetmp3}
123 mergetmp4 = mergetmp2 + mergetmp3
125 worklist = {mergetmp4}
127 because we have one operation left, we can now just set the original
128 statement equal to the result of that operation.
130 This will at least expose a + b and d + e to redundancy elimination
131 as binary operations.
133 For extra points, you can reuse the old statements to build the
134 mergetmps, since you shouldn't run out.
136 So why don't we do this?
138 Because it's expensive, and rarely will help. Most trees we are
139 reassociating have 3 or less ops. If they have 2 ops, they already
140 will be written into a nice single binary op. If you have 3 ops, a
141 single simple check suffices to tell you whether the first two are of the
142 same rank. If so, you know to order it
145 newstmt = mergetmp + op3
149 newstmt = mergetmp + op1
151 If all three are of the same rank, you can't expose them all in a
152 single binary operator anyway, so the above is *still* the best you
155 Thus, this is what we do. When we have three ops left, we check to see
156 what order to put them in, and call it a day. As a nod to vector sum
157 reduction, we check if any of the ops are really a phi node that is a
158 destructive update for the associating op, and keep the destructive
159 update together for vector sum reduction recognition. */
166 int constants_eliminated;
171 /* Operator, rank pair. */
172 typedef struct operand_entry
179 static alloc_pool operand_entry_pool;
181 /* This is used to assign a unique ID to each struct operand_entry
182 so that qsort results are identical on different hosts. */
183 static int next_operand_entry_id;
185 /* Starting rank number for a given basic block, so that we can rank
186 operations using unmovable instructions in that BB based on the bb
188 static long *bb_rank;
190 /* Operand->rank hashtable. */
191 static struct pointer_map_t *operand_rank;
194 /* Look up the operand rank structure for expression E. */
197 find_operand_rank (tree e)
199 void **slot = pointer_map_contains (operand_rank, e);
200 return slot ? (long) (intptr_t) *slot : -1;
203 /* Insert {E,RANK} into the operand rank hashtable. */
206 insert_operand_rank (tree e, long rank)
209 gcc_assert (rank > 0);
210 slot = pointer_map_insert (operand_rank, e);
212 *slot = (void *) (intptr_t) rank;
215 /* Given an expression E, return the rank of the expression. */
220 /* Constants have rank 0. */
221 if (is_gimple_min_invariant (e))
224 /* SSA_NAME's have the rank of the expression they are the result
226 For globals and uninitialized values, the rank is 0.
227 For function arguments, use the pre-setup rank.
228 For PHI nodes, stores, asm statements, etc, we use the rank of
230 For simple operations, the rank is the maximum rank of any of
231 its operands, or the bb_rank, whichever is less.
232 I make no claims that this is optimal, however, it gives good
235 if (TREE_CODE (e) == SSA_NAME)
241 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
242 && SSA_NAME_IS_DEFAULT_DEF (e))
243 return find_operand_rank (e);
245 stmt = SSA_NAME_DEF_STMT (e);
246 if (gimple_bb (stmt) == NULL)
249 if (!is_gimple_assign (stmt)
250 || gimple_vdef (stmt))
251 return bb_rank[gimple_bb (stmt)->index];
253 /* If we already have a rank for this expression, use that. */
254 rank = find_operand_rank (e);
258 /* Otherwise, find the maximum rank for the operands, or the bb
259 rank, whichever is less. */
261 maxrank = bb_rank[gimple_bb(stmt)->index];
262 if (gimple_assign_single_p (stmt))
264 tree rhs = gimple_assign_rhs1 (stmt);
265 n = TREE_OPERAND_LENGTH (rhs);
267 rank = MAX (rank, get_rank (rhs));
271 i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
272 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
277 n = gimple_num_ops (stmt);
278 for (i = 1; i < n && rank != maxrank; i++)
280 gcc_assert (gimple_op (stmt, i));
281 rank = MAX(rank, get_rank (gimple_op (stmt, i)));
285 if (dump_file && (dump_flags & TDF_DETAILS))
287 fprintf (dump_file, "Rank for ");
288 print_generic_expr (dump_file, e, 0);
289 fprintf (dump_file, " is %ld\n", (rank + 1));
292 /* Note the rank in the hashtable so we don't recompute it. */
293 insert_operand_rank (e, (rank + 1));
297 /* Globals, etc, are rank 0 */
301 DEF_VEC_P(operand_entry_t);
302 DEF_VEC_ALLOC_P(operand_entry_t, heap);
304 /* We want integer ones to end up last no matter what, since they are
305 the ones we can do the most with. */
306 #define INTEGER_CONST_TYPE 1 << 3
307 #define FLOAT_CONST_TYPE 1 << 2
308 #define OTHER_CONST_TYPE 1 << 1
310 /* Classify an invariant tree into integer, float, or other, so that
311 we can sort them to be near other constants of the same type. */
313 constant_type (tree t)
315 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
316 return INTEGER_CONST_TYPE;
317 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
318 return FLOAT_CONST_TYPE;
320 return OTHER_CONST_TYPE;
323 /* qsort comparison function to sort operand entries PA and PB by rank
324 so that the sorted array is ordered by rank in decreasing order. */
326 sort_by_operand_rank (const void *pa, const void *pb)
328 const operand_entry_t oea = *(const operand_entry_t *)pa;
329 const operand_entry_t oeb = *(const operand_entry_t *)pb;
331 /* It's nicer for optimize_expression if constants that are likely
332 to fold when added/multiplied//whatever are put next to each
333 other. Since all constants have rank 0, order them by type. */
334 if (oeb->rank == 0 && oea->rank == 0)
336 if (constant_type (oeb->op) != constant_type (oea->op))
337 return constant_type (oeb->op) - constant_type (oea->op);
339 /* To make sorting result stable, we use unique IDs to determine
341 return oeb->id - oea->id;
344 /* Lastly, make sure the versions that are the same go next to each
345 other. We use SSA_NAME_VERSION because it's stable. */
346 if ((oeb->rank - oea->rank == 0)
347 && TREE_CODE (oea->op) == SSA_NAME
348 && TREE_CODE (oeb->op) == SSA_NAME)
350 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
351 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
353 return oeb->id - oea->id;
356 if (oeb->rank != oea->rank)
357 return oeb->rank - oea->rank;
359 return oeb->id - oea->id;
362 /* Add an operand entry to *OPS for the tree operand OP. */
365 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
367 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
370 oe->rank = get_rank (op);
371 oe->id = next_operand_entry_id++;
372 VEC_safe_push (operand_entry_t, heap, *ops, oe);
375 /* Return true if STMT is reassociable operation containing a binary
376 operation with tree code CODE, and is inside LOOP. */
379 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
381 basic_block bb = gimple_bb (stmt);
383 if (gimple_bb (stmt) == NULL)
386 if (!flow_bb_inside_loop_p (loop, bb))
389 if (is_gimple_assign (stmt)
390 && gimple_assign_rhs_code (stmt) == code
391 && has_single_use (gimple_assign_lhs (stmt)))
398 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
399 operand of the negate operation. Otherwise, return NULL. */
402 get_unary_op (tree name, enum tree_code opcode)
404 gimple stmt = SSA_NAME_DEF_STMT (name);
406 if (!is_gimple_assign (stmt))
409 if (gimple_assign_rhs_code (stmt) == opcode)
410 return gimple_assign_rhs1 (stmt);
414 /* If CURR and LAST are a pair of ops that OPCODE allows us to
415 eliminate through equivalences, do so, remove them from OPS, and
416 return true. Otherwise, return false. */
419 eliminate_duplicate_pair (enum tree_code opcode,
420 VEC (operand_entry_t, heap) **ops,
423 operand_entry_t curr,
424 operand_entry_t last)
427 /* If we have two of the same op, and the opcode is & |, min, or max,
428 we can eliminate one of them.
429 If we have two of the same op, and the opcode is ^, we can
430 eliminate both of them. */
432 if (last && last->op == curr->op)
440 if (dump_file && (dump_flags & TDF_DETAILS))
442 fprintf (dump_file, "Equivalence: ");
443 print_generic_expr (dump_file, curr->op, 0);
444 fprintf (dump_file, " [&|minmax] ");
445 print_generic_expr (dump_file, last->op, 0);
446 fprintf (dump_file, " -> ");
447 print_generic_stmt (dump_file, last->op, 0);
450 VEC_ordered_remove (operand_entry_t, *ops, i);
451 reassociate_stats.ops_eliminated ++;
456 if (dump_file && (dump_flags & TDF_DETAILS))
458 fprintf (dump_file, "Equivalence: ");
459 print_generic_expr (dump_file, curr->op, 0);
460 fprintf (dump_file, " ^ ");
461 print_generic_expr (dump_file, last->op, 0);
462 fprintf (dump_file, " -> nothing\n");
465 reassociate_stats.ops_eliminated += 2;
467 if (VEC_length (operand_entry_t, *ops) == 2)
469 VEC_free (operand_entry_t, heap, *ops);
471 add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
477 VEC_ordered_remove (operand_entry_t, *ops, i-1);
478 VEC_ordered_remove (operand_entry_t, *ops, i-1);
490 static VEC(tree, heap) *plus_negates;
492 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
493 look in OPS for a corresponding positive operation to cancel it
494 out. If we find one, remove the other from OPS, replace
495 OPS[CURRINDEX] with 0, and return true. Otherwise, return
499 eliminate_plus_minus_pair (enum tree_code opcode,
500 VEC (operand_entry_t, heap) **ops,
501 unsigned int currindex,
502 operand_entry_t curr)
509 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
512 negateop = get_unary_op (curr->op, NEGATE_EXPR);
513 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
514 if (negateop == NULL_TREE && notop == NULL_TREE)
517 /* Any non-negated version will have a rank that is one less than
518 the current rank. So once we hit those ranks, if we don't find
521 for (i = currindex + 1;
522 VEC_iterate (operand_entry_t, *ops, i, oe)
523 && oe->rank >= curr->rank - 1 ;
526 if (oe->op == negateop)
529 if (dump_file && (dump_flags & TDF_DETAILS))
531 fprintf (dump_file, "Equivalence: ");
532 print_generic_expr (dump_file, negateop, 0);
533 fprintf (dump_file, " + -");
534 print_generic_expr (dump_file, oe->op, 0);
535 fprintf (dump_file, " -> 0\n");
538 VEC_ordered_remove (operand_entry_t, *ops, i);
539 add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
541 VEC_ordered_remove (operand_entry_t, *ops, currindex);
542 reassociate_stats.ops_eliminated ++;
546 else if (oe->op == notop)
548 tree op_type = TREE_TYPE (oe->op);
550 if (dump_file && (dump_flags & TDF_DETAILS))
552 fprintf (dump_file, "Equivalence: ");
553 print_generic_expr (dump_file, notop, 0);
554 fprintf (dump_file, " + ~");
555 print_generic_expr (dump_file, oe->op, 0);
556 fprintf (dump_file, " -> -1\n");
559 VEC_ordered_remove (operand_entry_t, *ops, i);
560 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
561 VEC_ordered_remove (operand_entry_t, *ops, currindex);
562 reassociate_stats.ops_eliminated ++;
568 /* CURR->OP is a negate expr in a plus expr: save it for later
569 inspection in repropagate_negates(). */
570 VEC_safe_push (tree, heap, plus_negates, curr->op);
575 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
576 bitwise not expression, look in OPS for a corresponding operand to
577 cancel it out. If we find one, remove the other from OPS, replace
578 OPS[CURRINDEX] with 0, and return true. Otherwise, return
582 eliminate_not_pairs (enum tree_code opcode,
583 VEC (operand_entry_t, heap) **ops,
584 unsigned int currindex,
585 operand_entry_t curr)
591 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
592 || TREE_CODE (curr->op) != SSA_NAME)
595 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
596 if (notop == NULL_TREE)
599 /* Any non-not version will have a rank that is one less than
600 the current rank. So once we hit those ranks, if we don't find
603 for (i = currindex + 1;
604 VEC_iterate (operand_entry_t, *ops, i, oe)
605 && oe->rank >= curr->rank - 1;
610 if (dump_file && (dump_flags & TDF_DETAILS))
612 fprintf (dump_file, "Equivalence: ");
613 print_generic_expr (dump_file, notop, 0);
614 if (opcode == BIT_AND_EXPR)
615 fprintf (dump_file, " & ~");
616 else if (opcode == BIT_IOR_EXPR)
617 fprintf (dump_file, " | ~");
618 print_generic_expr (dump_file, oe->op, 0);
619 if (opcode == BIT_AND_EXPR)
620 fprintf (dump_file, " -> 0\n");
621 else if (opcode == BIT_IOR_EXPR)
622 fprintf (dump_file, " -> -1\n");
625 if (opcode == BIT_AND_EXPR)
626 oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
627 else if (opcode == BIT_IOR_EXPR)
628 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
629 TYPE_PRECISION (TREE_TYPE (oe->op)));
631 reassociate_stats.ops_eliminated
632 += VEC_length (operand_entry_t, *ops) - 1;
633 VEC_free (operand_entry_t, heap, *ops);
635 VEC_safe_push (operand_entry_t, heap, *ops, oe);
643 /* Use constant value that may be present in OPS to try to eliminate
644 operands. Note that this function is only really used when we've
645 eliminated ops for other reasons, or merged constants. Across
646 single statements, fold already does all of this, plus more. There
647 is little point in duplicating logic, so I've only included the
648 identities that I could ever construct testcases to trigger. */
651 eliminate_using_constants (enum tree_code opcode,
652 VEC(operand_entry_t, heap) **ops)
654 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
655 tree type = TREE_TYPE (oelast->op);
657 if (oelast->rank == 0
658 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
663 if (integer_zerop (oelast->op))
665 if (VEC_length (operand_entry_t, *ops) != 1)
667 if (dump_file && (dump_flags & TDF_DETAILS))
668 fprintf (dump_file, "Found & 0, removing all other ops\n");
670 reassociate_stats.ops_eliminated
671 += VEC_length (operand_entry_t, *ops) - 1;
673 VEC_free (operand_entry_t, heap, *ops);
675 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
679 else if (integer_all_onesp (oelast->op))
681 if (VEC_length (operand_entry_t, *ops) != 1)
683 if (dump_file && (dump_flags & TDF_DETAILS))
684 fprintf (dump_file, "Found & -1, removing\n");
685 VEC_pop (operand_entry_t, *ops);
686 reassociate_stats.ops_eliminated++;
691 if (integer_all_onesp (oelast->op))
693 if (VEC_length (operand_entry_t, *ops) != 1)
695 if (dump_file && (dump_flags & TDF_DETAILS))
696 fprintf (dump_file, "Found | -1, removing all other ops\n");
698 reassociate_stats.ops_eliminated
699 += VEC_length (operand_entry_t, *ops) - 1;
701 VEC_free (operand_entry_t, heap, *ops);
703 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
707 else if (integer_zerop (oelast->op))
709 if (VEC_length (operand_entry_t, *ops) != 1)
711 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Found | 0, removing\n");
713 VEC_pop (operand_entry_t, *ops);
714 reassociate_stats.ops_eliminated++;
719 if (integer_zerop (oelast->op)
720 || (FLOAT_TYPE_P (type)
721 && !HONOR_NANS (TYPE_MODE (type))
722 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
723 && real_zerop (oelast->op)))
725 if (VEC_length (operand_entry_t, *ops) != 1)
727 if (dump_file && (dump_flags & TDF_DETAILS))
728 fprintf (dump_file, "Found * 0, removing all other ops\n");
730 reassociate_stats.ops_eliminated
731 += VEC_length (operand_entry_t, *ops) - 1;
732 VEC_free (operand_entry_t, heap, *ops);
734 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
738 else if (integer_onep (oelast->op)
739 || (FLOAT_TYPE_P (type)
740 && !HONOR_SNANS (TYPE_MODE (type))
741 && real_onep (oelast->op)))
743 if (VEC_length (operand_entry_t, *ops) != 1)
745 if (dump_file && (dump_flags & TDF_DETAILS))
746 fprintf (dump_file, "Found * 1, removing\n");
747 VEC_pop (operand_entry_t, *ops);
748 reassociate_stats.ops_eliminated++;
756 if (integer_zerop (oelast->op)
757 || (FLOAT_TYPE_P (type)
758 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
759 && fold_real_zero_addition_p (type, oelast->op,
760 opcode == MINUS_EXPR)))
762 if (VEC_length (operand_entry_t, *ops) != 1)
764 if (dump_file && (dump_flags & TDF_DETAILS))
765 fprintf (dump_file, "Found [|^+] 0, removing\n");
766 VEC_pop (operand_entry_t, *ops);
767 reassociate_stats.ops_eliminated++;
779 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
782 /* Structure for tracking and counting operands. */
783 typedef struct oecount_s {
786 enum tree_code oecode;
791 DEF_VEC_ALLOC_O(oecount,heap);
793 /* The heap for the oecount hashtable and the sorted list of operands. */
794 static VEC (oecount, heap) *cvec;
796 /* Hash function for oecount. */
799 oecount_hash (const void *p)
801 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
802 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
805 /* Comparison function for oecount. */
808 oecount_eq (const void *p1, const void *p2)
810 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
811 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
812 return (c1->oecode == c2->oecode
813 && c1->op == c2->op);
816 /* Comparison function for qsort sorting oecount elements by count. */
819 oecount_cmp (const void *p1, const void *p2)
821 const oecount *c1 = (const oecount *)p1;
822 const oecount *c2 = (const oecount *)p2;
823 if (c1->cnt != c2->cnt)
824 return c1->cnt - c2->cnt;
826 /* If counts are identical, use unique IDs to stabilize qsort. */
827 return c1->id - c2->id;
830 /* Walks the linear chain with result *DEF searching for an operation
831 with operand OP and code OPCODE removing that from the chain. *DEF
832 is updated if there is only one operand but no operation left. */
835 zero_one_operation (tree *def, enum tree_code opcode, tree op)
837 gimple stmt = SSA_NAME_DEF_STMT (*def);
841 tree name = gimple_assign_rhs1 (stmt);
843 /* If this is the operation we look for and one of the operands
844 is ours simply propagate the other operand into the stmts
846 if (gimple_assign_rhs_code (stmt) == opcode
848 || gimple_assign_rhs2 (stmt) == op))
852 gimple_stmt_iterator gsi;
854 name = gimple_assign_rhs2 (stmt);
855 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
856 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
857 if (gimple_assign_lhs (stmt) == *def)
860 if (TREE_CODE (name) != SSA_NAME)
861 update_stmt (use_stmt);
862 gsi = gsi_for_stmt (stmt);
863 gsi_remove (&gsi, true);
868 /* Continue walking the chain. */
869 gcc_assert (name != op
870 && TREE_CODE (name) == SSA_NAME);
871 stmt = SSA_NAME_DEF_STMT (name);
876 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
877 the result. Places the statement after the definition of either
878 OP1 or OP2. Returns the new statement. */
881 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
883 gimple op1def = NULL, op2def = NULL;
884 gimple_stmt_iterator gsi;
888 /* Create the addition statement. */
889 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
890 op = make_ssa_name (tmpvar, sum);
891 gimple_assign_set_lhs (sum, op);
893 /* Find an insertion place and insert. */
894 if (TREE_CODE (op1) == SSA_NAME)
895 op1def = SSA_NAME_DEF_STMT (op1);
896 if (TREE_CODE (op2) == SSA_NAME)
897 op2def = SSA_NAME_DEF_STMT (op2);
898 if ((!op1def || gimple_nop_p (op1def))
899 && (!op2def || gimple_nop_p (op2def)))
901 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
902 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
904 else if ((!op1def || gimple_nop_p (op1def))
905 || (op2def && !gimple_nop_p (op2def)
906 && stmt_dominates_stmt_p (op1def, op2def)))
908 if (gimple_code (op2def) == GIMPLE_PHI)
910 gsi = gsi_after_labels (gimple_bb (op2def));
911 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
915 if (!stmt_ends_bb_p (op2def))
917 gsi = gsi_for_stmt (op2def);
918 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
925 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
926 if (e->flags & EDGE_FALLTHRU)
927 gsi_insert_on_edge_immediate (e, sum);
933 if (gimple_code (op1def) == GIMPLE_PHI)
935 gsi = gsi_after_labels (gimple_bb (op1def));
936 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
940 if (!stmt_ends_bb_p (op1def))
942 gsi = gsi_for_stmt (op1def);
943 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
950 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
951 if (e->flags & EDGE_FALLTHRU)
952 gsi_insert_on_edge_immediate (e, sum);
961 /* Perform un-distribution of divisions and multiplications.
962 A * X + B * X is transformed into (A + B) * X and A / X + B / X
963 to (A + B) / X for real X.
965 The algorithm is organized as follows.
967 - First we walk the addition chain *OPS looking for summands that
968 are defined by a multiplication or a real division. This results
969 in the candidates bitmap with relevant indices into *OPS.
971 - Second we build the chains of multiplications or divisions for
972 these candidates, counting the number of occurences of (operand, code)
973 pairs in all of the candidates chains.
975 - Third we sort the (operand, code) pairs by number of occurence and
976 process them starting with the pair with the most uses.
978 * For each such pair we walk the candidates again to build a
979 second candidate bitmap noting all multiplication/division chains
980 that have at least one occurence of (operand, code).
982 * We build an alternate addition chain only covering these
983 candidates with one (operand, code) operation removed from their
984 multiplication/division chain.
986 * The first candidate gets replaced by the alternate addition chain
987 multiplied/divided by the operand.
989 * All candidate chains get disabled for further processing and
990 processing of (operand, code) pairs continues.
992 The alternate addition chains built are re-processed by the main
993 reassociation algorithm which allows optimizing a * x * y + b * y * x
994 to (a + b ) * x * y in one invocation of the reassociation pass. */
997 undistribute_ops_list (enum tree_code opcode,
998 VEC (operand_entry_t, heap) **ops, struct loop *loop)
1000 unsigned int length = VEC_length (operand_entry_t, *ops);
1001 operand_entry_t oe1;
1003 sbitmap candidates, candidates2;
1004 unsigned nr_candidates, nr_candidates2;
1005 sbitmap_iterator sbi0;
1006 VEC (operand_entry_t, heap) **subops;
1008 bool changed = false;
1009 int next_oecount_id = 0;
1012 || opcode != PLUS_EXPR)
1015 /* Build a list of candidates to process. */
1016 candidates = sbitmap_alloc (length);
1017 sbitmap_zero (candidates);
1019 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
1021 enum tree_code dcode;
1024 if (TREE_CODE (oe1->op) != SSA_NAME)
1026 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1027 if (!is_gimple_assign (oe1def))
1029 dcode = gimple_assign_rhs_code (oe1def);
1030 if ((dcode != MULT_EXPR
1031 && dcode != RDIV_EXPR)
1032 || !is_reassociable_op (oe1def, dcode, loop))
1035 SET_BIT (candidates, i);
1039 if (nr_candidates < 2)
1041 sbitmap_free (candidates);
1045 if (dump_file && (dump_flags & TDF_DETAILS))
1047 fprintf (dump_file, "searching for un-distribute opportunities ");
1048 print_generic_expr (dump_file,
1049 VEC_index (operand_entry_t, *ops,
1050 sbitmap_first_set_bit (candidates))->op, 0);
1051 fprintf (dump_file, " %d\n", nr_candidates);
1054 /* Build linearized sub-operand lists and the counting table. */
1056 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1057 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1058 VEC_length (operand_entry_t, *ops));
1059 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1062 enum tree_code oecode;
1065 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1066 oecode = gimple_assign_rhs_code (oedef);
1067 linearize_expr_tree (&subops[i], oedef,
1068 associative_tree_code (oecode), false);
1070 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
1077 c.id = next_oecount_id++;
1079 VEC_safe_push (oecount, heap, cvec, &c);
1080 idx = VEC_length (oecount, cvec) + 41;
1081 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1084 *slot = (void *)idx;
1088 VEC_pop (oecount, cvec);
1089 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1093 htab_delete (ctable);
1095 /* Sort the counting table. */
1096 qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
1097 sizeof (oecount), oecount_cmp);
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1102 fprintf (dump_file, "Candidates:\n");
1103 for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
1105 fprintf (dump_file, " %u %s: ", c->cnt,
1106 c->oecode == MULT_EXPR
1107 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1108 print_generic_expr (dump_file, c->op, 0);
1109 fprintf (dump_file, "\n");
1113 /* Process the (operand, code) pairs in order of most occurence. */
1114 candidates2 = sbitmap_alloc (length);
1115 while (!VEC_empty (oecount, cvec))
1117 oecount *c = VEC_last (oecount, cvec);
1121 /* Now collect the operands in the outer chain that contain
1122 the common operand in their inner chain. */
1123 sbitmap_zero (candidates2);
1125 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1128 enum tree_code oecode;
1130 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1132 /* If we undistributed in this chain already this may be
1134 if (TREE_CODE (op) != SSA_NAME)
1137 oedef = SSA_NAME_DEF_STMT (op);
1138 oecode = gimple_assign_rhs_code (oedef);
1139 if (oecode != c->oecode)
1142 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
1144 if (oe1->op == c->op)
1146 SET_BIT (candidates2, i);
1153 if (nr_candidates2 >= 2)
1155 operand_entry_t oe1, oe2;
1158 int first = sbitmap_first_set_bit (candidates2);
1160 /* Build the new addition chain. */
1161 oe1 = VEC_index (operand_entry_t, *ops, first);
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1164 fprintf (dump_file, "Building (");
1165 print_generic_expr (dump_file, oe1->op, 0);
1167 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1168 add_referenced_var (tmpvar);
1169 zero_one_operation (&oe1->op, c->oecode, c->op);
1170 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1173 oe2 = VEC_index (operand_entry_t, *ops, i);
1174 if (dump_file && (dump_flags & TDF_DETAILS))
1176 fprintf (dump_file, " + ");
1177 print_generic_expr (dump_file, oe2->op, 0);
1179 zero_one_operation (&oe2->op, c->oecode, c->op);
1180 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1181 oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
1183 oe1->op = gimple_get_lhs (sum);
1186 /* Apply the multiplication/division. */
1187 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1188 if (dump_file && (dump_flags & TDF_DETAILS))
1190 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1191 print_generic_expr (dump_file, c->op, 0);
1192 fprintf (dump_file, "\n");
1195 /* Record it in the addition chain and disable further
1196 undistribution with this op. */
1197 oe1->op = gimple_assign_lhs (prod);
1198 oe1->rank = get_rank (oe1->op);
1199 VEC_free (operand_entry_t, heap, subops[first]);
1204 VEC_pop (oecount, cvec);
1207 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1208 VEC_free (operand_entry_t, heap, subops[i]);
1210 VEC_free (oecount, heap, cvec);
1211 sbitmap_free (candidates);
1212 sbitmap_free (candidates2);
1217 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1218 expression, examine the other OPS to see if any of them are comparisons
1219 of the same values, which we may be able to combine or eliminate.
1220 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1223 eliminate_redundant_comparison (enum tree_code opcode,
1224 VEC (operand_entry_t, heap) **ops,
1225 unsigned int currindex,
1226 operand_entry_t curr)
1229 enum tree_code lcode, rcode;
1234 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1237 /* Check that CURR is a comparison. */
1238 if (TREE_CODE (curr->op) != SSA_NAME)
1240 def1 = SSA_NAME_DEF_STMT (curr->op);
1241 if (!is_gimple_assign (def1))
1243 lcode = gimple_assign_rhs_code (def1);
1244 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1246 op1 = gimple_assign_rhs1 (def1);
1247 op2 = gimple_assign_rhs2 (def1);
1249 /* Now look for a similar comparison in the remaining OPS. */
1250 for (i = currindex + 1;
1251 VEC_iterate (operand_entry_t, *ops, i, oe);
1256 if (TREE_CODE (oe->op) != SSA_NAME)
1258 def2 = SSA_NAME_DEF_STMT (oe->op);
1259 if (!is_gimple_assign (def2))
1261 rcode = gimple_assign_rhs_code (def2);
1262 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1264 if (operand_equal_p (op1, gimple_assign_rhs1 (def2), 0)
1265 && operand_equal_p (op2, gimple_assign_rhs2 (def2), 0))
1267 else if (operand_equal_p (op1, gimple_assign_rhs2 (def2), 0)
1268 && operand_equal_p (op2, gimple_assign_rhs1 (def2), 0))
1269 rcode = swap_tree_comparison (rcode);
1273 /* If we got here, we have a match. See if we can combine the
1275 t = combine_comparisons (UNKNOWN_LOCATION,
1276 (opcode == BIT_IOR_EXPR
1277 ? TRUTH_OR_EXPR : TRUTH_AND_EXPR),
1278 lcode, rcode, TREE_TYPE (curr->op), op1, op2);
1281 if (dump_file && (dump_flags & TDF_DETAILS))
1283 fprintf (dump_file, "Equivalence: ");
1284 print_generic_expr (dump_file, curr->op, 0);
1285 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1286 print_generic_expr (dump_file, oe->op, 0);
1287 fprintf (dump_file, " -> ");
1288 print_generic_expr (dump_file, t, 0);
1289 fprintf (dump_file, "\n");
1292 /* Now we can delete oe, as it has been subsumed by the new combined
1294 VEC_ordered_remove (operand_entry_t, *ops, i);
1295 reassociate_stats.ops_eliminated ++;
1297 /* If t is the same as curr->op, we're done. Otherwise we must
1298 replace curr->op with t. Special case is if we got a constant
1299 back, in which case we add it to the end instead of in place of
1300 the current entry. */
1301 if (TREE_CODE (t) == INTEGER_CST)
1303 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1304 add_to_ops_vec (ops, t);
1306 else if (TREE_CODE (t) != lcode)
1310 enum tree_code subcode;
1313 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1314 add_referenced_var (tmpvar);
1315 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1316 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1317 curr->op = gimple_get_lhs (sum);
1325 /* Perform various identities and other optimizations on the list of
1326 operand entries, stored in OPS. The tree code for the binary
1327 operation between all the operands is OPCODE. */
1330 optimize_ops_list (enum tree_code opcode,
1331 VEC (operand_entry_t, heap) **ops)
1333 unsigned int length = VEC_length (operand_entry_t, *ops);
1336 operand_entry_t oelast = NULL;
1337 bool iterate = false;
1342 oelast = VEC_last (operand_entry_t, *ops);
1344 /* If the last two are constants, pop the constants off, merge them
1345 and try the next two. */
1346 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1348 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1350 if (oelm1->rank == 0
1351 && is_gimple_min_invariant (oelm1->op)
1352 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1353 TREE_TYPE (oelast->op)))
1355 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1356 oelm1->op, oelast->op);
1358 if (folded && is_gimple_min_invariant (folded))
1360 if (dump_file && (dump_flags & TDF_DETAILS))
1361 fprintf (dump_file, "Merging constants\n");
1363 VEC_pop (operand_entry_t, *ops);
1364 VEC_pop (operand_entry_t, *ops);
1366 add_to_ops_vec (ops, folded);
1367 reassociate_stats.constants_eliminated++;
1369 optimize_ops_list (opcode, ops);
1375 eliminate_using_constants (opcode, ops);
1378 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1382 if (eliminate_not_pairs (opcode, ops, i, oe))
1384 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1385 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1386 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1398 length = VEC_length (operand_entry_t, *ops);
1399 oelast = VEC_last (operand_entry_t, *ops);
1402 optimize_ops_list (opcode, ops);
1405 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1406 of STMT in it's operands. This is also known as a "destructive
1407 update" operation. */
1410 is_phi_for_stmt (gimple stmt, tree operand)
1414 use_operand_p arg_p;
1417 if (TREE_CODE (operand) != SSA_NAME)
1420 lhs = gimple_assign_lhs (stmt);
1422 def_stmt = SSA_NAME_DEF_STMT (operand);
1423 if (gimple_code (def_stmt) != GIMPLE_PHI)
1426 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
1427 if (lhs == USE_FROM_PTR (arg_p))
1432 /* Remove def stmt of VAR if VAR has zero uses and recurse
1433 on rhs1 operand if so. */
1436 remove_visited_stmt_chain (tree var)
1439 gimple_stmt_iterator gsi;
1443 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
1445 stmt = SSA_NAME_DEF_STMT (var);
1446 if (!is_gimple_assign (stmt)
1447 || !gimple_visited_p (stmt))
1449 var = gimple_assign_rhs1 (stmt);
1450 gsi = gsi_for_stmt (stmt);
1451 gsi_remove (&gsi, true);
1452 release_defs (stmt);
1456 /* Recursively rewrite our linearized statements so that the operators
1457 match those in OPS[OPINDEX], putting the computation in rank
1461 rewrite_expr_tree (gimple stmt, unsigned int opindex,
1462 VEC(operand_entry_t, heap) * ops, bool moved)
1464 tree rhs1 = gimple_assign_rhs1 (stmt);
1465 tree rhs2 = gimple_assign_rhs2 (stmt);
1468 /* If we have three operands left, then we want to make sure the one
1469 that gets the double binary op are the ones with the same rank.
1471 The alternative we try is to see if this is a destructive
1472 update style statement, which is like:
1475 In that case, we want to use the destructive update form to
1476 expose the possible vectorizer sum reduction opportunity.
1477 In that case, the third operand will be the phi node.
1479 We could, of course, try to be better as noted above, and do a
1480 lot of work to try to find these opportunities in >3 operand
1481 cases, but it is unlikely to be worth it. */
1482 if (opindex + 3 == VEC_length (operand_entry_t, ops))
1484 operand_entry_t oe1, oe2, oe3;
1486 oe1 = VEC_index (operand_entry_t, ops, opindex);
1487 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1488 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
1490 if ((oe1->rank == oe2->rank
1491 && oe2->rank != oe3->rank)
1492 || (is_phi_for_stmt (stmt, oe3->op)
1493 && !is_phi_for_stmt (stmt, oe1->op)
1494 && !is_phi_for_stmt (stmt, oe2->op)))
1496 struct operand_entry temp = *oe3;
1498 oe3->rank = oe1->rank;
1500 oe1->rank= temp.rank;
1502 else if ((oe1->rank == oe3->rank
1503 && oe2->rank != oe3->rank)
1504 || (is_phi_for_stmt (stmt, oe2->op)
1505 && !is_phi_for_stmt (stmt, oe1->op)
1506 && !is_phi_for_stmt (stmt, oe3->op)))
1508 struct operand_entry temp = *oe2;
1510 oe2->rank = oe1->rank;
1512 oe1->rank= temp.rank;
1516 /* The final recursion case for this function is that you have
1517 exactly two operations left.
1518 If we had one exactly one op in the entire list to start with, we
1519 would have never called this function, and the tail recursion
1520 rewrites them one at a time. */
1521 if (opindex + 2 == VEC_length (operand_entry_t, ops))
1523 operand_entry_t oe1, oe2;
1525 oe1 = VEC_index (operand_entry_t, ops, opindex);
1526 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1528 if (rhs1 != oe1->op || rhs2 != oe2->op)
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1532 fprintf (dump_file, "Transforming ");
1533 print_gimple_stmt (dump_file, stmt, 0, 0);
1536 gimple_assign_set_rhs1 (stmt, oe1->op);
1537 gimple_assign_set_rhs2 (stmt, oe2->op);
1539 if (rhs1 != oe1->op && rhs1 != oe2->op)
1540 remove_visited_stmt_chain (rhs1);
1542 if (dump_file && (dump_flags & TDF_DETAILS))
1544 fprintf (dump_file, " into ");
1545 print_gimple_stmt (dump_file, stmt, 0, 0);
1552 /* If we hit here, we should have 3 or more ops left. */
1553 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
1555 /* Rewrite the next operator. */
1556 oe = VEC_index (operand_entry_t, ops, opindex);
1562 gimple_stmt_iterator gsinow, gsirhs1;
1563 gimple stmt1 = stmt, stmt2;
1566 gsinow = gsi_for_stmt (stmt);
1567 count = VEC_length (operand_entry_t, ops) - opindex - 2;
1568 while (count-- != 0)
1570 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
1571 gsirhs1 = gsi_for_stmt (stmt2);
1572 gsi_move_before (&gsirhs1, &gsinow);
1579 if (dump_file && (dump_flags & TDF_DETAILS))
1581 fprintf (dump_file, "Transforming ");
1582 print_gimple_stmt (dump_file, stmt, 0, 0);
1585 gimple_assign_set_rhs2 (stmt, oe->op);
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, " into ");
1591 print_gimple_stmt (dump_file, stmt, 0, 0);
1594 /* Recurse on the LHS of the binary operator, which is guaranteed to
1595 be the non-leaf side. */
1596 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
1599 /* Transform STMT, which is really (A +B) + (C + D) into the left
1600 linear form, ((A+B)+C)+D.
1601 Recurse on D if necessary. */
1604 linearize_expr (gimple stmt)
1606 gimple_stmt_iterator gsinow, gsirhs;
1607 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1608 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1609 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1610 gimple newbinrhs = NULL;
1611 struct loop *loop = loop_containing_stmt (stmt);
1613 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
1614 && is_reassociable_op (binrhs, rhscode, loop));
1616 gsinow = gsi_for_stmt (stmt);
1617 gsirhs = gsi_for_stmt (binrhs);
1618 gsi_move_before (&gsirhs, &gsinow);
1620 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
1621 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
1622 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
1624 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1625 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1627 if (dump_file && (dump_flags & TDF_DETAILS))
1629 fprintf (dump_file, "Linearized: ");
1630 print_gimple_stmt (dump_file, stmt, 0, 0);
1633 reassociate_stats.linearized++;
1634 update_stmt (binrhs);
1635 update_stmt (binlhs);
1638 gimple_set_visited (stmt, true);
1639 gimple_set_visited (binlhs, true);
1640 gimple_set_visited (binrhs, true);
1642 /* Tail recurse on the new rhs if it still needs reassociation. */
1643 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
1644 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1645 want to change the algorithm while converting to tuples. */
1646 linearize_expr (stmt);
1649 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
1650 it. Otherwise, return NULL. */
1653 get_single_immediate_use (tree lhs)
1655 use_operand_p immuse;
1658 if (TREE_CODE (lhs) == SSA_NAME
1659 && single_imm_use (lhs, &immuse, &immusestmt)
1660 && is_gimple_assign (immusestmt))
1666 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1667 representing the negated value. Insertions of any necessary
1668 instructions go before GSI.
1669 This function is recursive in that, if you hand it "a_5" as the
1670 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1671 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1674 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
1676 gimple negatedefstmt= NULL;
1677 tree resultofnegate;
1679 /* If we are trying to negate a name, defined by an add, negate the
1680 add operands instead. */
1681 if (TREE_CODE (tonegate) == SSA_NAME)
1682 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
1683 if (TREE_CODE (tonegate) == SSA_NAME
1684 && is_gimple_assign (negatedefstmt)
1685 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
1686 && has_single_use (gimple_assign_lhs (negatedefstmt))
1687 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
1689 gimple_stmt_iterator gsi;
1690 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
1691 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
1693 gsi = gsi_for_stmt (negatedefstmt);
1694 rhs1 = negate_value (rhs1, &gsi);
1695 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
1697 gsi = gsi_for_stmt (negatedefstmt);
1698 rhs2 = negate_value (rhs2, &gsi);
1699 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
1701 update_stmt (negatedefstmt);
1702 return gimple_assign_lhs (negatedefstmt);
1705 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
1706 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
1707 NULL_TREE, true, GSI_SAME_STMT);
1708 return resultofnegate;
1711 /* Return true if we should break up the subtract in STMT into an add
1712 with negate. This is true when we the subtract operands are really
1713 adds, or the subtract itself is used in an add expression. In
1714 either case, breaking up the subtract into an add with negate
1715 exposes the adds to reassociation. */
1718 should_break_up_subtract (gimple stmt)
1720 tree lhs = gimple_assign_lhs (stmt);
1721 tree binlhs = gimple_assign_rhs1 (stmt);
1722 tree binrhs = gimple_assign_rhs2 (stmt);
1724 struct loop *loop = loop_containing_stmt (stmt);
1726 if (TREE_CODE (binlhs) == SSA_NAME
1727 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
1730 if (TREE_CODE (binrhs) == SSA_NAME
1731 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
1734 if (TREE_CODE (lhs) == SSA_NAME
1735 && (immusestmt = get_single_immediate_use (lhs))
1736 && is_gimple_assign (immusestmt)
1737 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
1738 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
1743 /* Transform STMT from A - B into A + -B. */
1746 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
1748 tree rhs1 = gimple_assign_rhs1 (stmt);
1749 tree rhs2 = gimple_assign_rhs2 (stmt);
1751 if (dump_file && (dump_flags & TDF_DETAILS))
1753 fprintf (dump_file, "Breaking up subtract ");
1754 print_gimple_stmt (dump_file, stmt, 0, 0);
1757 rhs2 = negate_value (rhs2, gsip);
1758 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
1762 /* Recursively linearize a binary expression that is the RHS of STMT.
1763 Place the operands of the expression tree in the vector named OPS. */
1766 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
1767 bool is_associative, bool set_visited)
1769 tree binlhs = gimple_assign_rhs1 (stmt);
1770 tree binrhs = gimple_assign_rhs2 (stmt);
1771 gimple binlhsdef, binrhsdef;
1772 bool binlhsisreassoc = false;
1773 bool binrhsisreassoc = false;
1774 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1775 struct loop *loop = loop_containing_stmt (stmt);
1778 gimple_set_visited (stmt, true);
1780 if (TREE_CODE (binlhs) == SSA_NAME)
1782 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
1783 binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
1786 if (TREE_CODE (binrhs) == SSA_NAME)
1788 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
1789 binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
1792 /* If the LHS is not reassociable, but the RHS is, we need to swap
1793 them. If neither is reassociable, there is nothing we can do, so
1794 just put them in the ops vector. If the LHS is reassociable,
1795 linearize it. If both are reassociable, then linearize the RHS
1798 if (!binlhsisreassoc)
1802 /* If this is not a associative operation like division, give up. */
1803 if (!is_associative)
1805 add_to_ops_vec (ops, binrhs);
1809 if (!binrhsisreassoc)
1811 add_to_ops_vec (ops, binrhs);
1812 add_to_ops_vec (ops, binlhs);
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1818 fprintf (dump_file, "swapping operands of ");
1819 print_gimple_stmt (dump_file, stmt, 0, 0);
1822 swap_tree_operands (stmt,
1823 gimple_assign_rhs1_ptr (stmt),
1824 gimple_assign_rhs2_ptr (stmt));
1827 if (dump_file && (dump_flags & TDF_DETAILS))
1829 fprintf (dump_file, " is now ");
1830 print_gimple_stmt (dump_file, stmt, 0, 0);
1833 /* We want to make it so the lhs is always the reassociative op,
1839 else if (binrhsisreassoc)
1841 linearize_expr (stmt);
1842 binlhs = gimple_assign_rhs1 (stmt);
1843 binrhs = gimple_assign_rhs2 (stmt);
1846 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
1847 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
1849 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
1850 is_associative, set_visited);
1851 add_to_ops_vec (ops, binrhs);
1854 /* Repropagate the negates back into subtracts, since no other pass
1855 currently does it. */
1858 repropagate_negates (void)
1863 for (i = 0; VEC_iterate (tree, plus_negates, i, negate); i++)
1865 gimple user = get_single_immediate_use (negate);
1867 if (!user || !is_gimple_assign (user))
1870 /* The negate operand can be either operand of a PLUS_EXPR
1871 (it can be the LHS if the RHS is a constant for example).
1873 Force the negate operand to the RHS of the PLUS_EXPR, then
1874 transform the PLUS_EXPR into a MINUS_EXPR. */
1875 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
1877 /* If the negated operand appears on the LHS of the
1878 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1879 to force the negated operand to the RHS of the PLUS_EXPR. */
1880 if (gimple_assign_rhs1 (user) == negate)
1882 swap_tree_operands (user,
1883 gimple_assign_rhs1_ptr (user),
1884 gimple_assign_rhs2_ptr (user));
1887 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1888 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1889 if (gimple_assign_rhs2 (user) == negate)
1891 tree rhs1 = gimple_assign_rhs1 (user);
1892 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
1893 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1894 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
1898 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
1900 if (gimple_assign_rhs1 (user) == negate)
1905 which we transform into
1908 This pushes down the negate which we possibly can merge
1909 into some other operation, hence insert it into the
1910 plus_negates vector. */
1911 gimple feed = SSA_NAME_DEF_STMT (negate);
1912 tree a = gimple_assign_rhs1 (feed);
1913 tree rhs2 = gimple_assign_rhs2 (user);
1914 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
1915 gimple_replace_lhs (feed, negate);
1916 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
1917 update_stmt (gsi_stmt (gsi));
1918 gsi2 = gsi_for_stmt (user);
1919 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
1920 update_stmt (gsi_stmt (gsi2));
1921 gsi_move_before (&gsi, &gsi2);
1922 VEC_safe_push (tree, heap, plus_negates,
1923 gimple_assign_lhs (gsi_stmt (gsi2)));
1927 /* Transform "x = -a; y = b - x" into "y = b + a", getting
1928 rid of one operation. */
1929 gimple feed = SSA_NAME_DEF_STMT (negate);
1930 tree a = gimple_assign_rhs1 (feed);
1931 tree rhs1 = gimple_assign_rhs1 (user);
1932 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1933 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
1934 update_stmt (gsi_stmt (gsi));
1940 /* Returns true if OP is of a type for which we can do reassociation.
1941 That is for integral or non-saturating fixed-point types, and for
1942 floating point type when associative-math is enabled. */
1945 can_reassociate_p (tree op)
1947 tree type = TREE_TYPE (op);
1948 if (INTEGRAL_TYPE_P (type)
1949 || NON_SAT_FIXED_POINT_TYPE_P (type)
1950 || (flag_associative_math && FLOAT_TYPE_P (type)))
1955 /* Break up subtract operations in block BB.
1957 We do this top down because we don't know whether the subtract is
1958 part of a possible chain of reassociation except at the top.
1967 we want to break up k = t - q, but we won't until we've transformed q
1968 = b - r, which won't be broken up until we transform b = c - d.
1970 En passant, clear the GIMPLE visited flag on every statement. */
1973 break_up_subtract_bb (basic_block bb)
1975 gimple_stmt_iterator gsi;
1978 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1980 gimple stmt = gsi_stmt (gsi);
1981 gimple_set_visited (stmt, false);
1983 if (!is_gimple_assign (stmt)
1984 || !can_reassociate_p (gimple_assign_lhs (stmt)))
1987 /* Look for simple gimple subtract operations. */
1988 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
1990 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
1991 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
1994 /* Check for a subtract used only in an addition. If this
1995 is the case, transform it into add of a negate for better
1996 reassociation. IE transform C = A-B into C = A + -B if C
1997 is only used in an addition. */
1998 if (should_break_up_subtract (stmt))
1999 break_up_subtract (stmt, &gsi);
2001 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2002 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2003 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2005 for (son = first_dom_son (CDI_DOMINATORS, bb);
2007 son = next_dom_son (CDI_DOMINATORS, son))
2008 break_up_subtract_bb (son);
2011 /* Reassociate expressions in basic block BB and its post-dominator as
2015 reassociate_bb (basic_block bb)
2017 gimple_stmt_iterator gsi;
2020 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2022 gimple stmt = gsi_stmt (gsi);
2024 if (is_gimple_assign (stmt))
2026 tree lhs, rhs1, rhs2;
2027 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2029 /* If this is not a gimple binary expression, there is
2030 nothing for us to do with it. */
2031 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2034 /* If this was part of an already processed statement,
2035 we don't need to touch it again. */
2036 if (gimple_visited_p (stmt))
2038 /* This statement might have become dead because of previous
2040 if (has_zero_uses (gimple_get_lhs (stmt)))
2042 gsi_remove (&gsi, true);
2043 release_defs (stmt);
2044 /* We might end up removing the last stmt above which
2045 places the iterator to the end of the sequence.
2046 Reset it to the last stmt in this case which might
2047 be the end of the sequence as well if we removed
2048 the last statement of the sequence. In which case
2049 we need to bail out. */
2050 if (gsi_end_p (gsi))
2052 gsi = gsi_last_bb (bb);
2053 if (gsi_end_p (gsi))
2060 lhs = gimple_assign_lhs (stmt);
2061 rhs1 = gimple_assign_rhs1 (stmt);
2062 rhs2 = gimple_assign_rhs2 (stmt);
2064 if (!can_reassociate_p (lhs)
2065 || !can_reassociate_p (rhs1)
2066 || !can_reassociate_p (rhs2))
2069 if (associative_tree_code (rhs_code))
2071 VEC(operand_entry_t, heap) *ops = NULL;
2073 /* There may be no immediate uses left by the time we
2074 get here because we may have eliminated them all. */
2075 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2078 gimple_set_visited (stmt, true);
2079 linearize_expr_tree (&ops, stmt, true, true);
2080 qsort (VEC_address (operand_entry_t, ops),
2081 VEC_length (operand_entry_t, ops),
2082 sizeof (operand_entry_t),
2083 sort_by_operand_rank);
2084 optimize_ops_list (rhs_code, &ops);
2085 if (undistribute_ops_list (rhs_code, &ops,
2086 loop_containing_stmt (stmt)))
2088 qsort (VEC_address (operand_entry_t, ops),
2089 VEC_length (operand_entry_t, ops),
2090 sizeof (operand_entry_t),
2091 sort_by_operand_rank);
2092 optimize_ops_list (rhs_code, &ops);
2095 if (VEC_length (operand_entry_t, ops) == 1)
2097 if (dump_file && (dump_flags & TDF_DETAILS))
2099 fprintf (dump_file, "Transforming ");
2100 print_gimple_stmt (dump_file, stmt, 0, 0);
2103 rhs1 = gimple_assign_rhs1 (stmt);
2104 gimple_assign_set_rhs_from_tree (&gsi,
2105 VEC_last (operand_entry_t,
2108 remove_visited_stmt_chain (rhs1);
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2112 fprintf (dump_file, " into ");
2113 print_gimple_stmt (dump_file, stmt, 0, 0);
2117 rewrite_expr_tree (stmt, 0, ops, false);
2119 VEC_free (operand_entry_t, heap, ops);
2123 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2125 son = next_dom_son (CDI_POST_DOMINATORS, son))
2126 reassociate_bb (son);
2129 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2130 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2132 /* Dump the operand entry vector OPS to FILE. */
2135 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2140 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
2142 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2143 print_generic_expr (file, oe->op, 0);
2147 /* Dump the operand entry vector OPS to STDERR. */
2150 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2152 dump_ops_vector (stderr, ops);
2158 break_up_subtract_bb (ENTRY_BLOCK_PTR);
2159 reassociate_bb (EXIT_BLOCK_PTR);
2162 /* Initialize the reassociation pass. */
2170 int *bbs = XNEWVEC (int, last_basic_block + 1);
2172 /* Find the loops, so that we can prevent moving calculations in
2174 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2176 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
2178 operand_entry_pool = create_alloc_pool ("operand entry pool",
2179 sizeof (struct operand_entry), 30);
2180 next_operand_entry_id = 0;
2182 /* Reverse RPO (Reverse Post Order) will give us something where
2183 deeper loops come later. */
2184 pre_and_rev_post_order_compute (NULL, bbs, false);
2185 bb_rank = XCNEWVEC (long, last_basic_block + 1);
2186 operand_rank = pointer_map_create ();
2188 /* Give each argument a distinct rank. */
2189 for (param = DECL_ARGUMENTS (current_function_decl);
2191 param = TREE_CHAIN (param))
2193 if (gimple_default_def (cfun, param) != NULL)
2195 tree def = gimple_default_def (cfun, param);
2196 insert_operand_rank (def, ++rank);
2200 /* Give the chain decl a distinct rank. */
2201 if (cfun->static_chain_decl != NULL)
2203 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
2205 insert_operand_rank (def, ++rank);
2208 /* Set up rank for each BB */
2209 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
2210 bb_rank[bbs[i]] = ++rank << 16;
2213 calculate_dominance_info (CDI_POST_DOMINATORS);
2214 plus_negates = NULL;
2217 /* Cleanup after the reassociation pass, and print stats if
2223 statistics_counter_event (cfun, "Linearized",
2224 reassociate_stats.linearized);
2225 statistics_counter_event (cfun, "Constants eliminated",
2226 reassociate_stats.constants_eliminated);
2227 statistics_counter_event (cfun, "Ops eliminated",
2228 reassociate_stats.ops_eliminated);
2229 statistics_counter_event (cfun, "Statements rewritten",
2230 reassociate_stats.rewritten);
2232 pointer_map_destroy (operand_rank);
2233 free_alloc_pool (operand_entry_pool);
2235 VEC_free (tree, heap, plus_negates);
2236 free_dominance_info (CDI_POST_DOMINATORS);
2237 loop_optimizer_finalize ();
2240 /* Gate and execute functions for Reassociation. */
2243 execute_reassoc (void)
2248 repropagate_negates ();
2255 gate_tree_ssa_reassoc (void)
2257 return flag_tree_reassoc != 0;
2260 struct gimple_opt_pass pass_reassoc =
2264 "reassoc", /* name */
2265 gate_tree_ssa_reassoc, /* gate */
2266 execute_reassoc, /* execute */
2269 0, /* static_pass_number */
2270 TV_TREE_REASSOC, /* tv_id */
2271 PROP_cfg | PROP_ssa, /* properties_required */
2272 0, /* properties_provided */
2273 0, /* properties_destroyed */
2274 0, /* todo_flags_start */
2275 TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */