1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
45 #include "diagnostic-core.h"
47 /* This is a simple global reassociation pass. It is, in part, based
48 on the LLVM pass of the same name (They do some things more/less
49 than we do, in different orders, etc).
51 It consists of five steps:
53 1. Breaking up subtract operations into addition + negate, where
54 it would promote the reassociation of adds.
56 2. Left linearization of the expression trees, so that (A+B)+(C+D)
57 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
58 During linearization, we place the operands of the binary
59 expressions into a vector of operand_entry_t
61 3. Optimization of the operand lists, eliminating things like a + -a, a & a, etc.
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
67 5. Repropagate negates, as nothing else will clean it up ATM.
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
88 a (1), b (1), c (1), d (2), e (2)
91 We start with our merge worklist empty, and the ops list with all of
94 You want to first merge all leaves of the same rank, as much as
97 So first build a binary op of
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
109 Then build a binary op of d + e
112 and put mergetmp2 on the merge worklist.
114 so merge worklist = {mergetmp, c, mergetmp2}
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
122 mergetmp3 = mergetmp + c
124 worklist = {mergetmp2, mergetmp3}
126 mergetmp4 = mergetmp2 + mergetmp3
128 worklist = {mergetmp4}
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
139 So why don't we do this?
141 Because it's expensive, and rarely will help. Most trees we are
142 reassociating have 3 or less ops. If they have 2 ops, they already
143 will be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
148 newstmt = mergetmp + op3
152 newstmt = mergetmp + op1
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you can do.
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
169 int constants_eliminated;
174 /* Operator, rank pair. */
175 typedef struct operand_entry
182 static alloc_pool operand_entry_pool;
184 /* This is used to assign a unique ID to each struct operand_entry
185 so that qsort results are identical on different hosts. */
186 static int next_operand_entry_id;
188 /* Starting rank number for a given basic block, so that we can rank
189 operations using unmovable instructions in that BB based on the bb
191 static long *bb_rank;
193 /* Operand->rank hashtable. */
194 static struct pointer_map_t *operand_rank;
197 static long get_rank (tree);
200 /* Bias amount for loop-carried phis. We want this to be larger than
201 the depth of any reassociation tree we can see, but not larger than
202 the rank difference between two blocks. */
203 #define PHI_LOOP_BIAS (1 << 15)
205 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
206 an innermost loop, and the phi has only a single use which is inside
207 the loop, then the rank is the block rank of the loop latch plus an
208 extra bias for the loop-carried dependence. This causes expressions
209 calculated into an accumulator variable to be independent for each
210 iteration of the loop. If STMT is some other phi, the rank is the
211 block rank of its containing block. */
/* NOTE(review): physical lines appear elided from this extract (return
   type, braces, local declarations, and parts of some conditions); the
   comments below describe only the statements that are visible.  */
213 phi_rank (gimple stmt)
215 basic_block bb = gimple_bb (stmt);
216 struct loop *father = bb->loop_father;
222 /* We only care about real loops (those with a latch). */
224 return bb_rank[bb->index];
226 /* Interesting phis must be in headers of innermost loops. */
227 if (bb != father->header
229 return bb_rank[bb->index];
231 /* Ignore virtual SSA_NAMEs. */
232 res = gimple_phi_result (stmt);
233 if (!is_gimple_reg (SSA_NAME_VAR (res)))
234 return bb_rank[bb->index];
236 /* The phi definition must have a single use, and that use must be
237 within the loop. Otherwise this isn't an accumulator pattern. */
238 if (!single_imm_use (res, &use, &use_stmt)
239 || gimple_bb (use_stmt)->loop_father != father)
240 return bb_rank[bb->index];
242 /* Look for phi arguments from within the loop. If found, bias this phi. */
243 for (i = 0; i < gimple_phi_num_args (stmt); i++)
245 tree arg = gimple_phi_arg_def (stmt, i);
246 if (TREE_CODE (arg) == SSA_NAME
247 && !SSA_NAME_IS_DEFAULT_DEF (arg))
249 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
250 if (gimple_bb (def_stmt)->loop_father == father)
/* An argument is defined inside the same loop: rank this phi above the
   latch block's rank by the PHI_LOOP_BIAS constant defined earlier.  */
251 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
255 /* Must be an uninteresting phi. */
256 return bb_rank[bb->index];
259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
260 loop-carried dependence of an innermost loop, return TRUE; else
/* NOTE(review): the tail of the doc comment, return type, braces, local
   declarations, and the return statements are elided in this extract.  */
263 loop_carried_phi (tree exp)
268 if (TREE_CODE (exp) != SSA_NAME
269 || SSA_NAME_IS_DEFAULT_DEF (exp))
272 phi_stmt = SSA_NAME_DEF_STMT (exp);
274 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
277 /* Non-loop-carried phis have block rank. Loop-carried phis have
278 an additional bias added in. If this phi doesn't have block rank,
279 it's biased and should not be propagated. */
280 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
/* Compare against phi_rank: a biased (loop-carried) phi ranks above its
   block's base rank.  */
282 if (phi_rank (phi_stmt) != block_rank)
288 /* Return the maximum of RANK and the rank that should be propagated
289 from expression OP. For most operands, this is just the rank of OP.
290 For loop-carried phis, the value is zero to avoid undoing the bias
291 in favor of the phi. */
/* NOTE(review): the return type, braces, and the early-return for the
   loop-carried-phi case are elided in this extract.  */
293 propagate_rank (long rank, tree op)
297 if (loop_carried_phi (op))
300 op_rank = get_rank (op);
302 return MAX (rank, op_rank);
305 /* Look up the operand rank structure for expression E. */
/* Returns the cached rank for E, or -1 when E has no entry in the
   operand_rank pointer map.  (Return type/braces elided in extract.)  */
308 find_operand_rank (tree e)
310 void **slot = pointer_map_contains (operand_rank, e);
311 return slot ? (long) (intptr_t) *slot : -1;
314 /* Insert {E,RANK} into the operand rank hashtable. */
/* RANK must be positive so the sentinel -1 used by find_operand_rank
   can never collide with a stored rank.  (Braces/decls elided.)  */
317 insert_operand_rank (tree e, long rank)
320 gcc_assert (rank > 0);
321 slot = pointer_map_insert (operand_rank, e);
323 *slot = (void *) (intptr_t) rank;
326 /* Given an expression E, return the rank of the expression. */
/* NOTE(review): the function header line (get_rank), braces, local
   declarations, several guard/return lines, and the else keywords are
   elided in this extract; only the visible interior is annotated.  */
331 /* Constants have rank 0. */
332 if (is_gimple_min_invariant (e))
335 /* SSA_NAME's have the rank of the expression they are the result
337 For globals and uninitialized values, the rank is 0.
338 For function arguments, use the pre-setup rank.
339 For PHI nodes, stores, asm statements, etc, we use the rank of
341 For simple operations, the rank is the maximum rank of any of
342 its operands, or the bb_rank, whichever is less.
343 I make no claims that this is optimal, however, it gives good
346 /* We make an exception to the normal ranking system to break
347 dependences of accumulator variables in loops. Suppose we
348 have a simple one-block loop containing:
355 As shown, each iteration of the calculation into x is fully
356 dependent upon the iteration before it. We would prefer to
357 see this in the form:
364 If the loop is unrolled, the calculations of b and c from
365 different iterations can be interleaved.
367 To obtain this result during reassociation, we bias the rank
368 of the phi definition x_1 upward, when it is recognized as an
369 accumulator pattern. The artificial rank causes it to be
370 added last, providing the desired independence. */
372 if (TREE_CODE (e) == SSA_NAME)
/* Default-def PARM_DECLs got their rank pre-seeded elsewhere, so just
   look it up.  */
379 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
380 && SSA_NAME_IS_DEFAULT_DEF (e))
381 return find_operand_rank (e);
383 stmt = SSA_NAME_DEF_STMT (e);
384 if (gimple_bb (stmt) == NULL)
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
/* Non-assignments and statements with a virtual def get the rank of
   their containing block.  */
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
408 rank = propagate_rank (rank, rhs);
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
416 rank = propagate_rank (rank, op);
/* (Presumably the non-single-rhs branch: walk the gimple operands
   instead of the tree operands -- the else line is elided.)  */
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
443 /* Globals, etc, are rank 0 */
447 DEF_VEC_P(operand_entry_t);
448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
450 /* We want integer ones to end up last no matter what, since they are
451 the ones we can do the most with. */
452 #define INTEGER_CONST_TYPE 1 << 3
453 #define FLOAT_CONST_TYPE 1 << 2
454 #define OTHER_CONST_TYPE 1 << 1
456 /* Classify an invariant tree into integer, float, or other, so that
457 we can sort them to be near other constants of the same type. */
/* Returns one of the *_CONST_TYPE bit values defined above; the sort
   comparator uses them to group like constants.  (Braces elided.)  */
459 constant_type (tree t)
461 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
462 return INTEGER_CONST_TYPE;
463 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
464 return FLOAT_CONST_TYPE;
466 return OTHER_CONST_TYPE;
469 /* qsort comparison function to sort operand entries PA and PB by rank
470 so that the sorted array is ordered by rank in decreasing order. */
/* NOTE(review): return type and braces are elided in this extract.
   Ties are broken by the per-entry unique id so qsort is stable across
   hosts (see next_operand_entry_id).  */
472 sort_by_operand_rank (const void *pa, const void *pb)
474 const operand_entry_t oea = *(const operand_entry_t *)pa;
475 const operand_entry_t oeb = *(const operand_entry_t *)pb;
477 /* It's nicer for optimize_expression if constants that are likely
478 to fold when added/multiplied//whatever are put next to each
479 other. Since all constants have rank 0, order them by type. */
480 if (oeb->rank == 0 && oea->rank == 0)
482 if (constant_type (oeb->op) != constant_type (oea->op))
483 return constant_type (oeb->op) - constant_type (oea->op);
485 /* To make sorting result stable, we use unique IDs to determine
487 return oeb->id - oea->id;
490 /* Lastly, make sure the versions that are the same go next to each
491 other. We use SSA_NAME_VERSION because it's stable. */
492 if ((oeb->rank - oea->rank == 0)
493 && TREE_CODE (oea->op) == SSA_NAME
494 && TREE_CODE (oeb->op) == SSA_NAME)
496 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
497 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
499 return oeb->id - oea->id;
502 if (oeb->rank != oea->rank)
503 return oeb->rank - oea->rank;
505 return oeb->id - oea->id;
508 /* Add an operand entry to *OPS for the tree operand OP. */
/* Allocates the entry from operand_entry_pool, computes its rank, and
   assigns the next unique id for stable sorting.  (The line storing OP
   into the entry and the braces are elided in this extract.)  */
511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
513 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
516 oe->rank = get_rank (op);
517 oe->id = next_operand_entry_id++;
518 VEC_safe_push (operand_entry_t, heap, *ops, oe);
521 /* Return true if STMT is reassociable operation containing a binary
522 operation with tree code CODE, and is inside LOOP. */
/* NOTE(review): return type, braces, and the false/true return lines
   are elided.  Visible checks: stmt must be placed in a BB, that BB
   must be inside LOOP, and the defining assign must match CODE with a
   single-use lhs.  */
525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
527 basic_block bb = gimple_bb (stmt);
529 if (gimple_bb (stmt) == NULL)
532 if (!flow_bb_inside_loop_p (loop, bb))
535 if (is_gimple_assign (stmt)
536 && gimple_assign_rhs_code (stmt) == code
537 && has_single_use (gimple_assign_lhs (stmt)))
544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
545 operand of the negate operation. Otherwise, return NULL. */
/* (Return type, braces, and the NULL-returning lines are elided in
   this extract.)  */
548 get_unary_op (tree name, enum tree_code opcode)
550 gimple stmt = SSA_NAME_DEF_STMT (name);
552 if (!is_gimple_assign (stmt))
555 if (gimple_assign_rhs_code (stmt) == opcode)
556 return gimple_assign_rhs1 (stmt);
560 /* If CURR and LAST are a pair of ops that OPCODE allows us to
561 eliminate through equivalences, do so, remove them from OPS, and
562 return true. Otherwise, return false. */
/* NOTE(review): return type, some parameters, braces, the switch on
   OPCODE, and the return statements are elided in this extract.  The
   two visible arms correspond to the idempotent case (x OP x == x for
   &, |, min, max: drop one copy) and the XOR case (x ^ x == 0: drop
   both, leaving a zero when the list would become empty).  */
565 eliminate_duplicate_pair (enum tree_code opcode,
566 VEC (operand_entry_t, heap) **ops,
569 operand_entry_t curr,
570 operand_entry_t last)
573 /* If we have two of the same op, and the opcode is & |, min, or max,
574 we can eliminate one of them.
575 If we have two of the same op, and the opcode is ^, we can
576 eliminate both of them. */
578 if (last && last->op == curr->op)
586 if (dump_file && (dump_flags & TDF_DETAILS))
588 fprintf (dump_file, "Equivalence: ");
589 print_generic_expr (dump_file, curr->op, 0);
590 fprintf (dump_file, " [&|minmax] ");
591 print_generic_expr (dump_file, last->op, 0);
592 fprintf (dump_file, " -> ");
593 print_generic_stmt (dump_file, last->op, 0);
596 VEC_ordered_remove (operand_entry_t, *ops, i);
597 reassociate_stats.ops_eliminated ++;
/* XOR arm: both duplicates cancel to nothing.  */
602 if (dump_file && (dump_flags & TDF_DETAILS))
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " ^ ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> nothing\n");
611 reassociate_stats.ops_eliminated += 2;
613 if (VEC_length (operand_entry_t, *ops) == 2)
615 VEC_free (operand_entry_t, heap, *ops);
/* The whole expression reduced to x ^ x: replace it with a zero of the
   operand's type.  */
617 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
622 VEC_ordered_remove (operand_entry_t, *ops, i-1);
623 VEC_ordered_remove (operand_entry_t, *ops, i-1);
635 static VEC(tree, heap) *plus_negates;
637 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
638 expression, look in OPS for a corresponding positive operation to cancel
639 it out. If we find one, remove the other from OPS, replace
640 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
/* NOTE(review): the tail of the doc comment, return type, braces, local
   declarations, and return statements are elided in this extract.  */
644 eliminate_plus_minus_pair (enum tree_code opcode,
645 VEC (operand_entry_t, heap) **ops,
646 unsigned int currindex,
647 operand_entry_t curr)
654 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
657 negateop = get_unary_op (curr->op, NEGATE_EXPR);
658 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
659 if (negateop == NULL_TREE && notop == NULL_TREE)
662 /* Any non-negated version will have a rank that is one less than
663 the current rank. So once we hit those ranks, if we don't find
666 for (i = currindex + 1;
667 VEC_iterate (operand_entry_t, *ops, i, oe)
668 && oe->rank >= curr->rank - 1 ;
/* x + (-x) cancels to 0.  */
671 if (oe->op == negateop)
674 if (dump_file && (dump_flags & TDF_DETAILS))
676 fprintf (dump_file, "Equivalence: ");
677 print_generic_expr (dump_file, negateop, 0);
678 fprintf (dump_file, " + -");
679 print_generic_expr (dump_file, oe->op, 0);
680 fprintf (dump_file, " -> 0\n");
683 VEC_ordered_remove (operand_entry_t, *ops, i);
684 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
685 VEC_ordered_remove (operand_entry_t, *ops, currindex);
686 reassociate_stats.ops_eliminated ++;
/* x + ~x folds to -1 (all-ones) in the operand's type.  */
690 else if (oe->op == notop)
692 tree op_type = TREE_TYPE (oe->op);
694 if (dump_file && (dump_flags & TDF_DETAILS))
696 fprintf (dump_file, "Equivalence: ");
697 print_generic_expr (dump_file, notop, 0);
698 fprintf (dump_file, " + ~");
699 print_generic_expr (dump_file, oe->op, 0);
700 fprintf (dump_file, " -> -1\n");
703 VEC_ordered_remove (operand_entry_t, *ops, i);
704 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
705 VEC_ordered_remove (operand_entry_t, *ops, currindex);
706 reassociate_stats.ops_eliminated ++;
712 /* CURR->OP is a negate expr in a plus expr: save it for later
713 inspection in repropagate_negates(). */
714 if (negateop != NULL_TREE)
715 VEC_safe_push (tree, heap, plus_negates, curr->op);
720 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
721 bitwise not expression, look in OPS for a corresponding operand to
722 cancel it out. If we find one, remove the other from OPS, replace
723 OPS[CURRINDEX] with 0, and return true. Otherwise, return
/* NOTE(review): the tail of the doc comment, return type, braces, local
   declarations, the match test inside the loop, and return statements
   are elided in this extract.  The visible logic: x & ~x -> 0 and
   x | ~x -> all-ones; the entire ops list collapses to that single
   constant.  */
727 eliminate_not_pairs (enum tree_code opcode,
728 VEC (operand_entry_t, heap) **ops,
729 unsigned int currindex,
730 operand_entry_t curr)
736 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
737 || TREE_CODE (curr->op) != SSA_NAME)
740 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
741 if (notop == NULL_TREE)
744 /* Any non-not version will have a rank that is one less than
745 the current rank. So once we hit those ranks, if we don't find
748 for (i = currindex + 1;
749 VEC_iterate (operand_entry_t, *ops, i, oe)
750 && oe->rank >= curr->rank - 1;
755 if (dump_file && (dump_flags & TDF_DETAILS))
757 fprintf (dump_file, "Equivalence: ");
758 print_generic_expr (dump_file, notop, 0);
759 if (opcode == BIT_AND_EXPR)
760 fprintf (dump_file, " & ~");
761 else if (opcode == BIT_IOR_EXPR)
762 fprintf (dump_file, " | ~");
763 print_generic_expr (dump_file, oe->op, 0);
764 if (opcode == BIT_AND_EXPR)
765 fprintf (dump_file, " -> 0\n");
766 else if (opcode == BIT_IOR_EXPR)
767 fprintf (dump_file, " -> -1\n");
770 if (opcode == BIT_AND_EXPR)
771 oe->op = build_zero_cst (TREE_TYPE (oe->op));
772 else if (opcode == BIT_IOR_EXPR)
773 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
774 TYPE_PRECISION (TREE_TYPE (oe->op)));
/* All other operands are now dead: count them as eliminated and leave
   only the folded constant in *OPS.  */
776 reassociate_stats.ops_eliminated
777 += VEC_length (operand_entry_t, *ops) - 1;
778 VEC_free (operand_entry_t, heap, *ops);
780 VEC_safe_push (operand_entry_t, heap, *ops, oe);
788 /* Use constant value that may be present in OPS to try to eliminate
789 operands. Note that this function is only really used when we've
790 eliminated ops for other reasons, or merged constants. Across
791 single statements, fold already does all of this, plus more. There
792 is little point in duplicating logic, so I've only included the
793 identities that I could ever construct testcases to trigger. */
/* NOTE(review): return type, braces, and the switch/case labels on
   OPCODE are elided in this extract.  The opcode each visible arm
   belongs to can be read off its dump message ("Found & 0", "Found | -1",
   "Found * 1", ...).  */
796 eliminate_using_constants (enum tree_code opcode,
797 VEC(operand_entry_t, heap) **ops)
799 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
800 tree type = TREE_TYPE (oelast->op);
802 if (oelast->rank == 0
803 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
/* (BIT_AND_EXPR case -- label elided): x & 0 == 0 kills all other ops;
   x & -1 == x drops the constant.  */
808 if (integer_zerop (oelast->op))
810 if (VEC_length (operand_entry_t, *ops) != 1)
812 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, "Found & 0, removing all other ops\n");
815 reassociate_stats.ops_eliminated
816 += VEC_length (operand_entry_t, *ops) - 1;
818 VEC_free (operand_entry_t, heap, *ops);
820 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
824 else if (integer_all_onesp (oelast->op))
826 if (VEC_length (operand_entry_t, *ops) != 1)
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "Found & -1, removing\n");
830 VEC_pop (operand_entry_t, *ops);
831 reassociate_stats.ops_eliminated++;
/* (BIT_IOR_EXPR case -- label elided): x | -1 == -1 kills all other
   ops; x | 0 == x drops the constant.  */
836 if (integer_all_onesp (oelast->op))
838 if (VEC_length (operand_entry_t, *ops) != 1)
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found | -1, removing all other ops\n");
843 reassociate_stats.ops_eliminated
844 += VEC_length (operand_entry_t, *ops) - 1;
846 VEC_free (operand_entry_t, heap, *ops);
848 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
852 else if (integer_zerop (oelast->op))
854 if (VEC_length (operand_entry_t, *ops) != 1)
856 if (dump_file && (dump_flags & TDF_DETAILS))
857 fprintf (dump_file, "Found | 0, removing\n");
858 VEC_pop (operand_entry_t, *ops);
859 reassociate_stats.ops_eliminated++;
/* (MULT_EXPR case -- label elided): x * 0 == 0 kills everything, but
   only when FP NaN/signed-zero semantics allow; x * 1 == x drops the
   constant (guarded against signalling NaNs for FP).  */
864 if (integer_zerop (oelast->op)
865 || (FLOAT_TYPE_P (type)
866 && !HONOR_NANS (TYPE_MODE (type))
867 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
868 && real_zerop (oelast->op)))
870 if (VEC_length (operand_entry_t, *ops) != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found * 0, removing all other ops\n");
875 reassociate_stats.ops_eliminated
876 += VEC_length (operand_entry_t, *ops) - 1;
877 VEC_free (operand_entry_t, heap, *ops);
879 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
883 else if (integer_onep (oelast->op)
884 || (FLOAT_TYPE_P (type)
885 && !HONOR_SNANS (TYPE_MODE (type))
886 && real_onep (oelast->op)))
888 if (VEC_length (operand_entry_t, *ops) != 1)
890 if (dump_file && (dump_flags & TDF_DETAILS))
891 fprintf (dump_file, "Found * 1, removing\n");
892 VEC_pop (operand_entry_t, *ops);
893 reassociate_stats.ops_eliminated++;
/* (XOR/PLUS/MINUS case -- labels elided): x OP 0 == x, with the FP
   zero-addition check for +/-.  */
901 if (integer_zerop (oelast->op)
902 || (FLOAT_TYPE_P (type)
903 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
904 && fold_real_zero_addition_p (type, oelast->op,
905 opcode == MINUS_EXPR)))
907 if (VEC_length (operand_entry_t, *ops) != 1)
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Found [|^+] 0, removing\n");
911 VEC_pop (operand_entry_t, *ops);
912 reassociate_stats.ops_eliminated++;
924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
927 /* Structure for tracking and counting operands. */
928 typedef struct oecount_s {
931 enum tree_code oecode;
936 DEF_VEC_ALLOC_O(oecount,heap);
938 /* The heap for the oecount hashtable and the sorted list of operands. */
939 static VEC (oecount, heap) *cvec;
941 /* Hash function for oecount. */
/* P is not a real pointer: it encodes an index into the cvec vector,
   offset by 42 (see the +41/-42 arithmetic in undistribute_ops_list).
   (Return type/braces elided.)  */
944 oecount_hash (const void *p)
946 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
947 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
950 /* Comparison function for oecount. */
/* Like oecount_hash, P1/P2 are cvec indices offset by 42; two entries
   are equal when both the operation code and the operand match.
   (Return type/braces elided.)  */
953 oecount_eq (const void *p1, const void *p2)
955 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
956 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
957 return (c1->oecode == c2->oecode
958 && c1->op == c2->op);
961 /* Comparison function for qsort sorting oecount elements by count. */
/* Ascending by cnt, with the unique per-entry id as tiebreaker so the
   qsort result is deterministic.  (Return type/braces elided.)  */
964 oecount_cmp (const void *p1, const void *p2)
966 const oecount *c1 = (const oecount *)p1;
967 const oecount *c2 = (const oecount *)p2;
968 if (c1->cnt != c2->cnt)
969 return c1->cnt - c2->cnt;
971 /* If counts are identical, use unique IDs to stabilize qsort. */
972 return c1->id - c2->id;
975 /* Walks the linear chain with result *DEF searching for an operation
976 with operand OP and code OPCODE removing that from the chain. *DEF
977 is updated if there is only one operand but no operation left. */
/* NOTE(review): braces, the enclosing loop construct, local
   declarations, and parts of conditions (e.g. the rhs1 == op test) are
   elided in this extract.  */
980 zero_one_operation (tree *def, enum tree_code opcode, tree op)
982 gimple stmt = SSA_NAME_DEF_STMT (*def);
986 tree name = gimple_assign_rhs1 (stmt);
988 /* If this is the operation we look for and one of the operands
989 is ours simply propagate the other operand into the stmts
991 if (gimple_assign_rhs_code (stmt) == opcode
993 || gimple_assign_rhs2 (stmt) == op))
997 gimple_stmt_iterator gsi;
999 name = gimple_assign_rhs2 (stmt);
1000 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1001 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1002 if (gimple_assign_lhs (stmt) == *def)
/* Replace the single use of this stmt's lhs with the surviving
   operand, then delete the now-dead statement.  */
1004 SET_USE (use, name);
1005 if (TREE_CODE (name) != SSA_NAME)
1006 update_stmt (use_stmt);
1007 gsi = gsi_for_stmt (stmt);
1008 gsi_remove (&gsi, true);
1009 release_defs (stmt);
1013 /* Continue walking the chain. */
1014 gcc_assert (name != op
1015 && TREE_CODE (name) == SSA_NAME);
1016 stmt = SSA_NAME_DEF_STMT (name);
1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1022 the result. Places the statement after the definition of either
1023 OP1 or OP2. Returns the new statement. */
/* NOTE(review): return type, braces, several else keywords, local
   declarations (edge/edge_iterator, sum, op), and the return statement
   are elided in this extract.  The visible placement policy: if neither
   operand has a real defining stmt, insert at the start of the first
   real block; otherwise insert after whichever def is dominated by the
   other, handling PHI defs (insert after labels) and block-ending defs
   (insert on the fallthru edge) specially.  */
1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1028 gimple op1def = NULL, op2def = NULL;
1029 gimple_stmt_iterator gsi;
1033 /* Create the addition statement. */
1034 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1035 op = make_ssa_name (tmpvar, sum);
1036 gimple_assign_set_lhs (sum, op);
1038 /* Find an insertion place and insert. */
1039 if (TREE_CODE (op1) == SSA_NAME)
1040 op1def = SSA_NAME_DEF_STMT (op1);
1041 if (TREE_CODE (op2) == SSA_NAME)
1042 op2def = SSA_NAME_DEF_STMT (op2);
1043 if ((!op1def || gimple_nop_p (op1def))
1044 && (!op2def || gimple_nop_p (op2def)))
1046 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1047 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1049 else if ((!op1def || gimple_nop_p (op1def))
1050 || (op2def && !gimple_nop_p (op2def)
1051 && stmt_dominates_stmt_p (op1def, op2def)))
1053 if (gimple_code (op2def) == GIMPLE_PHI)
1055 gsi = gsi_after_labels (gimple_bb (op2def));
1056 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1060 if (!stmt_ends_bb_p (op2def))
1062 gsi = gsi_for_stmt (op2def);
1063 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1070 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1071 if (e->flags & EDGE_FALLTHRU)
1072 gsi_insert_on_edge_immediate (e, sum);
/* Mirror of the above, anchored on op1's definition instead.  */
1078 if (gimple_code (op1def) == GIMPLE_PHI)
1080 gsi = gsi_after_labels (gimple_bb (op1def));
1081 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1085 if (!stmt_ends_bb_p (op1def))
1087 gsi = gsi_for_stmt (op1def);
1088 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1095 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1096 if (e->flags & EDGE_FALLTHRU)
1097 gsi_insert_on_edge_immediate (e, sum);
1106 /* Perform un-distribution of divisions and multiplications.
1107 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1108 to (A + B) / X for real X.
1110 The algorithm is organized as follows.
1112 - First we walk the addition chain *OPS looking for summands that
1113 are defined by a multiplication or a real division. This results
1114 in the candidates bitmap with relevant indices into *OPS.
1116 - Second we build the chains of multiplications or divisions for
1117 these candidates, counting the number of occurences of (operand, code)
1118 pairs in all of the candidates chains.
1120 - Third we sort the (operand, code) pairs by number of occurence and
1121 process them starting with the pair with the most uses.
1123 * For each such pair we walk the candidates again to build a
1124 second candidate bitmap noting all multiplication/division chains
1125 that have at least one occurence of (operand, code).
1127 * We build an alternate addition chain only covering these
1128 candidates with one (operand, code) operation removed from their
1129 multiplication/division chain.
1131 * The first candidate gets replaced by the alternate addition chain
1132 multiplied/divided by the operand.
1134 * All candidate chains get disabled for further processing and
1135 processing of (operand, code) pairs continues.
1137 The alternate addition chains built are re-processed by the main
1138 reassociation algorithm which allows optimizing a * x * y + b * y * x
1139 to (a + b ) * x * y in one invocation of the reassociation pass. */
1142 undistribute_ops_list (enum tree_code opcode,
1143 VEC (operand_entry_t, heap) **ops, struct loop *loop)
1145 unsigned int length = VEC_length (operand_entry_t, *ops);
1146 operand_entry_t oe1;
1148 sbitmap candidates, candidates2;
1149 unsigned nr_candidates, nr_candidates2;
1150 sbitmap_iterator sbi0;
1151 VEC (operand_entry_t, heap) **subops;
1153 bool changed = false;
1154 int next_oecount_id = 0;
1157 || opcode != PLUS_EXPR)
1160 /* Build a list of candidates to process. */
1161 candidates = sbitmap_alloc (length);
1162 sbitmap_zero (candidates);
1164 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1166 enum tree_code dcode;
1169 if (TREE_CODE (oe1->op) != SSA_NAME)
1171 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1172 if (!is_gimple_assign (oe1def))
1174 dcode = gimple_assign_rhs_code (oe1def);
1175 if ((dcode != MULT_EXPR
1176 && dcode != RDIV_EXPR)
1177 || !is_reassociable_op (oe1def, dcode, loop))
1180 SET_BIT (candidates, i);
1184 if (nr_candidates < 2)
1186 sbitmap_free (candidates);
1190 if (dump_file && (dump_flags & TDF_DETAILS))
1192 fprintf (dump_file, "searching for un-distribute opportunities ");
1193 print_generic_expr (dump_file,
1194 VEC_index (operand_entry_t, *ops,
1195 sbitmap_first_set_bit (candidates))->op, 0);
1196 fprintf (dump_file, " %d\n", nr_candidates);
1199 /* Build linearized sub-operand lists and the counting table. */
1201 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1202 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1203 VEC_length (operand_entry_t, *ops));
1204 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1207 enum tree_code oecode;
1210 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1211 oecode = gimple_assign_rhs_code (oedef);
1212 linearize_expr_tree (&subops[i], oedef,
1213 associative_tree_code (oecode), false);
1215 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1222 c.id = next_oecount_id++;
1224 VEC_safe_push (oecount, heap, cvec, &c);
1225 idx = VEC_length (oecount, cvec) + 41;
1226 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1229 *slot = (void *)idx;
1233 VEC_pop (oecount, cvec);
1234 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1238 htab_delete (ctable);
1240 /* Sort the counting table. */
1241 VEC_qsort (oecount, cvec, oecount_cmp);
1243 if (dump_file && (dump_flags & TDF_DETAILS))
1246 fprintf (dump_file, "Candidates:\n");
1247 FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1249 fprintf (dump_file, " %u %s: ", c->cnt,
1250 c->oecode == MULT_EXPR
1251 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1252 print_generic_expr (dump_file, c->op, 0);
1253 fprintf (dump_file, "\n");
1257 /* Process the (operand, code) pairs in order of most occurence. */
1258 candidates2 = sbitmap_alloc (length);
1259 while (!VEC_empty (oecount, cvec))
1261 oecount *c = VEC_last (oecount, cvec);
1265 /* Now collect the operands in the outer chain that contain
1266 the common operand in their inner chain. */
1267 sbitmap_zero (candidates2);
1269 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1272 enum tree_code oecode;
1274 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1276 /* If we undistributed in this chain already this may be
1278 if (TREE_CODE (op) != SSA_NAME)
1281 oedef = SSA_NAME_DEF_STMT (op);
1282 oecode = gimple_assign_rhs_code (oedef);
1283 if (oecode != c->oecode)
1286 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1288 if (oe1->op == c->op)
1290 SET_BIT (candidates2, i);
1297 if (nr_candidates2 >= 2)
1299 operand_entry_t oe1, oe2;
1302 int first = sbitmap_first_set_bit (candidates2);
1304 /* Build the new addition chain. */
1305 oe1 = VEC_index (operand_entry_t, *ops, first);
1306 if (dump_file && (dump_flags & TDF_DETAILS))
1308 fprintf (dump_file, "Building (");
1309 print_generic_expr (dump_file, oe1->op, 0);
1311 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1312 add_referenced_var (tmpvar);
1313 zero_one_operation (&oe1->op, c->oecode, c->op);
1314 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1317 oe2 = VEC_index (operand_entry_t, *ops, i);
1318 if (dump_file && (dump_flags & TDF_DETAILS))
1320 fprintf (dump_file, " + ");
1321 print_generic_expr (dump_file, oe2->op, 0);
1323 zero_one_operation (&oe2->op, c->oecode, c->op);
1324 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1325 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1327 oe1->op = gimple_get_lhs (sum);
1330 /* Apply the multiplication/division. */
1331 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1332 if (dump_file && (dump_flags & TDF_DETAILS))
1334 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1335 print_generic_expr (dump_file, c->op, 0);
1336 fprintf (dump_file, "\n");
1339 /* Record it in the addition chain and disable further
1340 undistribution with this op. */
1341 oe1->op = gimple_assign_lhs (prod);
1342 oe1->rank = get_rank (oe1->op);
1343 VEC_free (operand_entry_t, heap, subops[first]);
1348 VEC_pop (oecount, cvec);
1351 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1352 VEC_free (operand_entry_t, heap, subops[i]);
1354 VEC_free (oecount, heap, cvec);
1355 sbitmap_free (candidates);
1356 sbitmap_free (candidates2);
1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1362 expression, examine the other OPS to see if any of them are comparisons
1363 of the same values, which we may be able to combine or eliminate.
1364 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
/* NOTE(review): several interleaved lines (early returns, braces) are
   absent from this listing; presumably each failed guard below returns
   false and a successful combination returns true -- confirm against the
   full source.  */
1367 eliminate_redundant_comparison (enum tree_code opcode,
1368 VEC (operand_entry_t, heap) **ops,
1369 unsigned int currindex,
1370 operand_entry_t curr)
1373 enum tree_code lcode, rcode;
/* Only a bitwise OR/AND of comparisons can be merged this way.  */
1378 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1381 /* Check that CURR is a comparison. */
1382 if (TREE_CODE (curr->op) != SSA_NAME)
1384 def1 = SSA_NAME_DEF_STMT (curr->op);
1385 if (!is_gimple_assign (def1))
1387 lcode = gimple_assign_rhs_code (def1);
1388 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1390 op1 = gimple_assign_rhs1 (def1);
1391 op2 = gimple_assign_rhs2 (def1);
1393 /* Now look for a similar comparison in the remaining OPS. */
1394 for (i = currindex + 1;
1395 VEC_iterate (operand_entry_t, *ops, i, oe);
/* Skip operands that are not themselves SSA comparisons.  */
1400 if (TREE_CODE (oe->op) != SSA_NAME)
1402 def2 = SSA_NAME_DEF_STMT (oe->op);
1403 if (!is_gimple_assign (def2))
1405 rcode = gimple_assign_rhs_code (def2);
1406 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1409 /* If we got here, we have a match. See if we can combine the
1411 if (opcode == BIT_IOR_EXPR)
1412 t = maybe_fold_or_comparisons (lcode, op1, op2,
1413 rcode, gimple_assign_rhs1 (def2),
1414 gimple_assign_rhs2 (def2));
1416 t = maybe_fold_and_comparisons (lcode, op1, op2,
1417 rcode, gimple_assign_rhs1 (def2),
1418 gimple_assign_rhs2 (def2));
1422 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1423 always give us a boolean_type_node value back. If the original
1424 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1425 we need to convert. */
1426 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1427 t = fold_convert (TREE_TYPE (curr->op), t);
/* Make sure the combined result can be expressed as a gimple value;
   otherwise give up on this pair.  */
1429 if (TREE_CODE (t) != INTEGER_CST
1430 && !operand_equal_p (t, curr->op, 0))
1432 enum tree_code subcode;
1433 tree newop1, newop2;
1434 if (!COMPARISON_CLASS_P (t))
1436 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1437 STRIP_USELESS_TYPE_CONVERSION (newop1);
1438 STRIP_USELESS_TYPE_CONVERSION (newop2);
1439 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1443 if (dump_file && (dump_flags & TDF_DETAILS))
1445 fprintf (dump_file, "Equivalence: ");
1446 print_generic_expr (dump_file, curr->op, 0);
1447 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1448 print_generic_expr (dump_file, oe->op, 0);
1449 fprintf (dump_file, " -> ");
1450 print_generic_expr (dump_file, t, 0);
1451 fprintf (dump_file, "\n");
1454 /* Now we can delete oe, as it has been subsumed by the new combined
1456 VEC_ordered_remove (operand_entry_t, *ops, i);
1457 reassociate_stats.ops_eliminated ++;
1459 /* If t is the same as curr->op, we're done. Otherwise we must
1460 replace curr->op with t. Special case is if we got a constant
1461 back, in which case we add it to the end instead of in place of
1462 the current entry. */
1463 if (TREE_CODE (t) == INTEGER_CST)
1465 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1466 add_to_ops_vec (ops, t);
1468 else if (!operand_equal_p (t, curr->op, 0))
1472 enum tree_code subcode;
/* Emit a fresh statement computing the combined comparison and make
   CURR refer to its result.  */
1475 gcc_assert (COMPARISON_CLASS_P (t));
1476 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1477 add_referenced_var (tmpvar);
1478 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1479 STRIP_USELESS_TYPE_CONVERSION (newop1);
1480 STRIP_USELESS_TYPE_CONVERSION (newop2);
1481 gcc_checking_assert (is_gimple_val (newop1)
1482 && is_gimple_val (newop2));
1483 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1484 curr->op = gimple_get_lhs (sum);
1492 /* Perform various identities and other optimizations on the list of
1493 operand entries, stored in OPS. The tree code for the binary
1494 operation between all the operands is OPCODE. */
1497 optimize_ops_list (enum tree_code opcode,
1498 VEC (operand_entry_t, heap) **ops)
1500 unsigned int length = VEC_length (operand_entry_t, *ops);
1503 operand_entry_t oelast = NULL;
1504 bool iterate = false;
/* Constants sort to the end of the vector (rank 0), so only the last
   two entries need to be inspected for constant folding.  */
1509 oelast = VEC_last (operand_entry_t, *ops);
1511 /* If the last two are constants, pop the constants off, merge them
1512 and try the next two. */
1513 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1515 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1517 if (oelm1->rank == 0
1518 && is_gimple_min_invariant (oelm1->op)
1519 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1520 TREE_TYPE (oelast->op)))
1522 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1523 oelm1->op, oelast->op);
1525 if (folded && is_gimple_min_invariant (folded))
1527 if (dump_file && (dump_flags & TDF_DETAILS))
1528 fprintf (dump_file, "Merging constants\n");
1530 VEC_pop (operand_entry_t, *ops);
1531 VEC_pop (operand_entry_t, *ops);
1533 add_to_ops_vec (ops, folded);
1534 reassociate_stats.constants_eliminated++;
/* Recurse: the merged constant may fold with yet another one.  */
1536 optimize_ops_list (opcode, ops);
1542 eliminate_using_constants (opcode, ops);
/* Walk the list applying pairwise eliminations; the helpers adjust I
   themselves when they remove entries.  */
1545 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1549 if (eliminate_not_pairs (opcode, ops, i, oe))
1551 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1552 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1553 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
/* If anything changed, run the whole list optimization again.
   NOTE(review): the test of `iterate' is among the lines elided here.  */
1565 length = VEC_length (operand_entry_t, *ops);
1566 oelast = VEC_last (operand_entry_t, *ops);
1569 optimize_ops_list (opcode, ops);
1572 /* The following functions are subroutines to optimize_range_tests and allow
1573 it to try to change a logical combination of comparisons into a range
1577 X == 2 || X == 5 || X == 3 || X == 4
1581 (unsigned) (X - 2) <= 3
1583 For more information see comments above fold_test_range in fold-const.c,
1584 this implementation is for GIMPLE. */
/* Fields of struct range_entry (the declaration head is elided in this
   listing).  strict_overflow_p records whether deriving the range relied
   on signed overflow being undefined; idx is the entry's index into the
   ops vector.  NOTE(review): `next' presumably chains entries -- confirm
   against the full source.  */
1592 bool strict_overflow_p;
1593 unsigned int idx, next;
1596 /* This is similar to make_range in fold-const.c, but on top of
1597 GIMPLE instead of trees. */
/* Initialize *R to the range described by the SSA name (or constant)
   EXP, following its defining statements to refine [low, high].  */
1600 init_range_entry (struct range_entry *r, tree exp)
1604 bool is_bool, strict_overflow_p;
1608 r->strict_overflow_p = false;
1610 r->high = NULL_TREE;
/* Only integral SSA names can be turned into range tests.  */
1611 if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1614 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1615 and see if we can refine the range. Some of the cases below may not
1616 happen, but it doesn't seem worth worrying about this. We "continue"
1617 the outer loop when we've changed something; otherwise we "break"
1618 the switch, which will "break" the while. */
1619 low = build_int_cst (TREE_TYPE (exp), 0);
1622 strict_overflow_p = false;
/* Track whether EXP behaves like a boolean (1-bit or BOOLEAN_TYPE),
   which lets comparisons against it be treated as range tests.  */
1624 if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1626 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1631 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1637 enum tree_code code;
1638 tree arg0, arg1, exp_type;
/* Follow EXP's SSA def chain; stop at anything that is not a
   gimple assignment.  */
1642 if (TREE_CODE (exp) != SSA_NAME)
1645 stmt = SSA_NAME_DEF_STMT (exp);
1646 if (!is_gimple_assign (stmt))
1649 code = gimple_assign_rhs_code (stmt);
1650 arg0 = gimple_assign_rhs1 (stmt);
1651 arg1 = gimple_assign_rhs2 (stmt);
1652 exp_type = TREE_TYPE (exp);
1653 loc = gimple_location (stmt);
1657 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1670 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1672 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1677 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
/* Delegate one refinement step to the shared fold-const helper.  */
1692 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1694 &strict_overflow_p);
1695 if (nexp != NULL_TREE)
1698 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1711 r->strict_overflow_p = strict_overflow_p;
1715 /* Comparison function for qsort. Sort entries
1716 without SSA_NAME exp first, then with SSA_NAMEs sorted
1717 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1718 by increasing ->low and if ->low is the same, by increasing
1719 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1723 range_entry_cmp (const void *a, const void *b)
1725 const struct range_entry *p = (const struct range_entry *) a;
1726 const struct range_entry *q = (const struct range_entry *) b;
1728 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1730 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1732 /* Group range_entries for the same SSA_NAME together. */
1733 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1735 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1737 /* If ->low is different, NULL low goes first, then by
1739 if (p->low != NULL_TREE)
1741 if (q->low != NULL_TREE)
/* Use fold_binary so symbolic bounds compare correctly; a NULL or
   non-constant fold result means "cannot order".  */
1743 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1745 if (tem && integer_onep (tem))
1747 tem = fold_binary (GT_EXPR, boolean_type_node,
1749 if (tem && integer_onep (tem))
1755 else if (q->low != NULL_TREE)
1757 /* If ->high is different, NULL high goes last, before that by
1759 if (p->high != NULL_TREE)
1761 if (q->high != NULL_TREE)
1763 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1765 if (tem && integer_onep (tem))
1767 tem = fold_binary (GT_EXPR, boolean_type_node,
1769 if (tem && integer_onep (tem))
1775 else if (p->high != NULL_TREE)
1777 /* If both ranges are the same, sort below by ascending idx. */
1782 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
/* idx values are distinct, which keeps the qsort stable in effect.  */
1785 if (p->idx < q->idx)
1789 gcc_checking_assert (p->idx > q->idx);
1794 /* Helper routine of optimize_range_test.
1795 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1796 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1797 OPCODE and OPS are arguments of optimize_range_tests. Return
1798 true if the range merge has been successful. */
1801 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1802 unsigned int count, enum tree_code opcode,
1803 VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1804 tree low, tree high, bool strict_overflow_p)
1806 tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1807 location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
/* build_range_check may fail (return NULL) when the range cannot be
   expressed as a single test; in that case we bail out.  */
1808 tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1809 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1810 gimple_stmt_iterator gsi;
1812 if (tem == NULL_TREE)
1815 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1816 warning_at (loc, OPT_Wstrict_overflow,
1817 "assuming signed overflow does not occur "
1818 "when simplifying range test");
1820 if (dump_file && (dump_flags & TDF_DETAILS))
1822 struct range_entry *r;
1823 fprintf (dump_file, "Optimizing range tests ");
1824 print_generic_expr (dump_file, range->exp, 0);
1825 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1826 print_generic_expr (dump_file, range->low, 0);
1827 fprintf (dump_file, ", ");
1828 print_generic_expr (dump_file, range->high, 0);
1829 fprintf (dump_file, "]");
1830 for (r = otherrange; r < otherrange + count; r++)
1832 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1833 print_generic_expr (dump_file, r->low, 0);
1834 fprintf (dump_file, ", ");
1835 print_generic_expr (dump_file, r->high, 0);
1836 fprintf (dump_file, "]");
1838 fprintf (dump_file, "\n into ");
1839 print_generic_expr (dump_file, tem, 0);
1840 fprintf (dump_file, "\n");
/* The caller inverted ranges for BIT_IOR_EXPR before merging, so
   invert the built test back here.  */
1843 if (opcode == BIT_IOR_EXPR)
1844 tem = invert_truthvalue_loc (loc, tem);
1846 tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1847 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1848 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1851 VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1856 range->strict_overflow_p = false;
/* Mark the merged-away entries dead; optimize_range_tests compacts
   error_mark_node operands out of the vector afterwards.  */
1858 for (range = otherrange; range < otherrange + count; range++)
1860 VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1861 range->exp = NULL_TREE;
1866 /* Optimize range tests, similarly how fold_range_test optimizes
1867 it on trees. The tree code for the binary
1868 operation between all the operands is OPCODE. */
1871 optimize_range_tests (enum tree_code opcode,
1872 VEC (operand_entry_t, heap) **ops)
1874 unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1876 struct range_entry *ranges;
1877 bool any_changes = false;
/* One range_entry per operand; freed at the end of the function.  */
1882 ranges = XNEWVEC (struct range_entry, length);
1883 for (i = 0; i < length; i++)
1886 init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
1887 /* For | invert it now, we will invert it again before emitting
1888 the optimized expression. */
1889 if (opcode == BIT_IOR_EXPR)
1890 ranges[i].in_p = !ranges[i].in_p;
/* Sorting groups ranges over the same SSA_NAME so merge candidates
   are adjacent.  */
1893 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
1894 for (i = 0; i < length; i++)
1895 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
1898 /* Try to merge ranges. */
1899 for (first = i; i < length; i++)
1901 tree low = ranges[i].low;
1902 tree high = ranges[i].high;
1903 int in_p = ranges[i].in_p;
1904 bool strict_overflow_p = ranges[i].strict_overflow_p;
1905 int update_fail_count = 0;
1907 for (j = i + 1; j < length; j++)
1909 if (ranges[i].exp != ranges[j].exp)
1911 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
1912 ranges[j].in_p, ranges[j].low, ranges[j].high))
1914 strict_overflow_p |= ranges[j].strict_overflow_p;
1920 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
1921 ops, ranges[i].exp, in_p, low, high,
1927 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
1928 while update_range_test would fail. */
1929 else if (update_fail_count == 64)
1932 ++update_fail_count;
1935 /* Optimize X == CST1 || X == CST2
1936 if popcount (CST1 ^ CST2) == 1 into
1937 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
1938 Similarly for ranges. E.g.
1939 X != 2 && X != 3 && X != 10 && X != 11
1940 will be transformed by the above loop into
1941 (X - 2U) <= 1U && (X - 10U) <= 1U
1942 and this loop can transform that into
1943 ((X & ~8) - 2U) <= 1U. */
1944 for (i = first; i < length; i++)
1946 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
1948 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
1950 type = TREE_TYPE (ranges[i].exp);
1951 if (!INTEGRAL_TYPE_P (type))
1953 lowi = ranges[i].low;
1954 if (lowi == NULL_TREE)
1955 lowi = TYPE_MIN_VALUE (type);
1956 highi = ranges[i].high;
1957 if (highi == NULL_TREE)
/* Cap the inner scan at 64 partners to bound the work per range.  */
1959 for (j = i + 1; j < length && j < i + 64; j++)
1961 if (ranges[j].exp == NULL_TREE)
1963 if (ranges[i].exp != ranges[j].exp)
1967 lowj = ranges[j].low;
1968 if (lowj == NULL_TREE)
1970 highj = ranges[j].high;
1971 if (highj == NULL_TREE)
1972 highj = TYPE_MAX_VALUE (type);
/* Ranges must be disjoint (lowj > highi) for the bitmask trick.  */
1973 tem = fold_binary (GT_EXPR, boolean_type_node,
1975 if (tem == NULL_TREE || !integer_onep (tem))
1977 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
1978 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
1980 gcc_checking_assert (!integer_zerop (lowxor));
/* lowxor & (lowxor - 1) == 0 <=> exactly one bit differs.  */
1981 tem = fold_binary (MINUS_EXPR, type, lowxor,
1982 build_int_cst (type, 1));
1983 if (tem == NULL_TREE)
1985 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
1986 if (tem == NULL_TREE || !integer_zerop (tem))
1988 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
1989 if (!tree_int_cst_equal (lowxor, highxor))
1991 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
1992 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
1993 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
1994 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
1995 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
1996 ranges[i].in_p, lowj, highj,
1997 ranges[i].strict_overflow_p
1998 || ranges[j].strict_overflow_p))
/* Compact the ops vector, dropping entries update_range_test marked
   with error_mark_node.  */
2009 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2011 if (oe->op == error_mark_node)
2014 VEC_replace (operand_entry_t, *ops, j, oe);
2017 VEC_truncate (operand_entry_t, *ops, j);
2020 XDELETEVEC (ranges);
2023 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2024 of STMT in it's operands. This is also known as a "destructive
2025 update" operation. */
2028 is_phi_for_stmt (gimple stmt, tree operand)
2032 use_operand_p arg_p;
/* Only SSA names can be PHI results.  */
2035 if (TREE_CODE (operand) != SSA_NAME)
2038 lhs = gimple_assign_lhs (stmt);
2040 def_stmt = SSA_NAME_DEF_STMT (operand);
2041 if (gimple_code (def_stmt) != GIMPLE_PHI)
/* Scan the PHI arguments for a use of STMT's LHS.  */
2044 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2045 if (lhs == USE_FROM_PTR (arg_p))
2050 /* Remove def stmt of VAR if VAR has zero uses and recurse
2051 on rhs1 operand if so. */
2054 remove_visited_stmt_chain (tree var)
2057 gimple_stmt_iterator gsi;
/* Stop once VAR is not a dead SSA name; only statements this pass has
   already visited (i.e. rewritten around) are removed.  */
2061 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2063 stmt = SSA_NAME_DEF_STMT (var);
2064 if (!is_gimple_assign (stmt)
2065 || !gimple_visited_p (stmt))
/* Walk up the chain through rhs1 before deleting this statement.  */
2067 var = gimple_assign_rhs1 (stmt);
2068 gsi = gsi_for_stmt (stmt);
2069 gsi_remove (&gsi, true);
2070 release_defs (stmt);
2074 /* This function checks three consecutive operands in
2075 passed operands vector OPS starting from OPINDEX and
2076 swaps two operands if it is profitable for binary operation
2077 consuming OPINDEX + 1 and OPINDEX + 2 operands.
2079 We pair ops with the same rank if possible.
2081 The alternative we try is to see if STMT is a destructive
2082 update style statement, which is like:
2085 In that case, we want to use the destructive update form to
2086 expose the possible vectorizer sum reduction opportunity.
2087 In that case, the third operand will be the phi node. This
2088 check is not performed if STMT is null.
2090 We could, of course, try to be better as noted above, and do a
2091 lot of work to try to find these opportunities in >3 operand
2092 cases, but it is unlikely to be worth it. */
2095 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2096 unsigned int opindex, gimple stmt)
2098 operand_entry_t oe1, oe2, oe3;
2100 oe1 = VEC_index (operand_entry_t, ops, opindex);
2101 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2102 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
/* Prefer oe1/oe2 equal-rank pairing, or push a PHI (destructive
   update) operand into the oe1 slot.  */
2104 if ((oe1->rank == oe2->rank
2105 && oe2->rank != oe3->rank)
2106 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2107 && !is_phi_for_stmt (stmt, oe1->op)
2108 && !is_phi_for_stmt (stmt, oe2->op)))
/* Swap entries oe1 <-> oe3 in place.  */
2110 struct operand_entry temp = *oe3;
2112 oe3->rank = oe1->rank;
2114 oe1->rank= temp.rank;
2116 else if ((oe1->rank == oe3->rank
2117 && oe2->rank != oe3->rank)
2118 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2119 && !is_phi_for_stmt (stmt, oe1->op)
2120 && !is_phi_for_stmt (stmt, oe3->op)))
/* Swap entries oe1 <-> oe2 in place.  */
2122 struct operand_entry temp = *oe2;
2124 oe2->rank = oe1->rank;
2126 oe1->rank= temp.rank;
2130 /* Recursively rewrite our linearized statements so that the operators
2131 match those in OPS[OPINDEX], putting the computation in rank
2135 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2136 VEC(operand_entry_t, heap) * ops, bool moved)
2138 tree rhs1 = gimple_assign_rhs1 (stmt);
2139 tree rhs2 = gimple_assign_rhs2 (stmt);
2142 /* If we have three operands left, then we want to make sure the ones
2143 that get the double binary op are chosen wisely. */
2144 if (opindex + 3 == VEC_length (operand_entry_t, ops))
2145 swap_ops_for_binary_stmt (ops, opindex, stmt);
2147 /* The final recursion case for this function is that you have
2148 exactly two operations left.
2149 If we had one exactly one op in the entire list to start with, we
2150 would have never called this function, and the tail recursion
2151 rewrites them one at a time. */
2152 if (opindex + 2 == VEC_length (operand_entry_t, ops))
2154 operand_entry_t oe1, oe2;
2156 oe1 = VEC_index (operand_entry_t, ops, opindex);
2157 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
/* Only touch the statement if its operands actually change.  */
2159 if (rhs1 != oe1->op || rhs2 != oe2->op)
2161 if (dump_file && (dump_flags & TDF_DETAILS))
2163 fprintf (dump_file, "Transforming ");
2164 print_gimple_stmt (dump_file, stmt, 0, 0)
2167 gimple_assign_set_rhs1 (stmt, oe1->op);
2168 gimple_assign_set_rhs2 (stmt, oe2->op);
/* The old rhs1 chain may now be dead; clean it up.  */
2170 if (rhs1 != oe1->op && rhs1 != oe2->op)
2171 remove_visited_stmt_chain (rhs1);
2173 if (dump_file && (dump_flags & TDF_DETAILS))
2175 fprintf (dump_file, " into ");
2176 print_gimple_stmt (dump_file, stmt, 0, 0);
2183 /* If we hit here, we should have 3 or more ops left. */
2184 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2186 /* Rewrite the next operator. */
2187 oe = VEC_index (operand_entry_t, ops, opindex);
/* NOTE(review): the condition guarding this hoisting block (presumably
   testing `moved') is among the lines elided from this listing.  */
2193 gimple_stmt_iterator gsinow, gsirhs1;
2194 gimple stmt1 = stmt, stmt2;
/* Move the whole linearized chain up before STMT so the rewritten
   statements dominate their uses.  */
2197 gsinow = gsi_for_stmt (stmt);
2198 count = VEC_length (operand_entry_t, ops) - opindex - 2;
2199 while (count-- != 0)
2201 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2202 gsirhs1 = gsi_for_stmt (stmt2);
2203 gsi_move_before (&gsirhs1, &gsinow);
2210 if (dump_file && (dump_flags & TDF_DETAILS))
2212 fprintf (dump_file, "Transforming ");
2213 print_gimple_stmt (dump_file, stmt, 0, 0);
2216 gimple_assign_set_rhs2 (stmt, oe->op);
2219 if (dump_file && (dump_flags & TDF_DETAILS))
2221 fprintf (dump_file, " into ");
2222 print_gimple_stmt (dump_file, stmt, 0, 0);
2225 /* Recurse on the LHS of the binary operator, which is guaranteed to
2226 be the non-leaf side. */
2227 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2230 /* Find out how many cycles we need to compute statements chain.
2231 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
2232 maximum number of independent statements we may execute per cycle. */
2235 get_required_cycles (int ops_num, int cpu_width)
2241 /* While we have more than 2 * cpu_width operands
2242 we may reduce number of operands by cpu_width
2244 res = ops_num / (2 * cpu_width);
2246 /* Remained operands count may be reduced twice per cycle
2247 until we have only one operand. */
2248 rest = (unsigned)(ops_num - res * cpu_width);
2249 elog = exact_log2 (rest);
/* Non-power-of-two remainders need one extra cycle (floor_log2 + 1);
   the exact-power case handled with `elog' is elided in this listing.  */
2253 res += floor_log2 (rest) + 1;
2258 /* Returns an optimal number of registers to use for computation of
2259 given statements. */
2262 get_reassociation_width (int ops_num, enum tree_code opc,
2263 enum machine_mode mode)
/* An explicit --param tree-reassoc-width overrides the target hook.  */
2265 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2270 if (param_width > 0)
2271 width = param_width;
2273 width = targetm.sched.reassociation_width (opc, mode);
2278 /* Get the minimal time required for sequence computation. */
2279 cycles_best = get_required_cycles (ops_num, width);
2281 /* Check if we may use less width and still compute sequence for
2282 the same time. It will allow us to reduce registers usage.
2283 get_required_cycles is monotonically increasing with lower width
2284 so we can perform a binary search for the minimal width that still
2285 results in the optimal cycle count. */
2287 while (width > width_min)
2289 int width_mid = (width + width_min) / 2;
/* Narrower width keeps the cycle count: search lower half.  */
2291 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2293 else if (width_min < width_mid)
2294 width_min = width_mid;
2302 /* Recursively rewrite our linearized statements so that the operators
2303 match those in OPS[OPINDEX], putting the computation in rank
2304 order and trying to allow operations to be executed in
2308 rewrite_expr_tree_parallel (gimple stmt, int width,
2309 VEC(operand_entry_t, heap) * ops)
2311 enum tree_code opcode = gimple_assign_rhs_code (stmt);
2312 int op_num = VEC_length (operand_entry_t, ops);
/* N operands need exactly N-1 binary statements.  */
2313 int stmt_num = op_num - 1;
2314 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2315 int op_index = op_num - 1;
2317 int ready_stmts_end = 0;
2319 tree last_rhs1 = gimple_assign_rhs1 (stmt);
2322 /* We start expression rewriting from the top statements.
2323 So, in this loop we create a full list of statements
2324 we will work with. */
2325 stmts[stmt_num - 1] = stmt;
2326 for (i = stmt_num - 2; i >= 0; i--)
2327 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2329 lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2330 add_referenced_var (lhs_var);
2332 for (i = 0; i < stmt_num; i++)
2336 /* Determine whether we should use results of
2337 already handled statements or not. */
2338 if (ready_stmts_end == 0
2339 && (i - stmt_index >= width || op_index < 1))
2340 ready_stmts_end = i;
2342 /* Now we choose operands for the next statement. Non zero
2343 value in ready_stmts_end means here that we should use
2344 the result of already generated statements as new operand. */
2345 if (ready_stmts_end > 0)
2347 op1 = gimple_assign_lhs (stmts[stmt_index++]);
2348 if (ready_stmts_end > stmt_index)
2349 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2350 else if (op_index >= 0)
2351 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
/* No fresh operands left: combine two earlier results.  */
2354 gcc_assert (stmt_index < i);
2355 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2358 if (stmt_index >= ready_stmts_end)
2359 ready_stmts_end = 0;
/* Consume operands from the (rank-sorted) ops vector, swapping for a
   profitable pairing first.  */
2364 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2365 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2366 op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2369 /* If we emit the last statement then we should put
2370 operands into the last statement. It will also
2372 if (op_index < 0 && stmt_index == i)
2375 if (dump_file && (dump_flags & TDF_DETAILS))
2377 fprintf (dump_file, "Transforming ");
2378 print_gimple_stmt (dump_file, stmts[i], 0, 0);
2381 /* We keep original statement only for the last one. All
2382 others are recreated. */
2383 if (i == stmt_num - 1)
2385 gimple_assign_set_rhs1 (stmts[i], op1);
2386 gimple_assign_set_rhs2 (stmts[i], op2);
2387 update_stmt (stmts[i]);
2390 stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2394 fprintf (dump_file, " into ");
2395 print_gimple_stmt (dump_file, stmts[i], 0, 0);
/* Clean up the now-dead original chain hanging off the old rhs1.  */
2399 remove_visited_stmt_chain (last_rhs1);
2402 /* Transform STMT, which is really (A +B) + (C + D) into the left
2403 linear form, ((A+B)+C)+D.
2404 Recurse on D if necessary. */
2407 linearize_expr (gimple stmt)
2409 gimple_stmt_iterator gsinow, gsirhs;
2410 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2411 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2412 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2413 gimple newbinrhs = NULL;
2414 struct loop *loop = loop_containing_stmt (stmt);
2416 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2417 && is_reassociable_op (binrhs, rhscode, loop));
/* Move binrhs before STMT so dominance holds after re-wiring.  */
2419 gsinow = gsi_for_stmt (stmt);
2420 gsirhs = gsi_for_stmt (binrhs);
2421 gsi_move_before (&gsirhs, &gsinow);
/* Rotate: STMT = (binlhs + C) ... with binrhs rebuilt as the new
   left-nested inner sum.  */
2423 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2424 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2425 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2427 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2428 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2430 if (dump_file && (dump_flags & TDF_DETAILS))
2432 fprintf (dump_file, "Linearized: ");
2433 print_gimple_stmt (dump_file, stmt, 0, 0);
2436 reassociate_stats.linearized++;
2437 update_stmt (binrhs);
2438 update_stmt (binlhs);
/* Mark all three as handled so they are not linearized again.  */
2441 gimple_set_visited (stmt, true);
2442 gimple_set_visited (binlhs, true);
2443 gimple_set_visited (binrhs, true);
2445 /* Tail recurse on the new rhs if it still needs reassociation. */
2446 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2447 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2448 want to change the algorithm while converting to tuples. */
2449 linearize_expr (stmt);
2452 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2453 it. Otherwise, return NULL. */
2456 get_single_immediate_use (tree lhs)
2458 use_operand_p immuse;
/* single_imm_use fills immusestmt with the unique using statement.  */
2461 if (TREE_CODE (lhs) == SSA_NAME
2462 && single_imm_use (lhs, &immuse, &immusestmt)
2463 && is_gimple_assign (immusestmt))
2469 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2470 representing the negated value. Insertions of any necessary
2471 instructions go before GSI.
2472 This function is recursive in that, if you hand it "a_5" as the
2473 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2474 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2477 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2479 gimple negatedefstmt= NULL;
2480 tree resultofnegate;
2482 /* If we are trying to negate a name, defined by an add, negate the
2483 add operands instead. */
2484 if (TREE_CODE (tonegate) == SSA_NAME)
2485 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
/* Push the negation into a single-use defining PLUS_EXPR: negate both
   of its operands in place and reuse its LHS.  */
2486 if (TREE_CODE (tonegate) == SSA_NAME
2487 && is_gimple_assign (negatedefstmt)
2488 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2489 && has_single_use (gimple_assign_lhs (negatedefstmt))
2490 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2492 gimple_stmt_iterator gsi;
2493 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2494 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
/* Re-fetch the iterator after each recursive negate_value call,
   since it may have inserted statements.  */
2496 gsi = gsi_for_stmt (negatedefstmt);
2497 rhs1 = negate_value (rhs1, &gsi);
2498 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2500 gsi = gsi_for_stmt (negatedefstmt);
2501 rhs2 = negate_value (rhs2, &gsi);
2502 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2504 update_stmt (negatedefstmt);
2505 return gimple_assign_lhs (negatedefstmt);
/* Base case: materialize -tonegate as a new statement before GSI.  */
2508 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2509 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2510 NULL_TREE, true, GSI_SAME_STMT);
2511 return resultofnegate;
2514 /* Return true if we should break up the subtract in STMT into an add
2515 with negate. This is true when the subtract operands are really
2516 adds, or the subtract itself is used in an add expression. In
2517 either case, breaking up the subtract into an add with negate
2518 exposes the adds to reassociation. */
2521 should_break_up_subtract (gimple stmt)
2523 tree lhs = gimple_assign_lhs (stmt);
2524 tree binlhs = gimple_assign_rhs1 (stmt);
2525 tree binrhs = gimple_assign_rhs2 (stmt);
2527 struct loop *loop = loop_containing_stmt (stmt);
/* Either operand being a reassociable add makes the break-up useful.  */
2529 if (TREE_CODE (binlhs) == SSA_NAME
2530 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2533 if (TREE_CODE (binrhs) == SSA_NAME
2534 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
/* Likewise when the subtract's only use feeds an add or multiply.  */
2537 if (TREE_CODE (lhs) == SSA_NAME
2538 && (immusestmt = get_single_immediate_use (lhs))
2539 && is_gimple_assign (immusestmt)
2540 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2541 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2546 /* Transform STMT from A - B into A + -B. */
2549 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2551 tree rhs1 = gimple_assign_rhs1 (stmt);
2552 tree rhs2 = gimple_assign_rhs2 (stmt);
2554 if (dump_file && (dump_flags & TDF_DETAILS))
2556 fprintf (dump_file, "Breaking up subtract ");
2557 print_gimple_stmt (dump_file, stmt, 0, 0);
2560 rhs2 = negate_value (rhs2, gsip);
2561 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2565 /* Recursively linearize a binary expression that is the RHS of STMT.
2566 Place the operands of the expression tree in the vector named OPS. */
2569 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2570 bool is_associative, bool set_visited)
2572 tree binlhs = gimple_assign_rhs1 (stmt);
2573 tree binrhs = gimple_assign_rhs2 (stmt);
2574 gimple binlhsdef, binrhsdef;
2575 bool binlhsisreassoc = false;
2576 bool binrhsisreassoc = false;
2577 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2578 struct loop *loop = loop_containing_stmt (stmt);
2581 gimple_set_visited (stmt, true);
2583 if (TREE_CODE (binlhs) == SSA_NAME)
2585 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2586 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2587 && !stmt_could_throw_p (binlhsdef));
2590 if (TREE_CODE (binrhs) == SSA_NAME)
2592 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2593 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2594 && !stmt_could_throw_p (binrhsdef));
2597 /* If the LHS is not reassociable, but the RHS is, we need to swap
2598 them. If neither is reassociable, there is nothing we can do, so
2599 just put them in the ops vector. If the LHS is reassociable,
2600 linearize it. If both are reassociable, then linearize the RHS
2603 if (!binlhsisreassoc)
2607 /* If this is not a associative operation like division, give up. */
2608 if (!is_associative)
2610 add_to_ops_vec (ops, binrhs);
2614 if (!binrhsisreassoc)
2616 add_to_ops_vec (ops, binrhs);
2617 add_to_ops_vec (ops, binlhs);
2621 if (dump_file && (dump_flags & TDF_DETAILS))
2623 fprintf (dump_file, "swapping operands of ");
2624 print_gimple_stmt (dump_file, stmt, 0, 0);
2627 swap_tree_operands (stmt,
2628 gimple_assign_rhs1_ptr (stmt),
2629 gimple_assign_rhs2_ptr (stmt));
2632 if (dump_file && (dump_flags & TDF_DETAILS))
2634 fprintf (dump_file, " is now ");
2635 print_gimple_stmt (dump_file, stmt, 0, 0);
2638 /* We want to make it so the lhs is always the reassociative op,
2644 else if (binrhsisreassoc)
2646 linearize_expr (stmt);
2647 binlhs = gimple_assign_rhs1 (stmt);
2648 binrhs = gimple_assign_rhs2 (stmt);
2651 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2652 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2654 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2655 is_associative, set_visited);
2656 add_to_ops_vec (ops, binrhs);
2659 /* Repropagate the negates back into subtracts, since no other pass
2660 currently does it. */
2663 repropagate_negates (void)
2668 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2670 gimple user = get_single_immediate_use (negate);
2672 if (!user || !is_gimple_assign (user))
2675 /* The negate operand can be either operand of a PLUS_EXPR
2676 (it can be the LHS if the RHS is a constant for example).
2678 Force the negate operand to the RHS of the PLUS_EXPR, then
2679 transform the PLUS_EXPR into a MINUS_EXPR. */
2680 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2682 /* If the negated operand appears on the LHS of the
2683 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2684 to force the negated operand to the RHS of the PLUS_EXPR. */
2685 if (gimple_assign_rhs1 (user) == negate)
2687 swap_tree_operands (user,
2688 gimple_assign_rhs1_ptr (user),
2689 gimple_assign_rhs2_ptr (user));
2692 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2693 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2694 if (gimple_assign_rhs2 (user) == negate)
2696 tree rhs1 = gimple_assign_rhs1 (user);
2697 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2698 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2699 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2703 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2705 if (gimple_assign_rhs1 (user) == negate)
2710 which we transform into
2713 This pushes down the negate which we possibly can merge
2714 into some other operation, hence insert it into the
2715 plus_negates vector. */
2716 gimple feed = SSA_NAME_DEF_STMT (negate);
2717 tree a = gimple_assign_rhs1 (feed);
2718 tree rhs2 = gimple_assign_rhs2 (user);
2719 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2720 gimple_replace_lhs (feed, negate);
2721 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2722 update_stmt (gsi_stmt (gsi));
2723 gsi2 = gsi_for_stmt (user);
2724 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2725 update_stmt (gsi_stmt (gsi2));
2726 gsi_move_before (&gsi, &gsi2);
2727 VEC_safe_push (tree, heap, plus_negates,
2728 gimple_assign_lhs (gsi_stmt (gsi2)));
2732 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2733 rid of one operation. */
2734 gimple feed = SSA_NAME_DEF_STMT (negate);
2735 tree a = gimple_assign_rhs1 (feed);
2736 tree rhs1 = gimple_assign_rhs1 (user);
2737 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2738 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2739 update_stmt (gsi_stmt (gsi));
2745 /* Returns true if OP is of a type for which we can do reassociation.
2746 That is for integral or non-saturating fixed-point types, and for
2747 floating point type when associative-math is enabled. */
2750 can_reassociate_p (tree op)
2752 tree type = TREE_TYPE (op);
2753 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2754 || NON_SAT_FIXED_POINT_TYPE_P (type)
2755 || (flag_associative_math && FLOAT_TYPE_P (type)))
2760 /* Break up subtract operations in block BB.
2762 We do this top down because we don't know whether the subtract is
2763 part of a possible chain of reassociation except at the top.
2772 we want to break up k = t - q, but we won't until we've transformed q
2773 = b - r, which won't be broken up until we transform b = c - d.
2775 En passant, clear the GIMPLE visited flag on every statement. */
2778 break_up_subtract_bb (basic_block bb)
2780 gimple_stmt_iterator gsi;
2783 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2785 gimple stmt = gsi_stmt (gsi);
2786 gimple_set_visited (stmt, false);
2788 if (!is_gimple_assign (stmt)
2789 || !can_reassociate_p (gimple_assign_lhs (stmt)))
2792 /* Look for simple gimple subtract operations. */
2793 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2795 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2796 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2799 /* Check for a subtract used only in an addition. If this
2800 is the case, transform it into add of a negate for better
2801 reassociation. IE transform C = A-B into C = A + -B if C
2802 is only used in an addition. */
2803 if (should_break_up_subtract (stmt))
2804 break_up_subtract (stmt, &gsi);
2806 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2807 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2808 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2810 for (son = first_dom_son (CDI_DOMINATORS, bb);
2812 son = next_dom_son (CDI_DOMINATORS, son))
2813 break_up_subtract_bb (son);
2816 /* Reassociate expressions in basic block BB and its post-dominator as
2820 reassociate_bb (basic_block bb)
2822 gimple_stmt_iterator gsi;
2825 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2827 gimple stmt = gsi_stmt (gsi);
2829 if (is_gimple_assign (stmt)
2830 && !stmt_could_throw_p (stmt))
2832 tree lhs, rhs1, rhs2;
2833 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2835 /* If this is not a gimple binary expression, there is
2836 nothing for us to do with it. */
2837 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2840 /* If this was part of an already processed statement,
2841 we don't need to touch it again. */
2842 if (gimple_visited_p (stmt))
2844 /* This statement might have become dead because of previous
2846 if (has_zero_uses (gimple_get_lhs (stmt)))
2848 gsi_remove (&gsi, true);
2849 release_defs (stmt);
2850 /* We might end up removing the last stmt above which
2851 places the iterator to the end of the sequence.
2852 Reset it to the last stmt in this case which might
2853 be the end of the sequence as well if we removed
2854 the last statement of the sequence. In which case
2855 we need to bail out. */
2856 if (gsi_end_p (gsi))
2858 gsi = gsi_last_bb (bb);
2859 if (gsi_end_p (gsi))
2866 lhs = gimple_assign_lhs (stmt);
2867 rhs1 = gimple_assign_rhs1 (stmt);
2868 rhs2 = gimple_assign_rhs2 (stmt);
2870 /* For non-bit or min/max operations we can't associate
2871 all types. Verify that here. */
2872 if (rhs_code != BIT_IOR_EXPR
2873 && rhs_code != BIT_AND_EXPR
2874 && rhs_code != BIT_XOR_EXPR
2875 && rhs_code != MIN_EXPR
2876 && rhs_code != MAX_EXPR
2877 && (!can_reassociate_p (lhs)
2878 || !can_reassociate_p (rhs1)
2879 || !can_reassociate_p (rhs2)))
2882 if (associative_tree_code (rhs_code))
2884 VEC(operand_entry_t, heap) *ops = NULL;
2886 /* There may be no immediate uses left by the time we
2887 get here because we may have eliminated them all. */
2888 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2891 gimple_set_visited (stmt, true);
2892 linearize_expr_tree (&ops, stmt, true, true);
2893 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2894 optimize_ops_list (rhs_code, &ops);
2895 if (undistribute_ops_list (rhs_code, &ops,
2896 loop_containing_stmt (stmt)))
2898 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2899 optimize_ops_list (rhs_code, &ops);
2902 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
2903 optimize_range_tests (rhs_code, &ops);
2905 if (VEC_length (operand_entry_t, ops) == 1)
2907 if (dump_file && (dump_flags & TDF_DETAILS))
2909 fprintf (dump_file, "Transforming ");
2910 print_gimple_stmt (dump_file, stmt, 0, 0);
2913 rhs1 = gimple_assign_rhs1 (stmt);
2914 gimple_assign_set_rhs_from_tree (&gsi,
2915 VEC_last (operand_entry_t,
2918 remove_visited_stmt_chain (rhs1);
2920 if (dump_file && (dump_flags & TDF_DETAILS))
2922 fprintf (dump_file, " into ");
2923 print_gimple_stmt (dump_file, stmt, 0, 0);
2928 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2929 int ops_num = VEC_length (operand_entry_t, ops);
2930 int width = get_reassociation_width (ops_num, rhs_code, mode);
2932 if (dump_file && (dump_flags & TDF_DETAILS))
2934 "Width = %d was chosen for reassociation\n", width);
2937 && VEC_length (operand_entry_t, ops) > 3)
2938 rewrite_expr_tree_parallel (stmt, width, ops);
2940 rewrite_expr_tree (stmt, 0, ops, false);
2943 VEC_free (operand_entry_t, heap, ops);
2947 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2949 son = next_dom_son (CDI_POST_DOMINATORS, son))
2950 reassociate_bb (son);
2953 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2954 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2956 /* Dump the operand entry vector OPS to FILE. */
2959 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2964 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2966 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2967 print_generic_expr (file, oe->op, 0);
2971 /* Dump the operand entry vector OPS to STDERR. */
2974 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2976 dump_ops_vector (stderr, ops);
2982 break_up_subtract_bb (ENTRY_BLOCK_PTR);
2983 reassociate_bb (EXIT_BLOCK_PTR);
2986 /* Initialize the reassociation pass. */
2994 int *bbs = XNEWVEC (int, last_basic_block + 1);
2996 /* Find the loops, so that we can prevent moving calculations in
2998 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3000 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3002 operand_entry_pool = create_alloc_pool ("operand entry pool",
3003 sizeof (struct operand_entry), 30);
3004 next_operand_entry_id = 0;
3006 /* Reverse RPO (Reverse Post Order) will give us something where
3007 deeper loops come later. */
3008 pre_and_rev_post_order_compute (NULL, bbs, false);
3009 bb_rank = XCNEWVEC (long, last_basic_block + 1);
3010 operand_rank = pointer_map_create ();
3012 /* Give each argument a distinct rank. */
3013 for (param = DECL_ARGUMENTS (current_function_decl);
3015 param = DECL_CHAIN (param))
3017 if (gimple_default_def (cfun, param) != NULL)
3019 tree def = gimple_default_def (cfun, param);
3020 insert_operand_rank (def, ++rank);
3024 /* Give the chain decl a distinct rank. */
3025 if (cfun->static_chain_decl != NULL)
3027 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3029 insert_operand_rank (def, ++rank);
3032 /* Set up rank for each BB */
3033 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3034 bb_rank[bbs[i]] = ++rank << 16;
3037 calculate_dominance_info (CDI_POST_DOMINATORS);
3038 plus_negates = NULL;
3041 /* Cleanup after the reassociation pass, and print stats if
3047 statistics_counter_event (cfun, "Linearized",
3048 reassociate_stats.linearized);
3049 statistics_counter_event (cfun, "Constants eliminated",
3050 reassociate_stats.constants_eliminated);
3051 statistics_counter_event (cfun, "Ops eliminated",
3052 reassociate_stats.ops_eliminated);
3053 statistics_counter_event (cfun, "Statements rewritten",
3054 reassociate_stats.rewritten);
3056 pointer_map_destroy (operand_rank);
3057 free_alloc_pool (operand_entry_pool);
3059 VEC_free (tree, heap, plus_negates);
3060 free_dominance_info (CDI_POST_DOMINATORS);
3061 loop_optimizer_finalize ();
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
3079 gate_tree_ssa_reassoc (void)
3081 return flag_tree_reassoc != 0;
3084 struct gimple_opt_pass pass_reassoc =
3088 "reassoc", /* name */
3089 gate_tree_ssa_reassoc, /* gate */
3090 execute_reassoc, /* execute */
3093 0, /* static_pass_number */
3094 TV_TREE_REASSOC, /* tv_id */
3095 PROP_cfg | PROP_ssa, /* properties_required */
3096 0, /* properties_provided */
3097 0, /* properties_destroyed */
3098 0, /* todo_flags_start */
3101 | TODO_ggc_collect /* todo_flags_finish */