gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Daniel Berlin <dan@dberlin.org>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "tree-pretty-print.h"
  29 #include "gimple-pretty-print.h"
  30 #include "tree-inline.h"
  31 #include "tree-flow.h"
  32 #include "gimple.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "tree-iterator.h"
  36 #include "tree-pass.h"
  37 #include "alloc-pool.h"
  38 #include "vec.h"
  39 #include "langhooks.h"
  40 #include "pointer-set.h"
  41 #include "cfgloop.h"
  42 #include "flags.h"
  43 #include "target.h"
  44 #include "params.h"
  45 #include "diagnostic-core.h"
  46
  47 /*  This is a simple global reassociation pass.  It is, in part, based
  48     on the LLVM pass of the same name (They do some things more/less
  49     than we do, in different orders, etc).
  50
  51     It consists of five steps:
  52
  53     1. Breaking up subtract operations into addition + negate, where
  54     it would promote the reassociation of adds.
  55
  56     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  57     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  58     During linearization, we place the operands of the binary
  59     expressions into a vector of operand_entry_t
  60
  61     3. Optimization of the operand lists, eliminating things like a +
  62     -a, a & a, etc.
  63
  64     4. Rewrite the expression trees we linearized and optimized so
  65     they are in proper rank order.
  66
  67     5. Repropagate negates, as nothing else will clean it up ATM.
  68
  69     A bit of theory on #4, since nobody seems to write anything down
  70     about why it makes sense to do it the way they do it:
  71
  72     We could do this much nicer theoretically, but don't (for reasons
  73     explained after how to do it theoretically nice :P).
  74
  75     In order to promote the most redundancy elimination, you want
  76     binary expressions whose operands are the same rank (or
  77     preferably, the same value) exposed to the redundancy eliminator,
  78     for possible elimination.
  79
  80     So the way to do this if we really cared, is to build the new op
  81     tree from the leaves to the roots, merging as you go, and putting the
  82     new op on the end of the worklist, until you are left with one
  83     thing on the worklist.
  84
  85     IE if you have to rewrite the following set of operands (listed with
  86     rank in parentheses), with opcode PLUS_EXPR:
  87
  88     a (1),  b (1),  c (1),  d (2), e (2)
  89
  90
  91     We start with our merge worklist empty, and the ops list with all of
  92     those on it.
  93
  94     You want to first merge all leaves of the same rank, as much as
  95     possible.
  96
  97     So first build a binary op of
  98
  99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 100
 101     Because there is no three operand form of PLUS_EXPR, c is not going to
 102     be exposed to redundancy elimination as a rank 1 operand.
 103
 104     So you might as well throw it on the merge worklist (you could also
 105     consider it to now be a rank two operand, and merge it with d and e,
 106     but in this case, you then have evicted e from a binary op. So at
 107     least in this situation, you can't win.)
 108
 109     Then build a binary op of d + e
 110     mergetmp2 = d + e
 111
 112     and put mergetmp2 on the merge worklist.
 113
 114     so merge worklist = {mergetmp, c, mergetmp2}
 115
 116     Continue building binary ops of these operations until you have only
 117     one operation left on the worklist.
 118
 119     So we have
 120
 121     build binary op
 122     mergetmp3 = mergetmp + c
 123
 124     worklist = {mergetmp2, mergetmp3}
 125
 126     mergetmp4 = mergetmp2 + mergetmp3
 127
 128     worklist = {mergetmp4}
 129
 130     because we have one operation left, we can now just set the original
 131     statement equal to the result of that operation.
 132
 133     This will at least expose a + b  and d + e to redundancy elimination
 134     as binary operations.
 135
 136     For extra points, you can reuse the old statements to build the
 137     mergetmps, since you shouldn't run out.
 138
 139     So why don't we do this?
 140
 141     Because it's expensive, and rarely will help.  Most trees we are
 142     reassociating have 3 or less ops.  If they have 2 ops, they already
 143     will be written into a nice single binary op.  If you have 3 ops, a
 144     single simple check suffices to tell you whether the first two are of the
 145     same rank.  If so, you know to order it
 146
 147     mergetmp = op1 + op2
 148     newstmt = mergetmp + op3
 149
 150     instead of
 151     mergetmp = op2 + op3
 152     newstmt = mergetmp + op1
 153
 154     If all three are of the same rank, you can't expose them all in a
 155     single binary operator anyway, so the above is *still* the best you
 156     can do.
 157
 158     Thus, this is what we do.  When we have three ops left, we check to see
 159     what order to put them in, and call it a day.  As a nod to vector sum
 160     reduction, we check if any of the ops are really a phi node that is a
 161     destructive update for the associating op, and keep the destructive
 162     update together for vector sum reduction recognition.  */
 163
 164
 165 /* Statistics */
 166 static struct
 167 {
 168   int linearized;
 169   int constants_eliminated;
 170   int ops_eliminated;
 171   int rewritten;
 172 } reassociate_stats;
 173
 174 /* Operator, rank pair.  */
 175 typedef struct operand_entry
 176 {
 177   unsigned int rank;
 178   int id;
 179   tree op;
 180 } *operand_entry_t;
 181
 182 static alloc_pool operand_entry_pool;
 183
 184 /* This is used to assign a unique ID to each struct operand_entry
 185    so that qsort results are identical on different hosts.  */
 186 static int next_operand_entry_id;
 187
 188 /* Starting rank number for a given basic block, so that we can rank
 189    operations using unmovable instructions in that BB based on the bb
 190    depth.  */
 191 static long *bb_rank;
 192
 193 /* Operand->rank hashtable.  */
 194 static struct pointer_map_t *operand_rank;
 195
 196 /* Forward decls.  */
 197 static long get_rank (tree);
 198
 199
 200 /* Bias amount for loop-carried phis.  We want this to be larger than
 201    the depth of any reassociation tree we can see, but not larger than
 202    the rank difference between two blocks.  */
 203 #define PHI_LOOP_BIAS (1 << 15)
 204
 205 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 206    an innermost loop, and the phi has only a single use which is inside
 207    the loop, then the rank is the block rank of the loop latch plus an
 208    extra bias for the loop-carried dependence.  This causes expressions
 209    calculated into an accumulator variable to be independent for each
 210    iteration of the loop.  If STMT is some other phi, the rank is the
 211    block rank of its containing block.  */
 212 static long
 213 phi_rank (gimple stmt)
 214 {
 215   basic_block bb = gimple_bb (stmt);
 216   struct loop *father = bb->loop_father;
 217   tree res;
 218   unsigned i;
 219   use_operand_p use;
 220   gimple use_stmt;
 221
 222   /* We only care about real loops (those with a latch).  */
 223   if (!father->latch)
 224     return bb_rank[bb->index];
 225
 226   /* Interesting phis must be in headers of innermost loops.  */
 227   if (bb != father->header
 228       || father->inner)
 229     return bb_rank[bb->index];
 230
 231   /* Ignore virtual SSA_NAMEs.  */
 232   res = gimple_phi_result (stmt);
 233   if (!is_gimple_reg (SSA_NAME_VAR (res)))
 234     return bb_rank[bb->index];
 235
 236   /* The phi definition must have a single use, and that use must be
 237      within the loop.  Otherwise this isn't an accumulator pattern.  */
 238   if (!single_imm_use (res, &use, &use_stmt)
 239       || gimple_bb (use_stmt)->loop_father != father)
 240     return bb_rank[bb->index];
 241
 242   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 243   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 244     {
 245       tree arg = gimple_phi_arg_def (stmt, i);
 246       if (TREE_CODE (arg) == SSA_NAME
 247           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 248         {
 249           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 250           if (gimple_bb (def_stmt)->loop_father == father)
 251             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 252         }
 253     }
 254
 255   /* Must be an uninteresting phi.  */
 256   return bb_rank[bb->index];
 257 }
 258
 259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 260    loop-carried dependence of an innermost loop, return TRUE; else
 261    return FALSE.  */
 262 static bool
 263 loop_carried_phi (tree exp)
 264 {
 265   gimple phi_stmt;
 266   long block_rank;
 267
 268   if (TREE_CODE (exp) != SSA_NAME
 269       || SSA_NAME_IS_DEFAULT_DEF (exp))
 270     return false;
 271
 272   phi_stmt = SSA_NAME_DEF_STMT (exp);
 273
 274   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 275     return false;
 276
 277   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 278      an additional bias added in.  If this phi doesn't have block rank,
 279      it's biased and should not be propagated.  */
 280   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 281
 282   if (phi_rank (phi_stmt) != block_rank)
 283     return true;
 284
 285   return false;
 286 }
 287
 288 /* Return the maximum of RANK and the rank that should be propagated
 289    from expression OP.  For most operands, this is just the rank of OP.
 290    For loop-carried phis, the value is zero to avoid undoing the bias
 291    in favor of the phi.  */
 292 static long
 293 propagate_rank (long rank, tree op)
 294 {
 295   long op_rank;
 296
 297   if (loop_carried_phi (op))
 298     return rank;
 299
 300   op_rank = get_rank (op);
 301
 302   return MAX (rank, op_rank);
 303 }
 304
 305 /* Look up the operand rank structure for expression E.  */
 306
 307 static inline long
 308 find_operand_rank (tree e)
 309 {
 310   void **slot = pointer_map_contains (operand_rank, e);
 311   return slot ? (long) (intptr_t) *slot : -1;
 312 }
 313
 314 /* Insert {E,RANK} into the operand rank hashtable.  */
 315
 316 static inline void
 317 insert_operand_rank (tree e, long rank)
 318 {
 319   void **slot;
 320   gcc_assert (rank > 0);
 321   slot = pointer_map_insert (operand_rank, e);
 322   gcc_assert (!*slot);
 323   *slot = (void *) (intptr_t) rank;
 324 }
 325
 326 /* Given an expression E, return the rank of the expression.  */
 327
 328 static long
 329 get_rank (tree e)
 330 {
 331   /* Constants have rank 0.  */
 332   if (is_gimple_min_invariant (e))
 333     return 0;
 334
 335   /* SSA_NAME's have the rank of the expression they are the result
 336      of.
 337      For globals and uninitialized values, the rank is 0.
 338      For function arguments, use the pre-setup rank.
 339      For PHI nodes, stores, asm statements, etc, we use the rank of
 340      the BB.
 341      For simple operations, the rank is the maximum rank of any of
 342      its operands, or the bb_rank, whichever is less.
 343      I make no claims that this is optimal, however, it gives good
 344      results.  */
 345
 346   /* We make an exception to the normal ranking system to break
 347      dependences of accumulator variables in loops.  Suppose we
 348      have a simple one-block loop containing:
 349
 350        x_1 = phi(x_0, x_2)
 351        b = a + x_1
 352        c = b + d
 353        x_2 = c + e
 354
 355      As shown, each iteration of the calculation into x is fully
 356      dependent upon the iteration before it.  We would prefer to
 357      see this in the form:
 358
 359        x_1 = phi(x_0, x_2)
 360        b = a + d
 361        c = b + e
 362        x_2 = c + x_1
 363
 364      If the loop is unrolled, the calculations of b and c from
 365      different iterations can be interleaved.
 366
 367      To obtain this result during reassociation, we bias the rank
 368      of the phi definition x_1 upward, when it is recognized as an
 369      accumulator pattern.  The artificial rank causes it to be
 370      added last, providing the desired independence.  */
 371
 372   if (TREE_CODE (e) == SSA_NAME)
 373     {
 374       gimple stmt;
 375       long rank;
 376       int i, n;
 377       tree op;
 378
 379       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
 380           && SSA_NAME_IS_DEFAULT_DEF (e))
 381         return find_operand_rank (e);
 382
 383       stmt = SSA_NAME_DEF_STMT (e);
 384       if (gimple_bb (stmt) == NULL)
 385         return 0;
 386
 387       if (gimple_code (stmt) == GIMPLE_PHI)
 388         return phi_rank (stmt);
 389
 390       if (!is_gimple_assign (stmt)
 391           || gimple_vdef (stmt))
 392         return bb_rank[gimple_bb (stmt)->index];
 393
 394       /* If we already have a rank for this expression, use that.  */
 395       rank = find_operand_rank (e);
 396       if (rank != -1)
 397         return rank;
 398
 399       /* Otherwise, find the maximum rank for the operands.  As an
 400          exception, remove the bias from loop-carried phis when propagating
 401          the rank so that dependent operations are not also biased.  */
 402       rank = 0;
 403       if (gimple_assign_single_p (stmt))
 404         {
 405           tree rhs = gimple_assign_rhs1 (stmt);
 406           n = TREE_OPERAND_LENGTH (rhs);
 407           if (n == 0)
 408             rank = propagate_rank (rank, rhs);
 409           else
 410             {
 411               for (i = 0; i < n; i++)
 412                 {
 413                   op = TREE_OPERAND (rhs, i);
 414
 415                   if (op != NULL_TREE)
 416                     rank = propagate_rank (rank, op);
 417                 }
 418             }
 419         }
 420       else
 421         {
 422           n = gimple_num_ops (stmt);
 423           for (i = 1; i < n; i++)
 424             {
 425               op = gimple_op (stmt, i);
 426               gcc_assert (op);
 427               rank = propagate_rank (rank, op);
 428             }
 429         }
 430
 431       if (dump_file && (dump_flags & TDF_DETAILS))
 432         {
 433           fprintf (dump_file, "Rank for ");
 434           print_generic_expr (dump_file, e, 0);
 435           fprintf (dump_file, " is %ld\n", (rank + 1));
 436         }
 437
 438       /* Note the rank in the hashtable so we don't recompute it.  */
 439       insert_operand_rank (e, (rank + 1));
 440       return (rank + 1);
 441     }
 442
 443   /* Globals, etc,  are rank 0 */
 444   return 0;
 445 }
 446
 447 DEF_VEC_P(operand_entry_t);
 448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
 449
 450 /* We want integer ones to end up last no matter what, since they are
 451    the ones we can do the most with.  */
 452 #define INTEGER_CONST_TYPE 1 << 3
 453 #define FLOAT_CONST_TYPE 1 << 2
 454 #define OTHER_CONST_TYPE 1 << 1
 455
 456 /* Classify an invariant tree into integer, float, or other, so that
 457    we can sort them to be near other constants of the same type.  */
 458 static inline int
 459 constant_type (tree t)
 460 {
 461   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 462     return INTEGER_CONST_TYPE;
 463   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 464     return FLOAT_CONST_TYPE;
 465   else
 466     return OTHER_CONST_TYPE;
 467 }
 468
 469 /* qsort comparison function to sort operand entries PA and PB by rank
 470    so that the sorted array is ordered by rank in decreasing order.  */
 471 static int
 472 sort_by_operand_rank (const void *pa, const void *pb)
 473 {
 474   const operand_entry_t oea = *(const operand_entry_t *)pa;
 475   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 476
 477   /* It's nicer for optimize_expression if constants that are likely
 478      to fold when added/multiplied//whatever are put next to each
 479      other.  Since all constants have rank 0, order them by type.  */
 480   if (oeb->rank == 0 &&  oea->rank == 0)
 481     {
 482       if (constant_type (oeb->op) != constant_type (oea->op))
 483         return constant_type (oeb->op) - constant_type (oea->op);
 484       else
 485         /* To make sorting result stable, we use unique IDs to determine
 486            order.  */
 487         return oeb->id - oea->id;
 488     }
 489
 490   /* Lastly, make sure the versions that are the same go next to each
 491      other.  We use SSA_NAME_VERSION because it's stable.  */
 492   if ((oeb->rank - oea->rank == 0)
 493       && TREE_CODE (oea->op) == SSA_NAME
 494       && TREE_CODE (oeb->op) == SSA_NAME)
 495     {
 496       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 497         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 498       else
 499         return oeb->id - oea->id;
 500     }
 501
 502   if (oeb->rank != oea->rank)
 503     return oeb->rank - oea->rank;
 504   else
 505     return oeb->id - oea->id;
 506 }
 507
 508 /* Add an operand entry to *OPS for the tree operand OP.  */
 509
 510 static void
 511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
 512 {
 513   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 514
 515   oe->op = op;
 516   oe->rank = get_rank (op);
 517   oe->id = next_operand_entry_id++;
 518   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 519 }
 520
 521 /* Return true if STMT is reassociable operation containing a binary
 522    operation with tree code CODE, and is inside LOOP.  */
 523
 524 static bool
 525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 526 {
 527   basic_block bb = gimple_bb (stmt);
 528
 529   if (gimple_bb (stmt) == NULL)
 530     return false;
 531
 532   if (!flow_bb_inside_loop_p (loop, bb))
 533     return false;
 534
 535   if (is_gimple_assign (stmt)
 536       && gimple_assign_rhs_code (stmt) == code
 537       && has_single_use (gimple_assign_lhs (stmt)))
 538     return true;
 539
 540   return false;
 541 }
 542
 543
 544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 545    operand of the negate operation.  Otherwise, return NULL.  */
 546
 547 static tree
 548 get_unary_op (tree name, enum tree_code opcode)
 549 {
 550   gimple stmt = SSA_NAME_DEF_STMT (name);
 551
 552   if (!is_gimple_assign (stmt))
 553     return NULL_TREE;
 554
 555   if (gimple_assign_rhs_code (stmt) == opcode)
 556     return gimple_assign_rhs1 (stmt);
 557   return NULL_TREE;
 558 }
 559
 560 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 561    eliminate through equivalences, do so, remove them from OPS, and
 562    return true.  Otherwise, return false.  */
 563
 564 static bool
 565 eliminate_duplicate_pair (enum tree_code opcode,
 566                           VEC (operand_entry_t, heap) **ops,
 567                           bool *all_done,
 568                           unsigned int i,
 569                           operand_entry_t curr,
 570                           operand_entry_t last)
 571 {
 572
 573   /* If we have two of the same op, and the opcode is & |, min, or max,
 574      we can eliminate one of them.
 575      If we have two of the same op, and the opcode is ^, we can
 576      eliminate both of them.  */
 577
 578   if (last && last->op == curr->op)
 579     {
 580       switch (opcode)
 581         {
 582         case MAX_EXPR:
 583         case MIN_EXPR:
 584         case BIT_IOR_EXPR:
 585         case BIT_AND_EXPR:
 586           if (dump_file && (dump_flags & TDF_DETAILS))
 587             {
 588               fprintf (dump_file, "Equivalence: ");
 589               print_generic_expr (dump_file, curr->op, 0);
 590               fprintf (dump_file, " [&|minmax] ");
 591               print_generic_expr (dump_file, last->op, 0);
 592               fprintf (dump_file, " -> ");
 593               print_generic_stmt (dump_file, last->op, 0);
 594             }
 595
 596           VEC_ordered_remove (operand_entry_t, *ops, i);
 597           reassociate_stats.ops_eliminated ++;
 598
 599           return true;
 600
 601         case BIT_XOR_EXPR:
 602           if (dump_file && (dump_flags & TDF_DETAILS))
 603             {
 604               fprintf (dump_file, "Equivalence: ");
 605               print_generic_expr (dump_file, curr->op, 0);
 606               fprintf (dump_file, " ^ ");
 607               print_generic_expr (dump_file, last->op, 0);
 608               fprintf (dump_file, " -> nothing\n");
 609             }
 610
 611           reassociate_stats.ops_eliminated += 2;
 612
 613           if (VEC_length (operand_entry_t, *ops) == 2)
 614             {
 615               VEC_free (operand_entry_t, heap, *ops);
 616               *ops = NULL;
 617               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 618               *all_done = true;
 619             }
 620           else
 621             {
 622               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 623               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 624             }
 625
 626           return true;
 627
 628         default:
 629           break;
 630         }
 631     }
 632   return false;
 633 }
 634
 635 static VEC(tree, heap) *plus_negates;
 636
 637 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 638    expression, look in OPS for a corresponding positive operation to cancel
 639    it out.  If we find one, remove the other from OPS, replace
 640    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 641    return false. */
 642
 643 static bool
 644 eliminate_plus_minus_pair (enum tree_code opcode,
 645                            VEC (operand_entry_t, heap) **ops,
 646                            unsigned int currindex,
 647                            operand_entry_t curr)
 648 {
 649   tree negateop;
 650   tree notop;
 651   unsigned int i;
 652   operand_entry_t oe;
 653
 654   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 655     return false;
 656
 657   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 658   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 659   if (negateop == NULL_TREE && notop == NULL_TREE)
 660     return false;
 661
 662   /* Any non-negated version will have a rank that is one less than
 663      the current rank.  So once we hit those ranks, if we don't find
 664      one, we can stop.  */
 665
 666   for (i = currindex + 1;
 667        VEC_iterate (operand_entry_t, *ops, i, oe)
 668        && oe->rank >= curr->rank - 1 ;
 669        i++)
 670     {
 671       if (oe->op == negateop)
 672         {
 673
 674           if (dump_file && (dump_flags & TDF_DETAILS))
 675             {
 676               fprintf (dump_file, "Equivalence: ");
 677               print_generic_expr (dump_file, negateop, 0);
 678               fprintf (dump_file, " + -");
 679               print_generic_expr (dump_file, oe->op, 0);
 680               fprintf (dump_file, " -> 0\n");
 681             }
 682
 683           VEC_ordered_remove (operand_entry_t, *ops, i);
 684           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 685           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 686           reassociate_stats.ops_eliminated ++;
 687
 688           return true;
 689         }
 690       else if (oe->op == notop)
 691         {
 692           tree op_type = TREE_TYPE (oe->op);
 693
 694           if (dump_file && (dump_flags & TDF_DETAILS))
 695             {
 696               fprintf (dump_file, "Equivalence: ");
 697               print_generic_expr (dump_file, notop, 0);
 698               fprintf (dump_file, " + ~");
 699               print_generic_expr (dump_file, oe->op, 0);
 700               fprintf (dump_file, " -> -1\n");
 701             }
 702
 703           VEC_ordered_remove (operand_entry_t, *ops, i);
 704           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 705           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 706           reassociate_stats.ops_eliminated ++;
 707
 708           return true;
 709         }
 710     }
 711
 712   /* CURR->OP is a negate expr in a plus expr: save it for later
 713      inspection in repropagate_negates().  */
 714   if (negateop != NULL_TREE)
 715     VEC_safe_push (tree, heap, plus_negates, curr->op);
 716
 717   return false;
 718 }
 719
 720 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 721    bitwise not expression, look in OPS for a corresponding operand to
 722    cancel it out.  If we find one, remove the other from OPS, replace
 723    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 724    false. */
 725
 726 static bool
 727 eliminate_not_pairs (enum tree_code opcode,
 728                      VEC (operand_entry_t, heap) **ops,
 729                      unsigned int currindex,
 730                      operand_entry_t curr)
 731 {
 732   tree notop;
 733   unsigned int i;
 734   operand_entry_t oe;
 735
 736   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 737       || TREE_CODE (curr->op) != SSA_NAME)
 738     return false;
 739
 740   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 741   if (notop == NULL_TREE)
 742     return false;
 743
 744   /* Any non-not version will have a rank that is one less than
 745      the current rank.  So once we hit those ranks, if we don't find
 746      one, we can stop.  */
 747
 748   for (i = currindex + 1;
 749        VEC_iterate (operand_entry_t, *ops, i, oe)
 750        && oe->rank >= curr->rank - 1;
 751        i++)
 752     {
 753       if (oe->op == notop)
 754         {
 755           if (dump_file && (dump_flags & TDF_DETAILS))
 756             {
 757               fprintf (dump_file, "Equivalence: ");
 758               print_generic_expr (dump_file, notop, 0);
 759               if (opcode == BIT_AND_EXPR)
 760                 fprintf (dump_file, " & ~");
 761               else if (opcode == BIT_IOR_EXPR)
 762                 fprintf (dump_file, " | ~");
 763               print_generic_expr (dump_file, oe->op, 0);
 764               if (opcode == BIT_AND_EXPR)
 765                 fprintf (dump_file, " -> 0\n");
 766               else if (opcode == BIT_IOR_EXPR)
 767                 fprintf (dump_file, " -> -1\n");
 768             }
 769
 770           if (opcode == BIT_AND_EXPR)
 771             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 772           else if (opcode == BIT_IOR_EXPR)
 773             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 774                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 775
 776           reassociate_stats.ops_eliminated
 777             += VEC_length (operand_entry_t, *ops) - 1;
 778           VEC_free (operand_entry_t, heap, *ops);
 779           *ops = NULL;
 780           VEC_safe_push (operand_entry_t, heap, *ops, oe);
 781           return true;
 782         }
 783     }
 784
 785   return false;
 786 }
 787
 788 /* Use constant value that may be present in OPS to try to eliminate
 789    operands.  Note that this function is only really used when we've
 790    eliminated ops for other reasons, or merged constants.  Across
 791    single statements, fold already does all of this, plus more.  There
 792    is little point in duplicating logic, so I've only included the
 793    identities that I could ever construct testcases to trigger.  */
 794
 795 static void
 796 eliminate_using_constants (enum tree_code opcode,
 797                            VEC(operand_entry_t, heap) **ops)
 798 {
 799   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
 800   tree type = TREE_TYPE (oelast->op);
 801
 802   if (oelast->rank == 0
 803       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 804     {
 805       switch (opcode)
 806         {
 807         case BIT_AND_EXPR:
 808           if (integer_zerop (oelast->op))
 809             {
 810               if (VEC_length (operand_entry_t, *ops) != 1)
 811                 {
 812                   if (dump_file && (dump_flags & TDF_DETAILS))
 813                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 814
 815                   reassociate_stats.ops_eliminated
 816                     += VEC_length (operand_entry_t, *ops) - 1;
 817
 818                   VEC_free (operand_entry_t, heap, *ops);
 819                   *ops = NULL;
 820                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 821                   return;
 822                 }
 823             }
 824           else if (integer_all_onesp (oelast->op))
 825             {
 826               if (VEC_length (operand_entry_t, *ops) != 1)
 827                 {
 828                   if (dump_file && (dump_flags & TDF_DETAILS))
 829                     fprintf (dump_file, "Found & -1, removing\n");
 830                   VEC_pop (operand_entry_t, *ops);
 831                   reassociate_stats.ops_eliminated++;
 832                 }
 833             }
 834           break;
 835         case BIT_IOR_EXPR:
 836           if (integer_all_onesp (oelast->op))
 837             {
 838               if (VEC_length (operand_entry_t, *ops) != 1)
 839                 {
 840                   if (dump_file && (dump_flags & TDF_DETAILS))
 841                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 842
 843                   reassociate_stats.ops_eliminated
 844                     += VEC_length (operand_entry_t, *ops) - 1;
 845
 846                   VEC_free (operand_entry_t, heap, *ops);
 847                   *ops = NULL;
 848                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 849                   return;
 850                 }
 851             }
 852           else if (integer_zerop (oelast->op))
 853             {
 854               if (VEC_length (operand_entry_t, *ops) != 1)
 855                 {
 856                   if (dump_file && (dump_flags & TDF_DETAILS))
 857                     fprintf (dump_file, "Found | 0, removing\n");
 858                   VEC_pop (operand_entry_t, *ops);
 859                   reassociate_stats.ops_eliminated++;
 860                 }
 861             }
 862           break;
 863         case MULT_EXPR:
 864           if (integer_zerop (oelast->op)
 865               || (FLOAT_TYPE_P (type)
 866                   && !HONOR_NANS (TYPE_MODE (type))
 867                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 868                   && real_zerop (oelast->op)))
 869             {
 870               if (VEC_length (operand_entry_t, *ops) != 1)
 871                 {
 872                   if (dump_file && (dump_flags & TDF_DETAILS))
 873                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 874
 875                   reassociate_stats.ops_eliminated
 876                     += VEC_length (operand_entry_t, *ops) - 1;
 877                   VEC_free (operand_entry_t, heap, *ops);
 878                   *ops = NULL;
 879                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 880                   return;
 881                 }
 882             }
 883           else if (integer_onep (oelast->op)
 884                    || (FLOAT_TYPE_P (type)
 885                        && !HONOR_SNANS (TYPE_MODE (type))
 886                        && real_onep (oelast->op)))
 887             {
 888               if (VEC_length (operand_entry_t, *ops) != 1)
 889                 {
 890                   if (dump_file && (dump_flags & TDF_DETAILS))
 891                     fprintf (dump_file, "Found * 1, removing\n");
 892                   VEC_pop (operand_entry_t, *ops);
 893                   reassociate_stats.ops_eliminated++;
 894                   return;
 895                 }
 896             }
 897           break;
 898         case BIT_XOR_EXPR:
 899         case PLUS_EXPR:
 900         case MINUS_EXPR:
 901           if (integer_zerop (oelast->op)
 902               || (FLOAT_TYPE_P (type)
 903                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 904                   && fold_real_zero_addition_p (type, oelast->op,
 905                                                 opcode == MINUS_EXPR)))
 906             {
 907               if (VEC_length (operand_entry_t, *ops) != 1)
 908                 {
 909                   if (dump_file && (dump_flags & TDF_DETAILS))
 910                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 911                   VEC_pop (operand_entry_t, *ops);
 912                   reassociate_stats.ops_eliminated++;
 913                   return;
 914                 }
 915             }
 916           break;
 917         default:
 918           break;
 919         }
 920     }
 921 }
 922
 923
 924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
 925                                  bool, bool);
 926
 927 /* Structure for tracking and counting operands.  */
 928 typedef struct oecount_s {
 929   int cnt;
 930   int id;
 931   enum tree_code oecode;
 932   tree op;
 933 } oecount;
 934
 935 DEF_VEC_O(oecount);
 936 DEF_VEC_ALLOC_O(oecount,heap);
 937
 938 /* The heap for the oecount hashtable and the sorted list of operands.  */
 939 static VEC (oecount, heap) *cvec;
 940
 941 /* Hash function for oecount.  */
 942
 943 static hashval_t
 944 oecount_hash (const void *p)
 945 {
 946   const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
 947   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 948 }
 949
 950 /* Comparison function for oecount.  */
 951
 952 static int
 953 oecount_eq (const void *p1, const void *p2)
 954 {
 955   const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
 956   const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
 957   return (c1->oecode == c2->oecode
 958           && c1->op == c2->op);
 959 }
 960
 961 /* Comparison function for qsort sorting oecount elements by count.  */
 962
 963 static int
 964 oecount_cmp (const void *p1, const void *p2)
 965 {
 966   const oecount *c1 = (const oecount *)p1;
 967   const oecount *c2 = (const oecount *)p2;
 968   if (c1->cnt != c2->cnt)
 969     return c1->cnt - c2->cnt;
 970   else
 971     /* If counts are identical, use unique IDs to stabilize qsort.  */
 972     return c1->id - c2->id;
 973 }
 974
 975 /* Walks the linear chain with result *DEF searching for an operation
 976    with operand OP and code OPCODE removing that from the chain.  *DEF
 977    is updated if there is only one operand but no operation left.  */
 978
 979 static void
 980 zero_one_operation (tree *def, enum tree_code opcode, tree op)
 981 {
 982   gimple stmt = SSA_NAME_DEF_STMT (*def);
 983
 984   do
 985     {
 986       tree name = gimple_assign_rhs1 (stmt);
 987
 988       /* If this is the operation we look for and one of the operands
 989          is ours simply propagate the other operand into the stmts
 990          single use.  */
 991       if (gimple_assign_rhs_code (stmt) == opcode
 992           && (name == op
 993               || gimple_assign_rhs2 (stmt) == op))
 994         {
 995           gimple use_stmt;
 996           use_operand_p use;
 997           gimple_stmt_iterator gsi;
 998           if (name == op)
 999             name = gimple_assign_rhs2 (stmt);
1000           gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1001           single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1002           if (gimple_assign_lhs (stmt) == *def)
1003             *def = name;
1004           SET_USE (use, name);
1005           if (TREE_CODE (name) != SSA_NAME)
1006             update_stmt (use_stmt);
1007           gsi = gsi_for_stmt (stmt);
1008           gsi_remove (&gsi, true);
1009           release_defs (stmt);
1010           return;
1011         }
1012
1013       /* Continue walking the chain.  */
1014       gcc_assert (name != op
1015                   && TREE_CODE (name) == SSA_NAME);
1016       stmt = SSA_NAME_DEF_STMT (name);
1017     }
1018   while (1);
1019 }
1020
1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1022    the result.  Places the statement after the definition of either
1023    OP1 or OP2.  Returns the new statement.  */
1024
1025 static gimple
1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1027 {
1028   gimple op1def = NULL, op2def = NULL;
1029   gimple_stmt_iterator gsi;
1030   tree op;
1031   gimple sum;
1032
1033   /* Create the addition statement.  */
1034   sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1035   op = make_ssa_name (tmpvar, sum);
1036   gimple_assign_set_lhs (sum, op);
1037
1038   /* Find an insertion place and insert.  */
1039   if (TREE_CODE (op1) == SSA_NAME)
1040     op1def = SSA_NAME_DEF_STMT (op1);
1041   if (TREE_CODE (op2) == SSA_NAME)
1042     op2def = SSA_NAME_DEF_STMT (op2);
1043   if ((!op1def || gimple_nop_p (op1def))
1044       && (!op2def || gimple_nop_p (op2def)))
1045     {
1046       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1047       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1048     }
1049   else if ((!op1def || gimple_nop_p (op1def))
1050            || (op2def && !gimple_nop_p (op2def)
1051                && stmt_dominates_stmt_p (op1def, op2def)))
1052     {
1053       if (gimple_code (op2def) == GIMPLE_PHI)
1054         {
1055           gsi = gsi_after_labels (gimple_bb (op2def));
1056           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1057         }
1058       else
1059         {
1060           if (!stmt_ends_bb_p (op2def))
1061             {
1062               gsi = gsi_for_stmt (op2def);
1063               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1064             }
1065           else
1066             {
1067               edge e;
1068               edge_iterator ei;
1069
1070               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1071                 if (e->flags & EDGE_FALLTHRU)
1072                   gsi_insert_on_edge_immediate (e, sum);
1073             }
1074         }
1075     }
1076   else
1077     {
1078       if (gimple_code (op1def) == GIMPLE_PHI)
1079         {
1080           gsi = gsi_after_labels (gimple_bb (op1def));
1081           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1082         }
1083       else
1084         {
1085           if (!stmt_ends_bb_p (op1def))
1086             {
1087               gsi = gsi_for_stmt (op1def);
1088               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1089             }
1090           else
1091             {
1092               edge e;
1093               edge_iterator ei;
1094
1095               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1096                 if (e->flags & EDGE_FALLTHRU)
1097                   gsi_insert_on_edge_immediate (e, sum);
1098             }
1099         }
1100     }
1101   update_stmt (sum);
1102
1103   return sum;
1104 }
1105
1106 /* Perform un-distribution of divisions and multiplications.
1107    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1108    to (A + B) / X for real X.
1109
1110    The algorithm is organized as follows.
1111
1112     - First we walk the addition chain *OPS looking for summands that
1113       are defined by a multiplication or a real division.  This results
1114       in the candidates bitmap with relevant indices into *OPS.
1115
1116     - Second we build the chains of multiplications or divisions for
1117       these candidates, counting the number of occurences of (operand, code)
1118       pairs in all of the candidates chains.
1119
1120     - Third we sort the (operand, code) pairs by number of occurence and
1121       process them starting with the pair with the most uses.
1122
1123       * For each such pair we walk the candidates again to build a
1124         second candidate bitmap noting all multiplication/division chains
1125         that have at least one occurence of (operand, code).
1126
1127       * We build an alternate addition chain only covering these
1128         candidates with one (operand, code) operation removed from their
1129         multiplication/division chain.
1130
1131       * The first candidate gets replaced by the alternate addition chain
1132         multiplied/divided by the operand.
1133
1134       * All candidate chains get disabled for further processing and
1135         processing of (operand, code) pairs continues.
1136
1137   The alternate addition chains built are re-processed by the main
1138   reassociation algorithm which allows optimizing a * x * y + b * y * x
1139   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1140
1141 static bool
1142 undistribute_ops_list (enum tree_code opcode,
1143                        VEC (operand_entry_t, heap) **ops, struct loop *loop)
1144 {
1145   unsigned int length = VEC_length (operand_entry_t, *ops);
1146   operand_entry_t oe1;
1147   unsigned i, j;
1148   sbitmap candidates, candidates2;
1149   unsigned nr_candidates, nr_candidates2;
1150   sbitmap_iterator sbi0;
1151   VEC (operand_entry_t, heap) **subops;
1152   htab_t ctable;
1153   bool changed = false;
1154   int next_oecount_id = 0;
1155
1156   if (length <= 1
1157       || opcode != PLUS_EXPR)
1158     return false;
1159
1160   /* Build a list of candidates to process.  */
1161   candidates = sbitmap_alloc (length);
1162   sbitmap_zero (candidates);
1163   nr_candidates = 0;
1164   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1165     {
1166       enum tree_code dcode;
1167       gimple oe1def;
1168
1169       if (TREE_CODE (oe1->op) != SSA_NAME)
1170         continue;
1171       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1172       if (!is_gimple_assign (oe1def))
1173         continue;
1174       dcode = gimple_assign_rhs_code (oe1def);
1175       if ((dcode != MULT_EXPR
1176            && dcode != RDIV_EXPR)
1177           || !is_reassociable_op (oe1def, dcode, loop))
1178         continue;
1179
1180       SET_BIT (candidates, i);
1181       nr_candidates++;
1182     }
1183
1184   if (nr_candidates < 2)
1185     {
1186       sbitmap_free (candidates);
1187       return false;
1188     }
1189
1190   if (dump_file && (dump_flags & TDF_DETAILS))
1191     {
1192       fprintf (dump_file, "searching for un-distribute opportunities ");
1193       print_generic_expr (dump_file,
1194         VEC_index (operand_entry_t, *ops,
1195                    sbitmap_first_set_bit (candidates))->op, 0);
1196       fprintf (dump_file, " %d\n", nr_candidates);
1197     }
1198
1199   /* Build linearized sub-operand lists and the counting table.  */
1200   cvec = NULL;
1201   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1202   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1203                      VEC_length (operand_entry_t, *ops));
1204   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1205     {
1206       gimple oedef;
1207       enum tree_code oecode;
1208       unsigned j;
1209
1210       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1211       oecode = gimple_assign_rhs_code (oedef);
1212       linearize_expr_tree (&subops[i], oedef,
1213                            associative_tree_code (oecode), false);
1214
1215       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1216         {
1217           oecount c;
1218           void **slot;
1219           size_t idx;
1220           c.oecode = oecode;
1221           c.cnt = 1;
1222           c.id = next_oecount_id++;
1223           c.op = oe1->op;
1224           VEC_safe_push (oecount, heap, cvec, &c);
1225           idx = VEC_length (oecount, cvec) + 41;
1226           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1227           if (!*slot)
1228             {
1229               *slot = (void *)idx;
1230             }
1231           else
1232             {
1233               VEC_pop (oecount, cvec);
1234               VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1235             }
1236         }
1237     }
1238   htab_delete (ctable);
1239
1240   /* Sort the counting table.  */
1241   VEC_qsort (oecount, cvec, oecount_cmp);
1242
1243   if (dump_file && (dump_flags & TDF_DETAILS))
1244     {
1245       oecount *c;
1246       fprintf (dump_file, "Candidates:\n");
1247       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1248         {
1249           fprintf (dump_file, "  %u %s: ", c->cnt,
1250                    c->oecode == MULT_EXPR
1251                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1252           print_generic_expr (dump_file, c->op, 0);
1253           fprintf (dump_file, "\n");
1254         }
1255     }
1256
1257   /* Process the (operand, code) pairs in order of most occurence.  */
1258   candidates2 = sbitmap_alloc (length);
1259   while (!VEC_empty (oecount, cvec))
1260     {
1261       oecount *c = VEC_last (oecount, cvec);
1262       if (c->cnt < 2)
1263         break;
1264
1265       /* Now collect the operands in the outer chain that contain
1266          the common operand in their inner chain.  */
1267       sbitmap_zero (candidates2);
1268       nr_candidates2 = 0;
1269       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1270         {
1271           gimple oedef;
1272           enum tree_code oecode;
1273           unsigned j;
1274           tree op = VEC_index (operand_entry_t, *ops, i)->op;
1275
1276           /* If we undistributed in this chain already this may be
1277              a constant.  */
1278           if (TREE_CODE (op) != SSA_NAME)
1279             continue;
1280
1281           oedef = SSA_NAME_DEF_STMT (op);
1282           oecode = gimple_assign_rhs_code (oedef);
1283           if (oecode != c->oecode)
1284             continue;
1285
1286           FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1287             {
1288               if (oe1->op == c->op)
1289                 {
1290                   SET_BIT (candidates2, i);
1291                   ++nr_candidates2;
1292                   break;
1293                 }
1294             }
1295         }
1296
1297       if (nr_candidates2 >= 2)
1298         {
1299           operand_entry_t oe1, oe2;
1300           tree tmpvar;
1301           gimple prod;
1302           int first = sbitmap_first_set_bit (candidates2);
1303
1304           /* Build the new addition chain.  */
1305           oe1 = VEC_index (operand_entry_t, *ops, first);
1306           if (dump_file && (dump_flags & TDF_DETAILS))
1307             {
1308               fprintf (dump_file, "Building (");
1309               print_generic_expr (dump_file, oe1->op, 0);
1310             }
1311           tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1312           add_referenced_var (tmpvar);
1313           zero_one_operation (&oe1->op, c->oecode, c->op);
1314           EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1315             {
1316               gimple sum;
1317               oe2 = VEC_index (operand_entry_t, *ops, i);
1318               if (dump_file && (dump_flags & TDF_DETAILS))
1319                 {
1320                   fprintf (dump_file, " + ");
1321                   print_generic_expr (dump_file, oe2->op, 0);
1322                 }
1323               zero_one_operation (&oe2->op, c->oecode, c->op);
1324               sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1325               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1326               oe2->rank = 0;
1327               oe1->op = gimple_get_lhs (sum);
1328             }
1329
1330           /* Apply the multiplication/division.  */
1331           prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1332           if (dump_file && (dump_flags & TDF_DETAILS))
1333             {
1334               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1335               print_generic_expr (dump_file, c->op, 0);
1336               fprintf (dump_file, "\n");
1337             }
1338
1339           /* Record it in the addition chain and disable further
1340              undistribution with this op.  */
1341           oe1->op = gimple_assign_lhs (prod);
1342           oe1->rank = get_rank (oe1->op);
1343           VEC_free (operand_entry_t, heap, subops[first]);
1344
1345           changed = true;
1346         }
1347
1348       VEC_pop (oecount, cvec);
1349     }
1350
1351   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1352     VEC_free (operand_entry_t, heap, subops[i]);
1353   free (subops);
1354   VEC_free (oecount, heap, cvec);
1355   sbitmap_free (candidates);
1356   sbitmap_free (candidates2);
1357
1358   return changed;
1359 }
1360
1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1362    expression, examine the other OPS to see if any of them are comparisons
1363    of the same values, which we may be able to combine or eliminate.
1364    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1365
1366 static bool
1367 eliminate_redundant_comparison (enum tree_code opcode,
1368                                 VEC (operand_entry_t, heap) **ops,
1369                                 unsigned int currindex,
1370                                 operand_entry_t curr)
1371 {
1372   tree op1, op2;
1373   enum tree_code lcode, rcode;
1374   gimple def1, def2;
1375   int i;
1376   operand_entry_t oe;
1377
1378   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1379     return false;
1380
1381   /* Check that CURR is a comparison.  */
1382   if (TREE_CODE (curr->op) != SSA_NAME)
1383     return false;
1384   def1 = SSA_NAME_DEF_STMT (curr->op);
1385   if (!is_gimple_assign (def1))
1386     return false;
1387   lcode = gimple_assign_rhs_code (def1);
1388   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1389     return false;
1390   op1 = gimple_assign_rhs1 (def1);
1391   op2 = gimple_assign_rhs2 (def1);
1392
1393   /* Now look for a similar comparison in the remaining OPS.  */
1394   for (i = currindex + 1;
1395        VEC_iterate (operand_entry_t, *ops, i, oe);
1396        i++)
1397     {
1398       tree t;
1399
1400       if (TREE_CODE (oe->op) != SSA_NAME)
1401         continue;
1402       def2 = SSA_NAME_DEF_STMT (oe->op);
1403       if (!is_gimple_assign (def2))
1404         continue;
1405       rcode = gimple_assign_rhs_code (def2);
1406       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1407         continue;
1408
1409       /* If we got here, we have a match.  See if we can combine the
1410          two comparisons.  */
1411       if (opcode == BIT_IOR_EXPR)
1412         t = maybe_fold_or_comparisons (lcode, op1, op2,
1413                                        rcode, gimple_assign_rhs1 (def2),
1414                                        gimple_assign_rhs2 (def2));
1415       else
1416         t = maybe_fold_and_comparisons (lcode, op1, op2,
1417                                         rcode, gimple_assign_rhs1 (def2),
1418                                         gimple_assign_rhs2 (def2));
1419       if (!t)
1420         continue;
1421
1422       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1423          always give us a boolean_type_node value back.  If the original
1424          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1425          we need to convert.  */
1426       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1427         t = fold_convert (TREE_TYPE (curr->op), t);
1428
1429       if (TREE_CODE (t) != INTEGER_CST
1430           && !operand_equal_p (t, curr->op, 0))
1431         {
1432           enum tree_code subcode;
1433           tree newop1, newop2;
1434           if (!COMPARISON_CLASS_P (t))
1435             continue;
1436           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1437           STRIP_USELESS_TYPE_CONVERSION (newop1);
1438           STRIP_USELESS_TYPE_CONVERSION (newop2);
1439           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1440             continue;
1441         }
1442
1443       if (dump_file && (dump_flags & TDF_DETAILS))
1444         {
1445           fprintf (dump_file, "Equivalence: ");
1446           print_generic_expr (dump_file, curr->op, 0);
1447           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1448           print_generic_expr (dump_file, oe->op, 0);
1449           fprintf (dump_file, " -> ");
1450           print_generic_expr (dump_file, t, 0);
1451           fprintf (dump_file, "\n");
1452         }
1453
1454       /* Now we can delete oe, as it has been subsumed by the new combined
1455          expression t.  */
1456       VEC_ordered_remove (operand_entry_t, *ops, i);
1457       reassociate_stats.ops_eliminated ++;
1458
1459       /* If t is the same as curr->op, we're done.  Otherwise we must
1460          replace curr->op with t.  Special case is if we got a constant
1461          back, in which case we add it to the end instead of in place of
1462          the current entry.  */
1463       if (TREE_CODE (t) == INTEGER_CST)
1464         {
1465           VEC_ordered_remove (operand_entry_t, *ops, currindex);
1466           add_to_ops_vec (ops, t);
1467         }
1468       else if (!operand_equal_p (t, curr->op, 0))
1469         {
1470           tree tmpvar;
1471           gimple sum;
1472           enum tree_code subcode;
1473           tree newop1;
1474           tree newop2;
1475           gcc_assert (COMPARISON_CLASS_P (t));
1476           tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1477           add_referenced_var (tmpvar);
1478           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1479           STRIP_USELESS_TYPE_CONVERSION (newop1);
1480           STRIP_USELESS_TYPE_CONVERSION (newop2);
1481           gcc_checking_assert (is_gimple_val (newop1)
1482                                && is_gimple_val (newop2));
1483           sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1484           curr->op = gimple_get_lhs (sum);
1485         }
1486       return true;
1487     }
1488
1489   return false;
1490 }
1491
1492 /* Perform various identities and other optimizations on the list of
1493    operand entries, stored in OPS.  The tree code for the binary
1494    operation between all the operands is OPCODE.  */
1495
1496 static void
1497 optimize_ops_list (enum tree_code opcode,
1498                    VEC (operand_entry_t, heap) **ops)
1499 {
1500   unsigned int length = VEC_length (operand_entry_t, *ops);
1501   unsigned int i;
1502   operand_entry_t oe;
1503   operand_entry_t oelast = NULL;
1504   bool iterate = false;
1505
1506   if (length == 1)
1507     return;
1508
1509   oelast = VEC_last (operand_entry_t, *ops);
1510
1511   /* If the last two are constants, pop the constants off, merge them
1512      and try the next two.  */
1513   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1514     {
1515       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1516
1517       if (oelm1->rank == 0
1518           && is_gimple_min_invariant (oelm1->op)
1519           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1520                                        TREE_TYPE (oelast->op)))
1521         {
1522           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1523                                      oelm1->op, oelast->op);
1524
1525           if (folded && is_gimple_min_invariant (folded))
1526             {
1527               if (dump_file && (dump_flags & TDF_DETAILS))
1528                 fprintf (dump_file, "Merging constants\n");
1529
1530               VEC_pop (operand_entry_t, *ops);
1531               VEC_pop (operand_entry_t, *ops);
1532
1533               add_to_ops_vec (ops, folded);
1534               reassociate_stats.constants_eliminated++;
1535
1536               optimize_ops_list (opcode, ops);
1537               return;
1538             }
1539         }
1540     }
1541
1542   eliminate_using_constants (opcode, ops);
1543   oelast = NULL;
1544
1545   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1546     {
1547       bool done = false;
1548
1549       if (eliminate_not_pairs (opcode, ops, i, oe))
1550         return;
1551       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1552           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1553           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1554         {
1555           if (done)
1556             return;
1557           iterate = true;
1558           oelast = NULL;
1559           continue;
1560         }
1561       oelast = oe;
1562       i++;
1563     }
1564
1565   length  = VEC_length (operand_entry_t, *ops);
1566   oelast = VEC_last (operand_entry_t, *ops);
1567
1568   if (iterate)
1569     optimize_ops_list (opcode, ops);
1570 }
1571
1572 /* The following functions are subroutines to optimize_range_tests and allow
1573    it to try to change a logical combination of comparisons into a range
1574    test.
1575
1576    For example, both
1577         X == 2 || X == 5 || X == 3 || X == 4
1578    and
1579         X >= 2 && X <= 5
1580    are converted to
1581         (unsigned) (X - 2) <= 3
1582
1583    For more information see comments above fold_test_range in fold-const.c,
1584    this implementation is for GIMPLE.  */
1585
1586 struct range_entry
1587 {
1588   tree exp;
1589   tree low;
1590   tree high;
1591   bool in_p;
1592   bool strict_overflow_p;
1593   unsigned int idx, next;
1594 };
1595
1596 /* This is similar to make_range in fold-const.c, but on top of
1597    GIMPLE instead of trees.  */
1598
1599 static void
1600 init_range_entry (struct range_entry *r, tree exp)
1601 {
1602   int in_p;
1603   tree low, high;
1604   bool is_bool, strict_overflow_p;
1605
1606   r->exp = NULL_TREE;
1607   r->in_p = false;
1608   r->strict_overflow_p = false;
1609   r->low = NULL_TREE;
1610   r->high = NULL_TREE;
1611   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1612     return;
1613
1614   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1615      and see if we can refine the range.  Some of the cases below may not
1616      happen, but it doesn't seem worth worrying about this.  We "continue"
1617      the outer loop when we've changed something; otherwise we "break"
1618      the switch, which will "break" the while.  */
1619   low = build_int_cst (TREE_TYPE (exp), 0);
1620   high = low;
1621   in_p = 0;
1622   strict_overflow_p = false;
1623   is_bool = false;
1624   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1625     {
1626       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1627         is_bool = true;
1628       else
1629         return;
1630     }
1631   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1632     is_bool = true;
1633
1634   while (1)
1635     {
1636       gimple stmt;
1637       enum tree_code code;
1638       tree arg0, arg1, exp_type;
1639       tree nexp;
1640       location_t loc;
1641
1642       if (TREE_CODE (exp) != SSA_NAME)
1643         break;
1644
1645       stmt = SSA_NAME_DEF_STMT (exp);
1646       if (!is_gimple_assign (stmt))
1647         break;
1648
1649       code = gimple_assign_rhs_code (stmt);
1650       arg0 = gimple_assign_rhs1 (stmt);
1651       arg1 = gimple_assign_rhs2 (stmt);
1652       exp_type = TREE_TYPE (exp);
1653       loc = gimple_location (stmt);
1654       switch (code)
1655         {
1656         case BIT_NOT_EXPR:
1657           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1658             {
1659               in_p = !in_p;
1660               exp = arg0;
1661               continue;
1662             }
1663           break;
1664         case SSA_NAME:
1665           exp = arg0;
1666           continue;
1667         CASE_CONVERT:
1668           if (is_bool)
1669             goto do_default;
1670           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1671             {
1672               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1673                 is_bool = true;
1674               else
1675                 return;
1676             }
1677           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1678             is_bool = true;
1679           goto do_default;
1680         case EQ_EXPR:
1681         case NE_EXPR:
1682         case LT_EXPR:
1683         case LE_EXPR:
1684         case GE_EXPR:
1685         case GT_EXPR:
1686           is_bool = true;
1687           /* FALLTHRU */
1688         default:
1689           if (!is_bool)
1690             return;
1691         do_default:
1692           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1693                                   &low, &high, &in_p,
1694                                   &strict_overflow_p);
1695           if (nexp != NULL_TREE)
1696             {
1697               exp = nexp;
1698               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1699               continue;
1700             }
1701           break;
1702         }
1703       break;
1704     }
1705   if (is_bool)
1706     {
1707       r->exp = exp;
1708       r->in_p = in_p;
1709       r->low = low;
1710       r->high = high;
1711       r->strict_overflow_p = strict_overflow_p;
1712     }
1713 }
1714
1715 /* Comparison function for qsort.  Sort entries
1716    without SSA_NAME exp first, then with SSA_NAMEs sorted
1717    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1718    by increasing ->low and if ->low is the same, by increasing
1719    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1720    maximum.  */
1721
1722 static int
1723 range_entry_cmp (const void *a, const void *b)
1724 {
1725   const struct range_entry *p = (const struct range_entry *) a;
1726   const struct range_entry *q = (const struct range_entry *) b;
1727
1728   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1729     {
1730       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1731         {
1732           /* Group range_entries for the same SSA_NAME together.  */
1733           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1734             return -1;
1735           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1736             return 1;
1737           /* If ->low is different, NULL low goes first, then by
1738              ascending low.  */
1739           if (p->low != NULL_TREE)
1740             {
1741               if (q->low != NULL_TREE)
1742                 {
1743                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1744                                           p->low, q->low);
1745                   if (tem && integer_onep (tem))
1746                     return -1;
1747                   tem = fold_binary (GT_EXPR, boolean_type_node,
1748                                      p->low, q->low);
1749                   if (tem && integer_onep (tem))
1750                     return 1;
1751                 }
1752               else
1753                 return 1;
1754             }
1755           else if (q->low != NULL_TREE)
1756             return -1;
1757           /* If ->high is different, NULL high goes last, before that by
1758              ascending high.  */
1759           if (p->high != NULL_TREE)
1760             {
1761               if (q->high != NULL_TREE)
1762                 {
1763                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1764                                           p->high, q->high);
1765                   if (tem && integer_onep (tem))
1766                     return -1;
1767                   tem = fold_binary (GT_EXPR, boolean_type_node,
1768                                      p->high, q->high);
1769                   if (tem && integer_onep (tem))
1770                     return 1;
1771                 }
1772               else
1773                 return -1;
1774             }
1775           else if (p->high != NULL_TREE)
1776             return 1;
1777           /* If both ranges are the same, sort below by ascending idx.  */
1778         }
1779       else
1780         return 1;
1781     }
1782   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1783     return -1;
1784
1785   if (p->idx < q->idx)
1786     return -1;
1787   else
1788     {
1789       gcc_checking_assert (p->idx > q->idx);
1790       return 1;
1791     }
1792 }
1793
1794 /* Helper routine of optimize_range_test.
1795    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1796    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1797    OPCODE and OPS are arguments of optimize_range_tests.  Return
1798    true if the range merge has been successful.  */
1799
1800 static bool
1801 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1802                    unsigned int count, enum tree_code opcode,
1803                    VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1804                    tree low, tree high, bool strict_overflow_p)
1805 {
1806   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1807   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1808   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1809   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1810   gimple_stmt_iterator gsi;
1811
1812   if (tem == NULL_TREE)
1813     return false;
1814
1815   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1816     warning_at (loc, OPT_Wstrict_overflow,
1817                 "assuming signed overflow does not occur "
1818                 "when simplifying range test");
1819
1820   if (dump_file && (dump_flags & TDF_DETAILS))
1821     {
1822       struct range_entry *r;
1823       fprintf (dump_file, "Optimizing range tests ");
1824       print_generic_expr (dump_file, range->exp, 0);
1825       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1826       print_generic_expr (dump_file, range->low, 0);
1827       fprintf (dump_file, ", ");
1828       print_generic_expr (dump_file, range->high, 0);
1829       fprintf (dump_file, "]");
1830       for (r = otherrange; r < otherrange + count; r++)
1831         {
1832           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1833           print_generic_expr (dump_file, r->low, 0);
1834           fprintf (dump_file, ", ");
1835           print_generic_expr (dump_file, r->high, 0);
1836           fprintf (dump_file, "]");
1837         }
1838       fprintf (dump_file, "\n into ");
1839       print_generic_expr (dump_file, tem, 0);
1840       fprintf (dump_file, "\n");
1841     }
1842
1843   if (opcode == BIT_IOR_EXPR)
1844     tem = invert_truthvalue_loc (loc, tem);
1845
1846   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1847   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1848   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1849                                   GSI_SAME_STMT);
1850
1851   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1852   range->exp = exp;
1853   range->low = low;
1854   range->high = high;
1855   range->in_p = in_p;
1856   range->strict_overflow_p = false;
1857
1858   for (range = otherrange; range < otherrange + count; range++)
1859     {
1860       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1861       range->exp = NULL_TREE;
1862     }
1863   return true;
1864 }
1865
1866 /* Optimize range tests, similarly how fold_range_test optimizes
1867    it on trees.  The tree code for the binary
1868    operation between all the operands is OPCODE.  */
1869
1870 static void
1871 optimize_range_tests (enum tree_code opcode,
1872                       VEC (operand_entry_t, heap) **ops)
1873 {
1874   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1875   operand_entry_t oe;
1876   struct range_entry *ranges;
1877   bool any_changes = false;
1878
1879   if (length == 1)
1880     return;
1881
1882   ranges = XNEWVEC (struct range_entry, length);
1883   for (i = 0; i < length; i++)
1884     {
1885       ranges[i].idx = i;
1886       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
1887       /* For | invert it now, we will invert it again before emitting
1888          the optimized expression.  */
1889       if (opcode == BIT_IOR_EXPR)
1890         ranges[i].in_p = !ranges[i].in_p;
1891     }
1892
1893   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
1894   for (i = 0; i < length; i++)
1895     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
1896       break;
1897
1898   /* Try to merge ranges.  */
1899   for (first = i; i < length; i++)
1900     {
1901       tree low = ranges[i].low;
1902       tree high = ranges[i].high;
1903       int in_p = ranges[i].in_p;
1904       bool strict_overflow_p = ranges[i].strict_overflow_p;
1905       int update_fail_count = 0;
1906
1907       for (j = i + 1; j < length; j++)
1908         {
1909           if (ranges[i].exp != ranges[j].exp)
1910             break;
1911           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
1912                              ranges[j].in_p, ranges[j].low, ranges[j].high))
1913             break;
1914           strict_overflow_p |= ranges[j].strict_overflow_p;
1915         }
1916
1917       if (j == i + 1)
1918         continue;
1919
1920       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
1921                              ops, ranges[i].exp, in_p, low, high,
1922                              strict_overflow_p))
1923         {
1924           i = j - 1;
1925           any_changes = true;
1926         }
1927       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
1928          while update_range_test would fail.  */
1929       else if (update_fail_count == 64)
1930         i = j - 1;
1931       else
1932         ++update_fail_count;
1933     }
1934
1935   /* Optimize X == CST1 || X == CST2
1936      if popcount (CST1 ^ CST2) == 1 into
1937      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
1938      Similarly for ranges.  E.g.
1939      X != 2 && X != 3 && X != 10 && X != 11
1940      will be transformed by the above loop into
1941      (X - 2U) <= 1U && (X - 10U) <= 1U
1942      and this loop can transform that into
1943      ((X & ~8) - 2U) <= 1U.  */
1944   for (i = first; i < length; i++)
1945     {
1946       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
1947
1948       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
1949         continue;
1950       type = TREE_TYPE (ranges[i].exp);
1951       if (!INTEGRAL_TYPE_P (type))
1952         continue;
1953       lowi = ranges[i].low;
1954       if (lowi == NULL_TREE)
1955         lowi = TYPE_MIN_VALUE (type);
1956       highi = ranges[i].high;
1957       if (highi == NULL_TREE)
1958         continue;
1959       for (j = i + 1; j < length && j < i + 64; j++)
1960         {
1961           if (ranges[j].exp == NULL_TREE)
1962             continue;
1963           if (ranges[i].exp != ranges[j].exp)
1964             break;
1965           if (ranges[j].in_p)
1966             continue;
1967           lowj = ranges[j].low;
1968           if (lowj == NULL_TREE)
1969             continue;
1970           highj = ranges[j].high;
1971           if (highj == NULL_TREE)
1972             highj = TYPE_MAX_VALUE (type);
1973           tem = fold_binary (GT_EXPR, boolean_type_node,
1974                              lowj, highi);
1975           if (tem == NULL_TREE || !integer_onep (tem))
1976             continue;
1977           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
1978           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
1979             continue;
1980           gcc_checking_assert (!integer_zerop (lowxor));
1981           tem = fold_binary (MINUS_EXPR, type, lowxor,
1982                              build_int_cst (type, 1));
1983           if (tem == NULL_TREE)
1984             continue;
1985           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
1986           if (tem == NULL_TREE || !integer_zerop (tem))
1987             continue;
1988           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
1989           if (!tree_int_cst_equal (lowxor, highxor))
1990             continue;
1991           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
1992           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
1993           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
1994           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
1995           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
1996                                  ranges[i].in_p, lowj, highj,
1997                                  ranges[i].strict_overflow_p
1998                                  || ranges[j].strict_overflow_p))
1999             {
2000               any_changes = true;
2001               break;
2002             }
2003         }
2004     }
2005
2006   if (any_changes)
2007     {
2008       j = 0;
2009       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2010         {
2011           if (oe->op == error_mark_node)
2012             continue;
2013           else if (i != j)
2014             VEC_replace (operand_entry_t, *ops, j, oe);
2015           j++;
2016         }
2017       VEC_truncate (operand_entry_t, *ops, j);
2018     }
2019
2020   XDELETEVEC (ranges);
2021 }
2022
2023 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2024    of STMT in it's operands.  This is also known as a "destructive
2025    update" operation.  */
2026
2027 static bool
2028 is_phi_for_stmt (gimple stmt, tree operand)
2029 {
2030   gimple def_stmt;
2031   tree lhs;
2032   use_operand_p arg_p;
2033   ssa_op_iter i;
2034
2035   if (TREE_CODE (operand) != SSA_NAME)
2036     return false;
2037
2038   lhs = gimple_assign_lhs (stmt);
2039
2040   def_stmt = SSA_NAME_DEF_STMT (operand);
2041   if (gimple_code (def_stmt) != GIMPLE_PHI)
2042     return false;
2043
2044   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2045     if (lhs == USE_FROM_PTR (arg_p))
2046       return true;
2047   return false;
2048 }
2049
2050 /* Remove def stmt of VAR if VAR has zero uses and recurse
2051    on rhs1 operand if so.  */
2052
2053 static void
2054 remove_visited_stmt_chain (tree var)
2055 {
2056   gimple stmt;
2057   gimple_stmt_iterator gsi;
2058
2059   while (1)
2060     {
2061       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2062         return;
2063       stmt = SSA_NAME_DEF_STMT (var);
2064       if (!is_gimple_assign (stmt)
2065           || !gimple_visited_p (stmt))
2066         return;
2067       var = gimple_assign_rhs1 (stmt);
2068       gsi = gsi_for_stmt (stmt);
2069       gsi_remove (&gsi, true);
2070       release_defs (stmt);
2071     }
2072 }
2073
2074 /* This function checks three consequtive operands in
2075    passed operands vector OPS starting from OPINDEX and
2076    swaps two operands if it is profitable for binary operation
2077    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2078
2079    We pair ops with the same rank if possible.
2080
2081    The alternative we try is to see if STMT is a destructive
2082    update style statement, which is like:
2083    b = phi (a, ...)
2084    a = c + b;
2085    In that case, we want to use the destructive update form to
2086    expose the possible vectorizer sum reduction opportunity.
2087    In that case, the third operand will be the phi node. This
2088    check is not performed if STMT is null.
2089
2090    We could, of course, try to be better as noted above, and do a
2091    lot of work to try to find these opportunities in >3 operand
2092    cases, but it is unlikely to be worth it.  */
2093
2094 static void
2095 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2096                           unsigned int opindex, gimple stmt)
2097 {
2098   operand_entry_t oe1, oe2, oe3;
2099
2100   oe1 = VEC_index (operand_entry_t, ops, opindex);
2101   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2102   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2103
2104   if ((oe1->rank == oe2->rank
2105        && oe2->rank != oe3->rank)
2106       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2107           && !is_phi_for_stmt (stmt, oe1->op)
2108           && !is_phi_for_stmt (stmt, oe2->op)))
2109     {
2110       struct operand_entry temp = *oe3;
2111       oe3->op = oe1->op;
2112       oe3->rank = oe1->rank;
2113       oe1->op = temp.op;
2114       oe1->rank= temp.rank;
2115     }
2116   else if ((oe1->rank == oe3->rank
2117             && oe2->rank != oe3->rank)
2118            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2119                && !is_phi_for_stmt (stmt, oe1->op)
2120                && !is_phi_for_stmt (stmt, oe3->op)))
2121     {
2122       struct operand_entry temp = *oe2;
2123       oe2->op = oe1->op;
2124       oe2->rank = oe1->rank;
2125       oe1->op = temp.op;
2126       oe1->rank= temp.rank;
2127     }
2128 }
2129
2130 /* Recursively rewrite our linearized statements so that the operators
2131    match those in OPS[OPINDEX], putting the computation in rank
2132    order.  */
2133
2134 static void
2135 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2136                    VEC(operand_entry_t, heap) * ops, bool moved)
2137 {
2138   tree rhs1 = gimple_assign_rhs1 (stmt);
2139   tree rhs2 = gimple_assign_rhs2 (stmt);
2140   operand_entry_t oe;
2141
2142   /* If we have three operands left, then we want to make sure the ones
2143      that get the double binary op are chosen wisely.  */
2144   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2145     swap_ops_for_binary_stmt (ops, opindex, stmt);
2146
2147   /* The final recursion case for this function is that you have
2148      exactly two operations left.
2149      If we had one exactly one op in the entire list to start with, we
2150      would have never called this function, and the tail recursion
2151      rewrites them one at a time.  */
2152   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2153     {
2154       operand_entry_t oe1, oe2;
2155
2156       oe1 = VEC_index (operand_entry_t, ops, opindex);
2157       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2158
2159       if (rhs1 != oe1->op || rhs2 != oe2->op)
2160         {
2161           if (dump_file && (dump_flags & TDF_DETAILS))
2162             {
2163               fprintf (dump_file, "Transforming ");
2164               print_gimple_stmt (dump_file, stmt, 0, 0);
2165             }
2166
2167           gimple_assign_set_rhs1 (stmt, oe1->op);
2168           gimple_assign_set_rhs2 (stmt, oe2->op);
2169           update_stmt (stmt);
2170           if (rhs1 != oe1->op && rhs1 != oe2->op)
2171             remove_visited_stmt_chain (rhs1);
2172
2173           if (dump_file && (dump_flags & TDF_DETAILS))
2174             {
2175               fprintf (dump_file, " into ");
2176               print_gimple_stmt (dump_file, stmt, 0, 0);
2177             }
2178
2179         }
2180       return;
2181     }
2182
2183   /* If we hit here, we should have 3 or more ops left.  */
2184   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2185
2186   /* Rewrite the next operator.  */
2187   oe = VEC_index (operand_entry_t, ops, opindex);
2188
2189   if (oe->op != rhs2)
2190     {
2191       if (!moved)
2192         {
2193           gimple_stmt_iterator gsinow, gsirhs1;
2194           gimple stmt1 = stmt, stmt2;
2195           unsigned int count;
2196
2197           gsinow = gsi_for_stmt (stmt);
2198           count = VEC_length (operand_entry_t, ops) - opindex - 2;
2199           while (count-- != 0)
2200             {
2201               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2202               gsirhs1 = gsi_for_stmt (stmt2);
2203               gsi_move_before (&gsirhs1, &gsinow);
2204               gsi_prev (&gsinow);
2205               stmt1 = stmt2;
2206             }
2207           moved = true;
2208         }
2209
2210       if (dump_file && (dump_flags & TDF_DETAILS))
2211         {
2212           fprintf (dump_file, "Transforming ");
2213           print_gimple_stmt (dump_file, stmt, 0, 0);
2214         }
2215
2216       gimple_assign_set_rhs2 (stmt, oe->op);
2217       update_stmt (stmt);
2218
2219       if (dump_file && (dump_flags & TDF_DETAILS))
2220         {
2221           fprintf (dump_file, " into ");
2222           print_gimple_stmt (dump_file, stmt, 0, 0);
2223         }
2224     }
2225   /* Recurse on the LHS of the binary operator, which is guaranteed to
2226      be the non-leaf side.  */
2227   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2228 }
2229
2230 /* Find out how many cycles we need to compute statements chain.
2231    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2232    maximum number of independent statements we may execute per cycle.  */
2233
2234 static int
2235 get_required_cycles (int ops_num, int cpu_width)
2236 {
2237   int res;
2238   int elog;
2239   unsigned int rest;
2240
2241   /* While we have more than 2 * cpu_width operands
2242      we may reduce number of operands by cpu_width
2243      per cycle.  */
2244   res = ops_num / (2 * cpu_width);
2245
2246   /* Remained operands count may be reduced twice per cycle
2247      until we have only one operand.  */
2248   rest = (unsigned)(ops_num - res * cpu_width);
2249   elog = exact_log2 (rest);
2250   if (elog >= 0)
2251     res += elog;
2252   else
2253     res += floor_log2 (rest) + 1;
2254
2255   return res;
2256 }
2257
2258 /* Returns an optimal number of registers to use for computation of
2259    given statements.  */
2260
2261 static int
2262 get_reassociation_width (int ops_num, enum tree_code opc,
2263                          enum machine_mode mode)
2264 {
2265   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2266   int width;
2267   int width_min;
2268   int cycles_best;
2269
2270   if (param_width > 0)
2271     width = param_width;
2272   else
2273     width = targetm.sched.reassociation_width (opc, mode);
2274
2275   if (width == 1)
2276     return width;
2277
2278   /* Get the minimal time required for sequence computation.  */
2279   cycles_best = get_required_cycles (ops_num, width);
2280
2281   /* Check if we may use less width and still compute sequence for
2282      the same time.  It will allow us to reduce registers usage.
2283      get_required_cycles is monotonically increasing with lower width
2284      so we can perform a binary search for the minimal width that still
2285      results in the optimal cycle count.  */
2286   width_min = 1;
2287   while (width > width_min)
2288     {
2289       int width_mid = (width + width_min) / 2;
2290
2291       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2292         width = width_mid;
2293       else if (width_min < width_mid)
2294         width_min = width_mid;
2295       else
2296         break;
2297     }
2298
2299   return width;
2300 }
2301
2302 /* Recursively rewrite our linearized statements so that the operators
2303    match those in OPS[OPINDEX], putting the computation in rank
2304    order and trying to allow operations to be executed in
2305    parallel.  */
2306
2307 static void
2308 rewrite_expr_tree_parallel (gimple stmt, int width,
2309                             VEC(operand_entry_t, heap) * ops)
2310 {
2311   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2312   int op_num = VEC_length (operand_entry_t, ops);
2313   int stmt_num = op_num - 1;
2314   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2315   int op_index = op_num - 1;
2316   int stmt_index = 0;
2317   int ready_stmts_end = 0;
2318   int i = 0;
2319   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2320   tree lhs_var;
2321
2322   /* We start expression rewriting from the top statements.
2323      So, in this loop we create a full list of statements
2324      we will work with.  */
2325   stmts[stmt_num - 1] = stmt;
2326   for (i = stmt_num - 2; i >= 0; i--)
2327     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2328
2329   lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2330   add_referenced_var (lhs_var);
2331
2332   for (i = 0; i < stmt_num; i++)
2333     {
2334       tree op1, op2;
2335
2336       /* Determine whether we should use results of
2337          already handled statements or not.  */
2338       if (ready_stmts_end == 0
2339           && (i - stmt_index >= width || op_index < 1))
2340         ready_stmts_end = i;
2341
2342       /* Now we choose operands for the next statement.  Non zero
2343          value in ready_stmts_end means here that we should use
2344          the result of already generated statements as new operand.  */
2345       if (ready_stmts_end > 0)
2346         {
2347           op1 = gimple_assign_lhs (stmts[stmt_index++]);
2348           if (ready_stmts_end > stmt_index)
2349             op2 = gimple_assign_lhs (stmts[stmt_index++]);
2350           else if (op_index >= 0)
2351             op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2352           else
2353             {
2354               gcc_assert (stmt_index < i);
2355               op2 = gimple_assign_lhs (stmts[stmt_index++]);
2356             }
2357
2358           if (stmt_index >= ready_stmts_end)
2359             ready_stmts_end = 0;
2360         }
2361       else
2362         {
2363           if (op_index > 1)
2364             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2365           op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2366           op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2367         }
2368
2369       /* If we emit the last statement then we should put
2370          operands into the last statement.  It will also
2371          break the loop.  */
2372       if (op_index < 0 && stmt_index == i)
2373         i = stmt_num - 1;
2374
2375       if (dump_file && (dump_flags & TDF_DETAILS))
2376         {
2377           fprintf (dump_file, "Transforming ");
2378           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2379         }
2380
2381       /* We keep original statement only for the last one.  All
2382          others are recreated.  */
2383       if (i == stmt_num - 1)
2384         {
2385           gimple_assign_set_rhs1 (stmts[i], op1);
2386           gimple_assign_set_rhs2 (stmts[i], op2);
2387           update_stmt (stmts[i]);
2388         }
2389       else
2390         stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2391
2392       if (dump_file && (dump_flags & TDF_DETAILS))
2393         {
2394           fprintf (dump_file, " into ");
2395           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2396         }
2397     }
2398
2399   remove_visited_stmt_chain (last_rhs1);
2400 }
2401
2402 /* Transform STMT, which is really (A +B) + (C + D) into the left
2403    linear form, ((A+B)+C)+D.
2404    Recurse on D if necessary.  */
2405
2406 static void
2407 linearize_expr (gimple stmt)
2408 {
2409   gimple_stmt_iterator gsinow, gsirhs;
2410   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2411   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2412   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2413   gimple newbinrhs = NULL;
2414   struct loop *loop = loop_containing_stmt (stmt);
2415
2416   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2417               && is_reassociable_op (binrhs, rhscode, loop));
2418
2419   gsinow = gsi_for_stmt (stmt);
2420   gsirhs = gsi_for_stmt (binrhs);
2421   gsi_move_before (&gsirhs, &gsinow);
2422
2423   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2424   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2425   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2426
2427   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2428     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2429
2430   if (dump_file && (dump_flags & TDF_DETAILS))
2431     {
2432       fprintf (dump_file, "Linearized: ");
2433       print_gimple_stmt (dump_file, stmt, 0, 0);
2434     }
2435
2436   reassociate_stats.linearized++;
2437   update_stmt (binrhs);
2438   update_stmt (binlhs);
2439   update_stmt (stmt);
2440
2441   gimple_set_visited (stmt, true);
2442   gimple_set_visited (binlhs, true);
2443   gimple_set_visited (binrhs, true);
2444
2445   /* Tail recurse on the new rhs if it still needs reassociation.  */
2446   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2447     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2448            want to change the algorithm while converting to tuples.  */
2449     linearize_expr (stmt);
2450 }
2451
2452 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2453    it.  Otherwise, return NULL.  */
2454
2455 static gimple
2456 get_single_immediate_use (tree lhs)
2457 {
2458   use_operand_p immuse;
2459   gimple immusestmt;
2460
2461   if (TREE_CODE (lhs) == SSA_NAME
2462       && single_imm_use (lhs, &immuse, &immusestmt)
2463       && is_gimple_assign (immusestmt))
2464     return immusestmt;
2465
2466   return NULL;
2467 }
2468
2469 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2470    representing the negated value.  Insertions of any necessary
2471    instructions go before GSI.
2472    This function is recursive in that, if you hand it "a_5" as the
2473    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2474    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2475
2476 static tree
2477 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2478 {
2479   gimple negatedefstmt= NULL;
2480   tree resultofnegate;
2481
2482   /* If we are trying to negate a name, defined by an add, negate the
2483      add operands instead.  */
2484   if (TREE_CODE (tonegate) == SSA_NAME)
2485     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2486   if (TREE_CODE (tonegate) == SSA_NAME
2487       && is_gimple_assign (negatedefstmt)
2488       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2489       && has_single_use (gimple_assign_lhs (negatedefstmt))
2490       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2491     {
2492       gimple_stmt_iterator gsi;
2493       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2494       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2495
2496       gsi = gsi_for_stmt (negatedefstmt);
2497       rhs1 = negate_value (rhs1, &gsi);
2498       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2499
2500       gsi = gsi_for_stmt (negatedefstmt);
2501       rhs2 = negate_value (rhs2, &gsi);
2502       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2503
2504       update_stmt (negatedefstmt);
2505       return gimple_assign_lhs (negatedefstmt);
2506     }
2507
2508   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2509   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2510                                              NULL_TREE, true, GSI_SAME_STMT);
2511   return resultofnegate;
2512 }
2513
2514 /* Return true if we should break up the subtract in STMT into an add
2515    with negate.  This is true when we the subtract operands are really
2516    adds, or the subtract itself is used in an add expression.  In
2517    either case, breaking up the subtract into an add with negate
2518    exposes the adds to reassociation.  */
2519
2520 static bool
2521 should_break_up_subtract (gimple stmt)
2522 {
2523   tree lhs = gimple_assign_lhs (stmt);
2524   tree binlhs = gimple_assign_rhs1 (stmt);
2525   tree binrhs = gimple_assign_rhs2 (stmt);
2526   gimple immusestmt;
2527   struct loop *loop = loop_containing_stmt (stmt);
2528
2529   if (TREE_CODE (binlhs) == SSA_NAME
2530       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2531     return true;
2532
2533   if (TREE_CODE (binrhs) == SSA_NAME
2534       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2535     return true;
2536
2537   if (TREE_CODE (lhs) == SSA_NAME
2538       && (immusestmt = get_single_immediate_use (lhs))
2539       && is_gimple_assign (immusestmt)
2540       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2541           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2542     return true;
2543   return false;
2544 }
2545
2546 /* Transform STMT from A - B into A + -B.  */
2547
2548 static void
2549 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2550 {
2551   tree rhs1 = gimple_assign_rhs1 (stmt);
2552   tree rhs2 = gimple_assign_rhs2 (stmt);
2553
2554   if (dump_file && (dump_flags & TDF_DETAILS))
2555     {
2556       fprintf (dump_file, "Breaking up subtract ");
2557       print_gimple_stmt (dump_file, stmt, 0, 0);
2558     }
2559
2560   rhs2 = negate_value (rhs2, gsip);
2561   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2562   update_stmt (stmt);
2563 }
2564
2565 /* Recursively linearize a binary expression that is the RHS of STMT.
2566    Place the operands of the expression tree in the vector named OPS.  */
2567
2568 static void
2569 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2570                      bool is_associative, bool set_visited)
2571 {
2572   tree binlhs = gimple_assign_rhs1 (stmt);
2573   tree binrhs = gimple_assign_rhs2 (stmt);
2574   gimple binlhsdef, binrhsdef;
2575   bool binlhsisreassoc = false;
2576   bool binrhsisreassoc = false;
2577   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2578   struct loop *loop = loop_containing_stmt (stmt);
2579
2580   if (set_visited)
2581     gimple_set_visited (stmt, true);
2582
2583   if (TREE_CODE (binlhs) == SSA_NAME)
2584     {
2585       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2586       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2587                          && !stmt_could_throw_p (binlhsdef));
2588     }
2589
2590   if (TREE_CODE (binrhs) == SSA_NAME)
2591     {
2592       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2593       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2594                          && !stmt_could_throw_p (binrhsdef));
2595     }
2596
2597   /* If the LHS is not reassociable, but the RHS is, we need to swap
2598      them.  If neither is reassociable, there is nothing we can do, so
2599      just put them in the ops vector.  If the LHS is reassociable,
2600      linearize it.  If both are reassociable, then linearize the RHS
2601      and the LHS.  */
2602
2603   if (!binlhsisreassoc)
2604     {
2605       tree temp;
2606
2607       /* If this is not a associative operation like division, give up.  */
2608       if (!is_associative)
2609         {
2610           add_to_ops_vec (ops, binrhs);
2611           return;
2612         }
2613
2614       if (!binrhsisreassoc)
2615         {
2616           add_to_ops_vec (ops, binrhs);
2617           add_to_ops_vec (ops, binlhs);
2618           return;
2619         }
2620
2621       if (dump_file && (dump_flags & TDF_DETAILS))
2622         {
2623           fprintf (dump_file, "swapping operands of ");
2624           print_gimple_stmt (dump_file, stmt, 0, 0);
2625         }
2626
2627       swap_tree_operands (stmt,
2628                           gimple_assign_rhs1_ptr (stmt),
2629                           gimple_assign_rhs2_ptr (stmt));
2630       update_stmt (stmt);
2631
2632       if (dump_file && (dump_flags & TDF_DETAILS))
2633         {
2634           fprintf (dump_file, " is now ");
2635           print_gimple_stmt (dump_file, stmt, 0, 0);
2636         }
2637
2638       /* We want to make it so the lhs is always the reassociative op,
2639          so swap.  */
2640       temp = binlhs;
2641       binlhs = binrhs;
2642       binrhs = temp;
2643     }
2644   else if (binrhsisreassoc)
2645     {
2646       linearize_expr (stmt);
2647       binlhs = gimple_assign_rhs1 (stmt);
2648       binrhs = gimple_assign_rhs2 (stmt);
2649     }
2650
2651   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2652               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2653                                       rhscode, loop));
2654   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2655                        is_associative, set_visited);
2656   add_to_ops_vec (ops, binrhs);
2657 }
2658
2659 /* Repropagate the negates back into subtracts, since no other pass
2660    currently does it.  */
2661
2662 static void
2663 repropagate_negates (void)
2664 {
2665   unsigned int i = 0;
2666   tree negate;
2667
2668   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2669     {
2670       gimple user = get_single_immediate_use (negate);
2671
2672       if (!user || !is_gimple_assign (user))
2673         continue;
2674
2675       /* The negate operand can be either operand of a PLUS_EXPR
2676          (it can be the LHS if the RHS is a constant for example).
2677
2678          Force the negate operand to the RHS of the PLUS_EXPR, then
2679          transform the PLUS_EXPR into a MINUS_EXPR.  */
2680       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2681         {
2682           /* If the negated operand appears on the LHS of the
2683              PLUS_EXPR, exchange the operands of the PLUS_EXPR
2684              to force the negated operand to the RHS of the PLUS_EXPR.  */
2685           if (gimple_assign_rhs1 (user) == negate)
2686             {
2687               swap_tree_operands (user,
2688                                   gimple_assign_rhs1_ptr (user),
2689                                   gimple_assign_rhs2_ptr (user));
2690             }
2691
2692           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2693              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2694           if (gimple_assign_rhs2 (user) == negate)
2695             {
2696               tree rhs1 = gimple_assign_rhs1 (user);
2697               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2698               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2699               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2700               update_stmt (user);
2701             }
2702         }
2703       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2704         {
2705           if (gimple_assign_rhs1 (user) == negate)
2706             {
2707               /* We have
2708                    x = -a
2709                    y = x - b
2710                  which we transform into
2711                    x = a + b
2712                    y = -x .
2713                  This pushes down the negate which we possibly can merge
2714                  into some other operation, hence insert it into the
2715                  plus_negates vector.  */
2716               gimple feed = SSA_NAME_DEF_STMT (negate);
2717               tree a = gimple_assign_rhs1 (feed);
2718               tree rhs2 = gimple_assign_rhs2 (user);
2719               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2720               gimple_replace_lhs (feed, negate);
2721               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2722               update_stmt (gsi_stmt (gsi));
2723               gsi2 = gsi_for_stmt (user);
2724               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2725               update_stmt (gsi_stmt (gsi2));
2726               gsi_move_before (&gsi, &gsi2);
2727               VEC_safe_push (tree, heap, plus_negates,
2728                              gimple_assign_lhs (gsi_stmt (gsi2)));
2729             }
2730           else
2731             {
2732               /* Transform "x = -a; y = b - x" into "y = b + a", getting
2733                  rid of one operation.  */
2734               gimple feed = SSA_NAME_DEF_STMT (negate);
2735               tree a = gimple_assign_rhs1 (feed);
2736               tree rhs1 = gimple_assign_rhs1 (user);
2737               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2738               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2739               update_stmt (gsi_stmt (gsi));
2740             }
2741         }
2742     }
2743 }
2744
2745 /* Returns true if OP is of a type for which we can do reassociation.
2746    That is for integral or non-saturating fixed-point types, and for
2747    floating point type when associative-math is enabled.  */
2748
2749 static bool
2750 can_reassociate_p (tree op)
2751 {
2752   tree type = TREE_TYPE (op);
2753   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2754       || NON_SAT_FIXED_POINT_TYPE_P (type)
2755       || (flag_associative_math && FLOAT_TYPE_P (type)))
2756     return true;
2757   return false;
2758 }
2759
2760 /* Break up subtract operations in block BB.
2761
2762    We do this top down because we don't know whether the subtract is
2763    part of a possible chain of reassociation except at the top.
2764
2765    IE given
2766    d = f + g
2767    c = a + e
2768    b = c - d
2769    q = b - r
2770    k = t - q
2771
2772    we want to break up k = t - q, but we won't until we've transformed q
2773    = b - r, which won't be broken up until we transform b = c - d.
2774
2775    En passant, clear the GIMPLE visited flag on every statement.  */
2776
2777 static void
2778 break_up_subtract_bb (basic_block bb)
2779 {
2780   gimple_stmt_iterator gsi;
2781   basic_block son;
2782
2783   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2784     {
2785       gimple stmt = gsi_stmt (gsi);
2786       gimple_set_visited (stmt, false);
2787
2788       if (!is_gimple_assign (stmt)
2789           || !can_reassociate_p (gimple_assign_lhs (stmt)))
2790         continue;
2791
2792       /* Look for simple gimple subtract operations.  */
2793       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2794         {
2795           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2796               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2797             continue;
2798
2799           /* Check for a subtract used only in an addition.  If this
2800              is the case, transform it into add of a negate for better
2801              reassociation.  IE transform C = A-B into C = A + -B if C
2802              is only used in an addition.  */
2803           if (should_break_up_subtract (stmt))
2804             break_up_subtract (stmt, &gsi);
2805         }
2806       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2807                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2808         VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2809     }
2810   for (son = first_dom_son (CDI_DOMINATORS, bb);
2811        son;
2812        son = next_dom_son (CDI_DOMINATORS, son))
2813     break_up_subtract_bb (son);
2814 }
2815
2816 /* Reassociate expressions in basic block BB and its post-dominator as
2817    children.  */
2818
2819 static void
2820 reassociate_bb (basic_block bb)
2821 {
2822   gimple_stmt_iterator gsi;
2823   basic_block son;
2824
2825   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2826     {
2827       gimple stmt = gsi_stmt (gsi);
2828
2829       if (is_gimple_assign (stmt)
2830           && !stmt_could_throw_p (stmt))
2831         {
2832           tree lhs, rhs1, rhs2;
2833           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2834
2835           /* If this is not a gimple binary expression, there is
2836              nothing for us to do with it.  */
2837           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2838             continue;
2839
2840           /* If this was part of an already processed statement,
2841              we don't need to touch it again. */
2842           if (gimple_visited_p (stmt))
2843             {
2844               /* This statement might have become dead because of previous
2845                  reassociations.  */
2846               if (has_zero_uses (gimple_get_lhs (stmt)))
2847                 {
2848                   gsi_remove (&gsi, true);
2849                   release_defs (stmt);
2850                   /* We might end up removing the last stmt above which
2851                      places the iterator to the end of the sequence.
2852                      Reset it to the last stmt in this case which might
2853                      be the end of the sequence as well if we removed
2854                      the last statement of the sequence.  In which case
2855                      we need to bail out.  */
2856                   if (gsi_end_p (gsi))
2857                     {
2858                       gsi = gsi_last_bb (bb);
2859                       if (gsi_end_p (gsi))
2860                         break;
2861                     }
2862                 }
2863               continue;
2864             }
2865
2866           lhs = gimple_assign_lhs (stmt);
2867           rhs1 = gimple_assign_rhs1 (stmt);
2868           rhs2 = gimple_assign_rhs2 (stmt);
2869
2870           /* For non-bit or min/max operations we can't associate
2871              all types.  Verify that here.  */
2872           if (rhs_code != BIT_IOR_EXPR
2873               && rhs_code != BIT_AND_EXPR
2874               && rhs_code != BIT_XOR_EXPR
2875               && rhs_code != MIN_EXPR
2876               && rhs_code != MAX_EXPR
2877               && (!can_reassociate_p (lhs)
2878                   || !can_reassociate_p (rhs1)
2879                   || !can_reassociate_p (rhs2)))
2880             continue;
2881
2882           if (associative_tree_code (rhs_code))
2883             {
2884               VEC(operand_entry_t, heap) *ops = NULL;
2885
2886               /* There may be no immediate uses left by the time we
2887                  get here because we may have eliminated them all.  */
2888               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2889                 continue;
2890
2891               gimple_set_visited (stmt, true);
2892               linearize_expr_tree (&ops, stmt, true, true);
2893               VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2894               optimize_ops_list (rhs_code, &ops);
2895               if (undistribute_ops_list (rhs_code, &ops,
2896                                          loop_containing_stmt (stmt)))
2897                 {
2898                   VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2899                   optimize_ops_list (rhs_code, &ops);
2900                 }
2901
2902               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
2903                 optimize_range_tests (rhs_code, &ops);
2904
2905               if (VEC_length (operand_entry_t, ops) == 1)
2906                 {
2907                   if (dump_file && (dump_flags & TDF_DETAILS))
2908                     {
2909                       fprintf (dump_file, "Transforming ");
2910                       print_gimple_stmt (dump_file, stmt, 0, 0);
2911                     }
2912
2913                   rhs1 = gimple_assign_rhs1 (stmt);
2914                   gimple_assign_set_rhs_from_tree (&gsi,
2915                                                    VEC_last (operand_entry_t,
2916                                                              ops)->op);
2917                   update_stmt (stmt);
2918                   remove_visited_stmt_chain (rhs1);
2919
2920                   if (dump_file && (dump_flags & TDF_DETAILS))
2921                     {
2922                       fprintf (dump_file, " into ");
2923                       print_gimple_stmt (dump_file, stmt, 0, 0);
2924                     }
2925                 }
2926               else
2927                 {
2928                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2929                   int ops_num = VEC_length (operand_entry_t, ops);
2930                   int width = get_reassociation_width (ops_num, rhs_code, mode);
2931
2932                   if (dump_file && (dump_flags & TDF_DETAILS))
2933                     fprintf (dump_file,
2934                              "Width = %d was chosen for reassociation\n", width);
2935
2936                   if (width > 1
2937                       && VEC_length (operand_entry_t, ops) > 3)
2938                     rewrite_expr_tree_parallel (stmt, width, ops);
2939                   else
2940                     rewrite_expr_tree (stmt, 0, ops, false);
2941                 }
2942
2943               VEC_free (operand_entry_t, heap, ops);
2944             }
2945         }
2946     }
2947   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2948        son;
2949        son = next_dom_son (CDI_POST_DOMINATORS, son))
2950     reassociate_bb (son);
2951 }
2952
2953 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2954 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2955
2956 /* Dump the operand entry vector OPS to FILE.  */
2957
2958 void
2959 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2960 {
2961   operand_entry_t oe;
2962   unsigned int i;
2963
2964   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2965     {
2966       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2967       print_generic_expr (file, oe->op, 0);
2968     }
2969 }
2970
2971 /* Dump the operand entry vector OPS to STDERR.  */
2972
2973 DEBUG_FUNCTION void
2974 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2975 {
2976   dump_ops_vector (stderr, ops);
2977 }
2978
2979 static void
2980 do_reassoc (void)
2981 {
2982   break_up_subtract_bb (ENTRY_BLOCK_PTR);
2983   reassociate_bb (EXIT_BLOCK_PTR);
2984 }
2985
2986 /* Initialize the reassociation pass.  */
2987
2988 static void
2989 init_reassoc (void)
2990 {
2991   int i;
2992   long rank = 2;
2993   tree param;
2994   int *bbs = XNEWVEC (int, last_basic_block + 1);
2995
2996   /* Find the loops, so that we can prevent moving calculations in
2997      them.  */
2998   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2999
3000   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3001
3002   operand_entry_pool = create_alloc_pool ("operand entry pool",
3003                                           sizeof (struct operand_entry), 30);
3004   next_operand_entry_id = 0;
3005
3006   /* Reverse RPO (Reverse Post Order) will give us something where
3007      deeper loops come later.  */
3008   pre_and_rev_post_order_compute (NULL, bbs, false);
3009   bb_rank = XCNEWVEC (long, last_basic_block + 1);
3010   operand_rank = pointer_map_create ();
3011
3012   /* Give each argument a distinct rank.   */
3013   for (param = DECL_ARGUMENTS (current_function_decl);
3014        param;
3015        param = DECL_CHAIN (param))
3016     {
3017       if (gimple_default_def (cfun, param) != NULL)
3018         {
3019           tree def = gimple_default_def (cfun, param);
3020           insert_operand_rank (def, ++rank);
3021         }
3022     }
3023
3024   /* Give the chain decl a distinct rank. */
3025   if (cfun->static_chain_decl != NULL)
3026     {
3027       tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3028       if (def != NULL)
3029         insert_operand_rank (def, ++rank);
3030     }
3031
3032   /* Set up rank for each BB  */
3033   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3034     bb_rank[bbs[i]] = ++rank  << 16;
3035
3036   free (bbs);
3037   calculate_dominance_info (CDI_POST_DOMINATORS);
3038   plus_negates = NULL;
3039 }
3040
3041 /* Cleanup after the reassociation pass, and print stats if
3042    requested.  */
3043
3044 static void
3045 fini_reassoc (void)
3046 {
3047   statistics_counter_event (cfun, "Linearized",
3048                             reassociate_stats.linearized);
3049   statistics_counter_event (cfun, "Constants eliminated",
3050                             reassociate_stats.constants_eliminated);
3051   statistics_counter_event (cfun, "Ops eliminated",
3052                             reassociate_stats.ops_eliminated);
3053   statistics_counter_event (cfun, "Statements rewritten",
3054                             reassociate_stats.rewritten);
3055
3056   pointer_map_destroy (operand_rank);
3057   free_alloc_pool (operand_entry_pool);
3058   free (bb_rank);
3059   VEC_free (tree, heap, plus_negates);
3060   free_dominance_info (CDI_POST_DOMINATORS);
3061   loop_optimizer_finalize ();
3062 }
3063
3064 /* Gate and execute functions for Reassociation.  */
3065
3066 static unsigned int
3067 execute_reassoc (void)
3068 {
3069   init_reassoc ();
3070
3071   do_reassoc ();
3072   repropagate_negates ();
3073
3074   fini_reassoc ();
3075   return 0;
3076 }
3077
3078 static bool
3079 gate_tree_ssa_reassoc (void)
3080 {
3081   return flag_tree_reassoc != 0;
3082 }
3083
3084 struct gimple_opt_pass pass_reassoc =
3085 {
3086  {
3087   GIMPLE_PASS,
3088   "reassoc",                            /* name */
3089   gate_tree_ssa_reassoc,                /* gate */
3090   execute_reassoc,                      /* execute */
3091   NULL,                                 /* sub */
3092   NULL,                                 /* next */
3093   0,                                    /* static_pass_number */
3094   TV_TREE_REASSOC,                      /* tv_id */
3095   PROP_cfg | PROP_ssa,                  /* properties_required */
3096   0,                                    /* properties_provided */
3097   0,                                    /* properties_destroyed */
3098   0,                                    /* todo_flags_start */
3099   TODO_verify_ssa
3100     | TODO_verify_flow
3101     | TODO_ggc_collect                  /* todo_flags_finish */
3102  }
3103 };