gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = loop->single_exit;
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 839     return NULL_TREE;
 840
 841   switch (code)
 842     {
 843     case INTEGER_CST:
 844       return NULL_TREE;
 845
 846     case ADDR_EXPR:
 847       obj = TREE_OPERAND (expr, 0);
 848       base = get_base_address (obj);
 849
 850       if (!base)
 851         return expr;
 852
 853       if (TREE_CODE (base) == INDIRECT_REF)
 854         return determine_base_object (TREE_OPERAND (base, 0));
 855
 856       return fold_convert (ptr_type_node,
 857                            build_fold_addr_expr (base));
 858
 859     case PLUS_EXPR:
 860     case MINUS_EXPR:
 861       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 862       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 863
 864       if (!op1)
 865         return op0;
 866
 867       if (!op0)
 868         return (code == PLUS_EXPR
 869                 ? op1
 870                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 871
 872       return fold_build2 (code, ptr_type_node, op0, op1);
 873
 874     case NOP_EXPR:
 875     case CONVERT_EXPR:
 876       return determine_base_object (TREE_OPERAND (expr, 0));
 877
 878     default:
 879       return fold_convert (ptr_type_node, expr);
 880     }
 881 }
 882
 883 /* Allocates an induction variable with given initial value BASE and step STEP
 884    for loop LOOP.  */
 885
 886 static struct iv *
 887 alloc_iv (tree base, tree step)
 888 {
 889   struct iv *iv = XCNEW (struct iv);
 890
 891   if (step && integer_zerop (step))
 892     step = NULL_TREE;
 893
 894   iv->base = base;
 895   iv->base_object = determine_base_object (base);
 896   iv->step = step;
 897   iv->biv_p = false;
 898   iv->have_use_for = false;
 899   iv->use_id = 0;
 900   iv->ssa_name = NULL_TREE;
 901
 902   return iv;
 903 }
 904
 905 /* Sets STEP and BASE for induction variable IV.  */
 906
 907 static void
 908 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 909 {
 910   struct version_info *info = name_info (data, iv);
 911
 912   gcc_assert (!info->iv);
 913
 914   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 915   info->iv = alloc_iv (base, step);
 916   info->iv->ssa_name = iv;
 917 }
 918
 919 /* Finds induction variable declaration for VAR.  */
 920
 921 static struct iv *
 922 get_iv (struct ivopts_data *data, tree var)
 923 {
 924   basic_block bb;
 925
 926   if (!name_info (data, var)->iv)
 927     {
 928       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 929
 930       if (!bb
 931           || !flow_bb_inside_loop_p (data->current_loop, bb))
 932         set_iv (data, var, var, NULL_TREE);
 933     }
 934
 935   return name_info (data, var)->iv;
 936 }
 937
 938 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 939    not define a simple affine biv with nonzero step.  */
 940
 941 static tree
 942 determine_biv_step (tree phi)
 943 {
 944   struct loop *loop = bb_for_stmt (phi)->loop_father;
 945   tree name = PHI_RESULT (phi);
 946   affine_iv iv;
 947
 948   if (!is_gimple_reg (name))
 949     return NULL_TREE;
 950
 951   if (!simple_iv (loop, phi, name, &iv, true))
 952     return NULL_TREE;
 953
 954   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 955 }
 956
 957 /* Finds basic ivs.  */
 958
 959 static bool
 960 find_bivs (struct ivopts_data *data)
 961 {
 962   tree phi, step, type, base;
 963   bool found = false;
 964   struct loop *loop = data->current_loop;
 965
 966   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 967     {
 968       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 969         continue;
 970
 971       step = determine_biv_step (phi);
 972       if (!step)
 973         continue;
 974
 975       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 976       base = expand_simple_operations (base);
 977       if (contains_abnormal_ssa_name_p (base)
 978           || contains_abnormal_ssa_name_p (step))
 979         continue;
 980
 981       type = TREE_TYPE (PHI_RESULT (phi));
 982       base = fold_convert (type, base);
 983       if (step)
 984         step = fold_convert (type, step);
 985
 986       set_iv (data, PHI_RESULT (phi), base, step);
 987       found = true;
 988     }
 989
 990   return found;
 991 }
 992
 993 /* Marks basic ivs.  */
 994
 995 static void
 996 mark_bivs (struct ivopts_data *data)
 997 {
 998   tree phi, var;
 999   struct iv *iv, *incr_iv;
1000   struct loop *loop = data->current_loop;
1001   basic_block incr_bb;
1002
1003   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1004     {
1005       iv = get_iv (data, PHI_RESULT (phi));
1006       if (!iv)
1007         continue;
1008
1009       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010       incr_iv = get_iv (data, var);
1011       if (!incr_iv)
1012         continue;
1013
1014       /* If the increment is in the subloop, ignore it.  */
1015       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016       if (incr_bb->loop_father != data->current_loop
1017           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018         continue;
1019
1020       iv->biv_p = true;
1021       incr_iv->biv_p = true;
1022     }
1023 }
1024
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026    parameters to IV.  */
1027
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1030 {
1031   tree lhs;
1032   struct loop *loop = data->current_loop;
1033
1034   iv->base = NULL_TREE;
1035   iv->step = NULL_TREE;
1036
1037   if (TREE_CODE (stmt) != MODIFY_EXPR)
1038     return false;
1039
1040   lhs = TREE_OPERAND (stmt, 0);
1041   if (TREE_CODE (lhs) != SSA_NAME)
1042     return false;
1043
1044   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1045     return false;
1046   iv->base = expand_simple_operations (iv->base);
1047
1048   if (contains_abnormal_ssa_name_p (iv->base)
1049       || contains_abnormal_ssa_name_p (iv->step))
1050     return false;
1051
1052   return true;
1053 }
1054
1055 /* Finds general ivs in statement STMT.  */
1056
1057 static void
1058 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1059 {
1060   affine_iv iv;
1061
1062   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1063     return;
1064
1065   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1066 }
1067
1068 /* Finds general ivs in basic block BB.  */
1069
1070 static void
1071 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1072 {
1073   block_stmt_iterator bsi;
1074
1075   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1076     find_givs_in_stmt (data, bsi_stmt (bsi));
1077 }
1078
1079 /* Finds general ivs.  */
1080
1081 static void
1082 find_givs (struct ivopts_data *data)
1083 {
1084   struct loop *loop = data->current_loop;
1085   basic_block *body = get_loop_body_in_dom_order (loop);
1086   unsigned i;
1087
1088   for (i = 0; i < loop->num_nodes; i++)
1089     find_givs_in_bb (data, body[i]);
1090   free (body);
1091 }
1092
1093 /* For each ssa name defined in LOOP determines whether it is an induction
1094    variable and if so, its initial value and step.  */
1095
1096 static bool
1097 find_induction_variables (struct ivopts_data *data)
1098 {
1099   unsigned i;
1100   bitmap_iterator bi;
1101
1102   if (!find_bivs (data))
1103     return false;
1104
1105   find_givs (data);
1106   mark_bivs (data);
1107
1108   if (dump_file && (dump_flags & TDF_DETAILS))
1109     {
1110       tree niter = niter_for_single_dom_exit (data);
1111
1112       if (niter)
1113         {
1114           fprintf (dump_file, "  number of iterations ");
1115           print_generic_expr (dump_file, niter, TDF_SLIM);
1116           fprintf (dump_file, "\n\n");
1117         };
1118
1119       fprintf (dump_file, "Induction variables:\n\n");
1120
1121       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1122         {
1123           if (ver_info (data, i)->iv)
1124             dump_iv (dump_file, ver_info (data, i)->iv);
1125         }
1126     }
1127
1128   return true;
1129 }
1130
1131 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1132
1133 static struct iv_use *
1134 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1135             tree stmt, enum use_type use_type)
1136 {
1137   struct iv_use *use = XCNEW (struct iv_use);
1138
1139   use->id = n_iv_uses (data);
1140   use->type = use_type;
1141   use->iv = iv;
1142   use->stmt = stmt;
1143   use->op_p = use_p;
1144   use->related_cands = BITMAP_ALLOC (NULL);
1145
1146   /* To avoid showing ssa name in the dumps, if it was not reset by the
1147      caller.  */
1148   iv->ssa_name = NULL_TREE;
1149
1150   if (dump_file && (dump_flags & TDF_DETAILS))
1151     dump_use (dump_file, use);
1152
1153   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1154
1155   return use;
1156 }
1157
1158 /* Checks whether OP is a loop-level invariant and if so, records it.
1159    NONLINEAR_USE is true if the invariant is used in a way we do not
1160    handle specially.  */
1161
1162 static void
1163 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1164 {
1165   basic_block bb;
1166   struct version_info *info;
1167
1168   if (TREE_CODE (op) != SSA_NAME
1169       || !is_gimple_reg (op))
1170     return;
1171
1172   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1173   if (bb
1174       && flow_bb_inside_loop_p (data->current_loop, bb))
1175     return;
1176
1177   info = name_info (data, op);
1178   info->name = op;
1179   info->has_nonlin_use |= nonlinear_use;
1180   if (!info->inv_id)
1181     info->inv_id = ++data->max_inv_id;
1182   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1183 }
1184
1185 /* Checks whether the use OP is interesting and if so, records it.  */
1186
1187 static struct iv_use *
1188 find_interesting_uses_op (struct ivopts_data *data, tree op)
1189 {
1190   struct iv *iv;
1191   struct iv *civ;
1192   tree stmt;
1193   struct iv_use *use;
1194
1195   if (TREE_CODE (op) != SSA_NAME)
1196     return NULL;
1197
1198   iv = get_iv (data, op);
1199   if (!iv)
1200     return NULL;
1201
1202   if (iv->have_use_for)
1203     {
1204       use = iv_use (data, iv->use_id);
1205
1206       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1207       return use;
1208     }
1209
1210   if (zero_p (iv->step))
1211     {
1212       record_invariant (data, op, true);
1213       return NULL;
1214     }
1215   iv->have_use_for = true;
1216
1217   civ = XNEW (struct iv);
1218   *civ = *iv;
1219
1220   stmt = SSA_NAME_DEF_STMT (op);
1221   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1222               || TREE_CODE (stmt) == MODIFY_EXPR);
1223
1224   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1225   iv->use_id = use->id;
1226
1227   return use;
1228 }
1229
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231    and if so, records it.  */
1232
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1235 {
1236   tree *op0_p;
1237   tree *op1_p;
1238   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1239   struct iv const_iv;
1240   tree zero = integer_zero_node;
1241
1242   const_iv.step = NULL_TREE;
1243
1244   if (TREE_CODE (*cond_p) != SSA_NAME
1245       && !COMPARISON_CLASS_P (*cond_p))
1246     return;
1247
1248   if (TREE_CODE (*cond_p) == SSA_NAME)
1249     {
1250       op0_p = cond_p;
1251       op1_p = &zero;
1252     }
1253   else
1254     {
1255       op0_p = &TREE_OPERAND (*cond_p, 0);
1256       op1_p = &TREE_OPERAND (*cond_p, 1);
1257     }
1258
1259   if (TREE_CODE (*op0_p) == SSA_NAME)
1260     iv0 = get_iv (data, *op0_p);
1261   else
1262     iv0 = &const_iv;
1263
1264   if (TREE_CODE (*op1_p) == SSA_NAME)
1265     iv1 = get_iv (data, *op1_p);
1266   else
1267     iv1 = &const_iv;
1268
1269   if (/* When comparing with non-invariant value, we may not do any senseful
1270          induction variable elimination.  */
1271       (!iv0 || !iv1)
1272       /* Eliminating condition based on two ivs would be nontrivial.
1273          ??? TODO -- it is not really important to handle this case.  */
1274       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1275     {
1276       find_interesting_uses_op (data, *op0_p);
1277       find_interesting_uses_op (data, *op1_p);
1278       return;
1279     }
1280
1281   if (zero_p (iv0->step) && zero_p (iv1->step))
1282     {
1283       /* If both are invariants, this is a work for unswitching.  */
1284       return;
1285     }
1286
1287   civ = XNEW (struct iv);
1288   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1289   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1290 }
1291
1292 /* Returns true if expression EXPR is obviously invariant in LOOP,
1293    i.e. if all its operands are defined outside of the LOOP.  */
1294
1295 bool
1296 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1297 {
1298   basic_block def_bb;
1299   unsigned i, len;
1300
1301   if (is_gimple_min_invariant (expr))
1302     return true;
1303
1304   if (TREE_CODE (expr) == SSA_NAME)
1305     {
1306       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1307       if (def_bb
1308           && flow_bb_inside_loop_p (loop, def_bb))
1309         return false;
1310
1311       return true;
1312     }
1313
1314   if (!EXPR_P (expr))
1315     return false;
1316
1317   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1318   for (i = 0; i < len; i++)
1319     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1320       return false;
1321
1322   return true;
1323 }
1324
1325 /* Cumulates the steps of indices into DATA and replaces their values with the
1326    initial ones.  Returns false when the value of the index cannot be determined.
1327    Callback for for_each_index.  */
1328
1329 struct ifs_ivopts_data
1330 {
1331   struct ivopts_data *ivopts_data;
1332   tree stmt;
1333   tree *step_p;
1334 };
1335
1336 static bool
1337 idx_find_step (tree base, tree *idx, void *data)
1338 {
1339   struct ifs_ivopts_data *dta = data;
1340   struct iv *iv;
1341   tree step, iv_base, iv_step, lbound, off;
1342   struct loop *loop = dta->ivopts_data->current_loop;
1343
1344   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1345       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1346     return false;
1347
1348   /* If base is a component ref, require that the offset of the reference
1349      be invariant.  */
1350   if (TREE_CODE (base) == COMPONENT_REF)
1351     {
1352       off = component_ref_field_offset (base);
1353       return expr_invariant_in_loop_p (loop, off);
1354     }
1355
1356   /* If base is array, first check whether we will be able to move the
1357      reference out of the loop (in order to take its address in strength
1358      reduction).  In order for this to work we need both lower bound
1359      and step to be loop invariants.  */
1360   if (TREE_CODE (base) == ARRAY_REF)
1361     {
1362       step = array_ref_element_size (base);
1363       lbound = array_ref_low_bound (base);
1364
1365       if (!expr_invariant_in_loop_p (loop, step)
1366           || !expr_invariant_in_loop_p (loop, lbound))
1367         return false;
1368     }
1369
1370   if (TREE_CODE (*idx) != SSA_NAME)
1371     return true;
1372
1373   iv = get_iv (dta->ivopts_data, *idx);
1374   if (!iv)
1375     return false;
1376
1377   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1378           *&x[0], which is not folded and does not trigger the
1379           ARRAY_REF path below.  */
1380   *idx = iv->base;
1381
1382   if (!iv->step)
1383     return true;
1384
1385   if (TREE_CODE (base) == ARRAY_REF)
1386     {
1387       step = array_ref_element_size (base);
1388
1389       /* We only handle addresses whose step is an integer constant.  */
1390       if (TREE_CODE (step) != INTEGER_CST)
1391         return false;
1392     }
1393   else
1394     /* The step for pointer arithmetics already is 1 byte.  */
1395     step = build_int_cst (sizetype, 1);
1396
1397   iv_base = iv->base;
1398   iv_step = iv->step;
1399   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1400                             sizetype, &iv_base, &iv_step, dta->stmt,
1401                             false))
1402     {
1403       /* The index might wrap.  */
1404       return false;
1405     }
1406
1407   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1408
1409   if (!*dta->step_p)
1410     *dta->step_p = step;
1411   else
1412     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1413
1414   return true;
1415 }
1416
1417 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1418    object is passed to it in DATA.  */
1419
1420 static bool
1421 idx_record_use (tree base, tree *idx,
1422                 void *data)
1423 {
1424   find_interesting_uses_op (data, *idx);
1425   if (TREE_CODE (base) == ARRAY_REF)
1426     {
1427       find_interesting_uses_op (data, array_ref_element_size (base));
1428       find_interesting_uses_op (data, array_ref_low_bound (base));
1429     }
1430   return true;
1431 }
1432
1433 /* Returns true if memory reference REF may be unaligned.  */
1434
1435 static bool
1436 may_be_unaligned_p (tree ref)
1437 {
1438   tree base;
1439   tree base_type;
1440   HOST_WIDE_INT bitsize;
1441   HOST_WIDE_INT bitpos;
1442   tree toffset;
1443   enum machine_mode mode;
1444   int unsignedp, volatilep;
1445   unsigned base_align;
1446
1447   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448      thus they are not misaligned.  */
1449   if (TREE_CODE (ref) == TARGET_MEM_REF)
1450     return false;
1451
1452   /* The test below is basically copy of what expr.c:normal_inner_ref
1453      does to check whether the object must be loaded by parts when
1454      STRICT_ALIGNMENT is true.  */
1455   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1456                               &unsignedp, &volatilep, true);
1457   base_type = TREE_TYPE (base);
1458   base_align = TYPE_ALIGN (base_type);
1459
1460   if (mode != BLKmode
1461       && (base_align < GET_MODE_ALIGNMENT (mode)
1462           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1463           || bitpos % BITS_PER_UNIT != 0))
1464     return true;
1465
1466   return false;
1467 }
1468
1469 /* Return true if EXPR may be non-addressable.   */
1470
1471 static bool
1472 may_be_nonaddressable_p (tree expr)
1473 {
1474   switch (TREE_CODE (expr))
1475     {
1476     case COMPONENT_REF:
1477       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1478              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1479
1480     case ARRAY_REF:
1481     case ARRAY_RANGE_REF:
1482       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1483
1484     case VIEW_CONVERT_EXPR:
1485       /* This kind of view-conversions may wrap non-addressable objects
1486          and make them look addressable.  After some processing the
1487          non-addressability may be uncovered again, causing ADDR_EXPRs
1488          of inappropriate objects to be built.  */
1489       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1490              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1491
1492     default:
1493       break;
1494     }
1495
1496   return false;
1497 }
1498
1499 /* Finds addresses in *OP_P inside STMT.  */
1500
1501 static void
1502 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1503 {
1504   tree base = *op_p, step = NULL;
1505   struct iv *civ;
1506   struct ifs_ivopts_data ifs_ivopts_data;
1507
1508   /* Do not play with volatile memory references.  A bit too conservative,
1509      perhaps, but safe.  */
1510   if (stmt_ann (stmt)->has_volatile_ops)
1511     goto fail;
1512
1513   /* Ignore bitfields for now.  Not really something terribly complicated
1514      to handle.  TODO.  */
1515   if (TREE_CODE (base) == BIT_FIELD_REF)
1516     goto fail;
1517
1518   if (may_be_nonaddressable_p (base))
1519     goto fail;
1520
1521   if (STRICT_ALIGNMENT
1522       && may_be_unaligned_p (base))
1523     goto fail;
1524
1525   base = unshare_expr (base);
1526
1527   if (TREE_CODE (base) == TARGET_MEM_REF)
1528     {
1529       tree type = build_pointer_type (TREE_TYPE (base));
1530       tree astep;
1531
1532       if (TMR_BASE (base)
1533           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1534         {
1535           civ = get_iv (data, TMR_BASE (base));
1536           if (!civ)
1537             goto fail;
1538
1539           TMR_BASE (base) = civ->base;
1540           step = civ->step;
1541         }
1542       if (TMR_INDEX (base)
1543           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1544         {
1545           civ = get_iv (data, TMR_INDEX (base));
1546           if (!civ)
1547             goto fail;
1548
1549           TMR_INDEX (base) = civ->base;
1550           astep = civ->step;
1551
1552           if (astep)
1553             {
1554               if (TMR_STEP (base))
1555                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1556
1557               if (step)
1558                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1559               else
1560                 step = astep;
1561             }
1562         }
1563
1564       if (zero_p (step))
1565         goto fail;
1566       base = tree_mem_ref_addr (type, base);
1567     }
1568   else
1569     {
1570       ifs_ivopts_data.ivopts_data = data;
1571       ifs_ivopts_data.stmt = stmt;
1572       ifs_ivopts_data.step_p = &step;
1573       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1574           || zero_p (step))
1575         goto fail;
1576
1577       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1578       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1579
1580       base = build_fold_addr_expr (base);
1581
1582       /* Substituting bases of IVs into the base expression might
1583          have caused folding opportunities.  */
1584       if (TREE_CODE (base) == ADDR_EXPR)
1585         {
1586           tree *ref = &TREE_OPERAND (base, 0);
1587           while (handled_component_p (*ref))
1588             ref = &TREE_OPERAND (*ref, 0);
1589           if (TREE_CODE (*ref) == INDIRECT_REF)
1590             *ref = fold_indirect_ref (*ref);
1591         }
1592     }
1593
1594   civ = alloc_iv (base, step);
1595   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1596   return;
1597
1598 fail:
1599   for_each_index (op_p, idx_record_use, data);
1600 }
1601
1602 /* Finds and records invariants used in STMT.  */
1603
1604 static void
1605 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1606 {
1607   ssa_op_iter iter;
1608   use_operand_p use_p;
1609   tree op;
1610
1611   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1612     {
1613       op = USE_FROM_PTR (use_p);
1614       record_invariant (data, op, false);
1615     }
1616 }
1617
1618 /* Finds interesting uses of induction variables in the statement STMT.  */
1619
1620 static void
1621 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1622 {
1623   struct iv *iv;
1624   tree op, lhs, rhs;
1625   ssa_op_iter iter;
1626   use_operand_p use_p;
1627
1628   find_invariants_stmt (data, stmt);
1629
1630   if (TREE_CODE (stmt) == COND_EXPR)
1631     {
1632       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1633       return;
1634     }
1635
1636   if (TREE_CODE (stmt) == MODIFY_EXPR)
1637     {
1638       lhs = TREE_OPERAND (stmt, 0);
1639       rhs = TREE_OPERAND (stmt, 1);
1640
1641       if (TREE_CODE (lhs) == SSA_NAME)
1642         {
1643           /* If the statement defines an induction variable, the uses are not
1644              interesting by themselves.  */
1645
1646           iv = get_iv (data, lhs);
1647
1648           if (iv && !zero_p (iv->step))
1649             return;
1650         }
1651
1652       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1653         {
1654         case tcc_comparison:
1655           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1656           return;
1657
1658         case tcc_reference:
1659           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1660           if (REFERENCE_CLASS_P (lhs))
1661             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1662           return;
1663
1664         default: ;
1665         }
1666
1667       if (REFERENCE_CLASS_P (lhs)
1668           && is_gimple_val (rhs))
1669         {
1670           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1671           find_interesting_uses_op (data, rhs);
1672           return;
1673         }
1674
1675       /* TODO -- we should also handle address uses of type
1676
1677          memory = call (whatever);
1678
1679          and
1680
1681          call (memory).  */
1682     }
1683
1684   if (TREE_CODE (stmt) == PHI_NODE
1685       && bb_for_stmt (stmt) == data->current_loop->header)
1686     {
1687       lhs = PHI_RESULT (stmt);
1688       iv = get_iv (data, lhs);
1689
1690       if (iv && !zero_p (iv->step))
1691         return;
1692     }
1693
1694   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1695     {
1696       op = USE_FROM_PTR (use_p);
1697
1698       if (TREE_CODE (op) != SSA_NAME)
1699         continue;
1700
1701       iv = get_iv (data, op);
1702       if (!iv)
1703         continue;
1704
1705       find_interesting_uses_op (data, op);
1706     }
1707 }
1708
1709 /* Finds interesting uses of induction variables outside of loops
1710    on loop exit edge EXIT.  */
1711
1712 static void
1713 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1714 {
1715   tree phi, def;
1716
1717   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1718     {
1719       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1720       find_interesting_uses_op (data, def);
1721     }
1722 }
1723
1724 /* Finds uses of the induction variables that are interesting.  */
1725
1726 static void
1727 find_interesting_uses (struct ivopts_data *data)
1728 {
1729   basic_block bb;
1730   block_stmt_iterator bsi;
1731   tree phi;
1732   basic_block *body = get_loop_body (data->current_loop);
1733   unsigned i;
1734   struct version_info *info;
1735   edge e;
1736
1737   if (dump_file && (dump_flags & TDF_DETAILS))
1738     fprintf (dump_file, "Uses:\n\n");
1739
1740   for (i = 0; i < data->current_loop->num_nodes; i++)
1741     {
1742       edge_iterator ei;
1743       bb = body[i];
1744
1745       FOR_EACH_EDGE (e, ei, bb->succs)
1746         if (e->dest != EXIT_BLOCK_PTR
1747             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1748           find_interesting_uses_outside (data, e);
1749
1750       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1751         find_interesting_uses_stmt (data, phi);
1752       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1753         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1754     }
1755
1756   if (dump_file && (dump_flags & TDF_DETAILS))
1757     {
1758       bitmap_iterator bi;
1759
1760       fprintf (dump_file, "\n");
1761
1762       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1763         {
1764           info = ver_info (data, i);
1765           if (info->inv_id)
1766             {
1767               fprintf (dump_file, "  ");
1768               print_generic_expr (dump_file, info->name, TDF_SLIM);
1769               fprintf (dump_file, " is invariant (%d)%s\n",
1770                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1771             }
1772         }
1773
1774       fprintf (dump_file, "\n");
1775     }
1776
1777   free (body);
1778 }
1779
1780 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1781    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1782    we are at the top-level of the processed address.  */
1783
1784 static tree
1785 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1786                 unsigned HOST_WIDE_INT *offset)
1787 {
1788   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1789   enum tree_code code;
1790   tree type, orig_type = TREE_TYPE (expr);
1791   unsigned HOST_WIDE_INT off0, off1, st;
1792   tree orig_expr = expr;
1793
1794   STRIP_NOPS (expr);
1795
1796   type = TREE_TYPE (expr);
1797   code = TREE_CODE (expr);
1798   *offset = 0;
1799
1800   switch (code)
1801     {
1802     case INTEGER_CST:
1803       if (!cst_and_fits_in_hwi (expr)
1804           || zero_p (expr))
1805         return orig_expr;
1806
1807       *offset = int_cst_value (expr);
1808       return build_int_cst (orig_type, 0);
1809
1810     case PLUS_EXPR:
1811     case MINUS_EXPR:
1812       op0 = TREE_OPERAND (expr, 0);
1813       op1 = TREE_OPERAND (expr, 1);
1814
1815       op0 = strip_offset_1 (op0, false, false, &off0);
1816       op1 = strip_offset_1 (op1, false, false, &off1);
1817
1818       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1819       if (op0 == TREE_OPERAND (expr, 0)
1820           && op1 == TREE_OPERAND (expr, 1))
1821         return orig_expr;
1822
1823       if (zero_p (op1))
1824         expr = op0;
1825       else if (zero_p (op0))
1826         {
1827           if (code == PLUS_EXPR)
1828             expr = op1;
1829           else
1830             expr = fold_build1 (NEGATE_EXPR, type, op1);
1831         }
1832       else
1833         expr = fold_build2 (code, type, op0, op1);
1834
1835       return fold_convert (orig_type, expr);
1836
1837     case ARRAY_REF:
1838       if (!inside_addr)
1839         return orig_expr;
1840
1841       step = array_ref_element_size (expr);
1842       if (!cst_and_fits_in_hwi (step))
1843         break;
1844
1845       st = int_cst_value (step);
1846       op1 = TREE_OPERAND (expr, 1);
1847       op1 = strip_offset_1 (op1, false, false, &off1);
1848       *offset = off1 * st;
1849
1850       if (top_compref
1851           && zero_p (op1))
1852         {
1853           /* Strip the component reference completely.  */
1854           op0 = TREE_OPERAND (expr, 0);
1855           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1856           *offset += off0;
1857           return op0;
1858         }
1859       break;
1860
1861     case COMPONENT_REF:
1862       if (!inside_addr)
1863         return orig_expr;
1864
1865       tmp = component_ref_field_offset (expr);
1866       if (top_compref
1867           && cst_and_fits_in_hwi (tmp))
1868         {
1869           /* Strip the component reference completely.  */
1870           op0 = TREE_OPERAND (expr, 0);
1871           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1872           *offset = off0 + int_cst_value (tmp);
1873           return op0;
1874         }
1875       break;
1876
1877     case ADDR_EXPR:
1878       op0 = TREE_OPERAND (expr, 0);
1879       op0 = strip_offset_1 (op0, true, true, &off0);
1880       *offset += off0;
1881
1882       if (op0 == TREE_OPERAND (expr, 0))
1883         return orig_expr;
1884
1885       expr = build_fold_addr_expr (op0);
1886       return fold_convert (orig_type, expr);
1887
1888     case INDIRECT_REF:
1889       inside_addr = false;
1890       break;
1891
1892     default:
1893       return orig_expr;
1894     }
1895
1896   /* Default handling of expressions for that we want to recurse into
1897      the first operand.  */
1898   op0 = TREE_OPERAND (expr, 0);
1899   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1900   *offset += off0;
1901
1902   if (op0 == TREE_OPERAND (expr, 0)
1903       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1904     return orig_expr;
1905
1906   expr = copy_node (expr);
1907   TREE_OPERAND (expr, 0) = op0;
1908   if (op1)
1909     TREE_OPERAND (expr, 1) = op1;
1910
1911   /* Inside address, we might strip the top level component references,
1912      thus changing type of the expression.  Handling of ADDR_EXPR
1913      will fix that.  */
1914   expr = fold_convert (orig_type, expr);
1915
1916   return expr;
1917 }
1918
1919 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1920
1921 static tree
1922 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1923 {
1924   return strip_offset_1 (expr, false, false, offset);
1925 }
1926
1927 /* Returns variant of TYPE that can be used as base for different uses.
1928    We return unsigned type with the same precision, which avoids problems
1929    with overflows.  */
1930
1931 static tree
1932 generic_type_for (tree type)
1933 {
1934   if (POINTER_TYPE_P (type))
1935     return unsigned_type_for (type);
1936
1937   if (TYPE_UNSIGNED (type))
1938     return type;
1939
1940   return unsigned_type_for (type);
1941 }
1942
1943 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1944    the bitmap to that we should store it.  */
1945
1946 static struct ivopts_data *fd_ivopts_data;
1947 static tree
1948 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1949 {
1950   bitmap *depends_on = data;
1951   struct version_info *info;
1952
1953   if (TREE_CODE (*expr_p) != SSA_NAME)
1954     return NULL_TREE;
1955   info = name_info (fd_ivopts_data, *expr_p);
1956
1957   if (!info->inv_id || info->has_nonlin_use)
1958     return NULL_TREE;
1959
1960   if (!*depends_on)
1961     *depends_on = BITMAP_ALLOC (NULL);
1962   bitmap_set_bit (*depends_on, info->inv_id);
1963
1964   return NULL_TREE;
1965 }
1966
1967 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1968    position to POS.  If USE is not NULL, the candidate is set as related to
1969    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1970    replacement of the final value of the iv by a direct computation.  */
1971
1972 static struct iv_cand *
1973 add_candidate_1 (struct ivopts_data *data,
1974                  tree base, tree step, bool important, enum iv_position pos,
1975                  struct iv_use *use, tree incremented_at)
1976 {
1977   unsigned i;
1978   struct iv_cand *cand = NULL;
1979   tree type, orig_type;
1980
1981   if (base)
1982     {
1983       orig_type = TREE_TYPE (base);
1984       type = generic_type_for (orig_type);
1985       if (type != orig_type)
1986         {
1987           base = fold_convert (type, base);
1988           if (step)
1989             step = fold_convert (type, step);
1990         }
1991     }
1992
1993   for (i = 0; i < n_iv_cands (data); i++)
1994     {
1995       cand = iv_cand (data, i);
1996
1997       if (cand->pos != pos)
1998         continue;
1999
2000       if (cand->incremented_at != incremented_at)
2001         continue;
2002
2003       if (!cand->iv)
2004         {
2005           if (!base && !step)
2006             break;
2007
2008           continue;
2009         }
2010
2011       if (!base && !step)
2012         continue;
2013
2014       if (!operand_equal_p (base, cand->iv->base, 0))
2015         continue;
2016
2017       if (zero_p (cand->iv->step))
2018         {
2019           if (zero_p (step))
2020             break;
2021         }
2022       else
2023         {
2024           if (step && operand_equal_p (step, cand->iv->step, 0))
2025             break;
2026         }
2027     }
2028
2029   if (i == n_iv_cands (data))
2030     {
2031       cand = XCNEW (struct iv_cand);
2032       cand->id = i;
2033
2034       if (!base && !step)
2035         cand->iv = NULL;
2036       else
2037         cand->iv = alloc_iv (base, step);
2038
2039       cand->pos = pos;
2040       if (pos != IP_ORIGINAL && cand->iv)
2041         {
2042           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2043           cand->var_after = cand->var_before;
2044         }
2045       cand->important = important;
2046       cand->incremented_at = incremented_at;
2047       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2048
2049       if (step
2050           && TREE_CODE (step) != INTEGER_CST)
2051         {
2052           fd_ivopts_data = data;
2053           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2054         }
2055
2056       if (dump_file && (dump_flags & TDF_DETAILS))
2057         dump_cand (dump_file, cand);
2058     }
2059
2060   if (important && !cand->important)
2061     {
2062       cand->important = true;
2063       if (dump_file && (dump_flags & TDF_DETAILS))
2064         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2065     }
2066
2067   if (use)
2068     {
2069       bitmap_set_bit (use->related_cands, i);
2070       if (dump_file && (dump_flags & TDF_DETAILS))
2071         fprintf (dump_file, "Candidate %d is related to use %d\n",
2072                  cand->id, use->id);
2073     }
2074
2075   return cand;
2076 }
2077
2078 /* Returns true if incrementing the induction variable at the end of the LOOP
2079    is allowed.
2080
2081    The purpose is to avoid splitting latch edge with a biv increment, thus
2082    creating a jump, possibly confusing other optimization passes and leaving
2083    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2084    is not available (so we do not have a better alternative), or if the latch
2085    edge is already nonempty.  */
2086
2087 static bool
2088 allow_ip_end_pos_p (struct loop *loop)
2089 {
2090   if (!ip_normal_pos (loop))
2091     return true;
2092
2093   if (!empty_block_p (ip_end_pos (loop)))
2094     return true;
2095
2096   return false;
2097 }
2098
2099 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2100    position to POS.  If USE is not NULL, the candidate is set as related to
2101    it.  The candidate computation is scheduled on all available positions.  */
2102
2103 static void
2104 add_candidate (struct ivopts_data *data,
2105                tree base, tree step, bool important, struct iv_use *use)
2106 {
2107   if (ip_normal_pos (data->current_loop))
2108     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2109   if (ip_end_pos (data->current_loop)
2110       && allow_ip_end_pos_p (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2112 }
2113
2114 /* Add a standard "0 + 1 * iteration" iv candidate for a
2115    type with SIZE bits.  */
2116
2117 static void
2118 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2119                                      unsigned int size)
2120 {
2121   tree type = lang_hooks.types.type_for_size (size, true);
2122   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2123                  true, NULL);
2124 }
2125
2126 /* Adds standard iv candidates.  */
2127
2128 static void
2129 add_standard_iv_candidates (struct ivopts_data *data)
2130 {
2131   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2132
2133   /* The same for a double-integer type if it is still fast enough.  */
2134   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2135     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2136 }
2137
2138
2139 /* Adds candidates bases on the old induction variable IV.  */
2140
2141 static void
2142 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2143 {
2144   tree phi, def;
2145   struct iv_cand *cand;
2146
2147   add_candidate (data, iv->base, iv->step, true, NULL);
2148
2149   /* The same, but with initial value zero.  */
2150   add_candidate (data,
2151                  build_int_cst (TREE_TYPE (iv->base), 0),
2152                  iv->step, true, NULL);
2153
2154   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2155   if (TREE_CODE (phi) == PHI_NODE)
2156     {
2157       /* Additionally record the possibility of leaving the original iv
2158          untouched.  */
2159       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2160       cand = add_candidate_1 (data,
2161                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2162                               SSA_NAME_DEF_STMT (def));
2163       cand->var_before = iv->ssa_name;
2164       cand->var_after = def;
2165     }
2166 }
2167
2168 /* Adds candidates based on the old induction variables.  */
2169
2170 static void
2171 add_old_ivs_candidates (struct ivopts_data *data)
2172 {
2173   unsigned i;
2174   struct iv *iv;
2175   bitmap_iterator bi;
2176
2177   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2178     {
2179       iv = ver_info (data, i)->iv;
2180       if (iv && iv->biv_p && !zero_p (iv->step))
2181         add_old_iv_candidates (data, iv);
2182     }
2183 }
2184
2185 /* Adds candidates based on the value of the induction variable IV and USE.  */
2186
2187 static void
2188 add_iv_value_candidates (struct ivopts_data *data,
2189                          struct iv *iv, struct iv_use *use)
2190 {
2191   unsigned HOST_WIDE_INT offset;
2192   tree base;
2193
2194   add_candidate (data, iv->base, iv->step, false, use);
2195
2196   /* The same, but with initial value zero.  Make such variable important,
2197      since it is generic enough so that possibly many uses may be based
2198      on it.  */
2199   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2200                  iv->step, true, use);
2201
2202   /* Third, try removing the constant offset.  */
2203   base = strip_offset (iv->base, &offset);
2204   if (offset)
2205     add_candidate (data, base, iv->step, false, use);
2206 }
2207
2208 /* Adds candidates based on the uses.  */
2209
2210 static void
2211 add_derived_ivs_candidates (struct ivopts_data *data)
2212 {
2213   unsigned i;
2214
2215   for (i = 0; i < n_iv_uses (data); i++)
2216     {
2217       struct iv_use *use = iv_use (data, i);
2218
2219       if (!use)
2220         continue;
2221
2222       switch (use->type)
2223         {
2224         case USE_NONLINEAR_EXPR:
2225         case USE_COMPARE:
2226         case USE_ADDRESS:
2227           /* Just add the ivs based on the value of the iv used here.  */
2228           add_iv_value_candidates (data, use->iv, use);
2229           break;
2230
2231         default:
2232           gcc_unreachable ();
2233         }
2234     }
2235 }
2236
2237 /* Record important candidates and add them to related_cands bitmaps
2238    if needed.  */
2239
2240 static void
2241 record_important_candidates (struct ivopts_data *data)
2242 {
2243   unsigned i;
2244   struct iv_use *use;
2245
2246   for (i = 0; i < n_iv_cands (data); i++)
2247     {
2248       struct iv_cand *cand = iv_cand (data, i);
2249
2250       if (cand->important)
2251         bitmap_set_bit (data->important_candidates, i);
2252     }
2253
2254   data->consider_all_candidates = (n_iv_cands (data)
2255                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2256
2257   if (data->consider_all_candidates)
2258     {
2259       /* We will not need "related_cands" bitmaps in this case,
2260          so release them to decrease peak memory consumption.  */
2261       for (i = 0; i < n_iv_uses (data); i++)
2262         {
2263           use = iv_use (data, i);
2264           BITMAP_FREE (use->related_cands);
2265         }
2266     }
2267   else
2268     {
2269       /* Add important candidates to the related_cands bitmaps.  */
2270       for (i = 0; i < n_iv_uses (data); i++)
2271         bitmap_ior_into (iv_use (data, i)->related_cands,
2272                          data->important_candidates);
2273     }
2274 }
2275
2276 /* Finds the candidates for the induction variables.  */
2277
2278 static void
2279 find_iv_candidates (struct ivopts_data *data)
2280 {
2281   /* Add commonly used ivs.  */
2282   add_standard_iv_candidates (data);
2283
2284   /* Add old induction variables.  */
2285   add_old_ivs_candidates (data);
2286
2287   /* Add induction variables derived from uses.  */
2288   add_derived_ivs_candidates (data);
2289
2290   /* Record the important candidates.  */
2291   record_important_candidates (data);
2292 }
2293
2294 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2295    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2296    we allocate a simple list to every use.  */
2297
2298 static void
2299 alloc_use_cost_map (struct ivopts_data *data)
2300 {
2301   unsigned i, size, s, j;
2302
2303   for (i = 0; i < n_iv_uses (data); i++)
2304     {
2305       struct iv_use *use = iv_use (data, i);
2306       bitmap_iterator bi;
2307
2308       if (data->consider_all_candidates)
2309         size = n_iv_cands (data);
2310       else
2311         {
2312           s = 0;
2313           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2314             {
2315               s++;
2316             }
2317
2318           /* Round up to the power of two, so that moduling by it is fast.  */
2319           for (size = 1; size < s; size <<= 1)
2320             continue;
2321         }
2322
2323       use->n_map_members = size;
2324       use->cost_map = XCNEWVEC (struct cost_pair, size);
2325     }
2326 }
2327
2328 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2329    on invariants DEPENDS_ON and that the value used in expressing it
2330    is VALUE.*/
2331
2332 static void
2333 set_use_iv_cost (struct ivopts_data *data,
2334                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2335                  bitmap depends_on, tree value)
2336 {
2337   unsigned i, s;
2338
2339   if (cost == INFTY)
2340     {
2341       BITMAP_FREE (depends_on);
2342       return;
2343     }
2344
2345   if (data->consider_all_candidates)
2346     {
2347       use->cost_map[cand->id].cand = cand;
2348       use->cost_map[cand->id].cost = cost;
2349       use->cost_map[cand->id].depends_on = depends_on;
2350       use->cost_map[cand->id].value = value;
2351       return;
2352     }
2353
2354   /* n_map_members is a power of two, so this computes modulo.  */
2355   s = cand->id & (use->n_map_members - 1);
2356   for (i = s; i < use->n_map_members; i++)
2357     if (!use->cost_map[i].cand)
2358       goto found;
2359   for (i = 0; i < s; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362
2363   gcc_unreachable ();
2364
2365 found:
2366   use->cost_map[i].cand = cand;
2367   use->cost_map[i].cost = cost;
2368   use->cost_map[i].depends_on = depends_on;
2369   use->cost_map[i].value = value;
2370 }
2371
2372 /* Gets cost of (USE, CANDIDATE) pair.  */
2373
2374 static struct cost_pair *
2375 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2376                  struct iv_cand *cand)
2377 {
2378   unsigned i, s;
2379   struct cost_pair *ret;
2380
2381   if (!cand)
2382     return NULL;
2383
2384   if (data->consider_all_candidates)
2385     {
2386       ret = use->cost_map + cand->id;
2387       if (!ret->cand)
2388         return NULL;
2389
2390       return ret;
2391     }
2392
2393   /* n_map_members is a power of two, so this computes modulo.  */
2394   s = cand->id & (use->n_map_members - 1);
2395   for (i = s; i < use->n_map_members; i++)
2396     if (use->cost_map[i].cand == cand)
2397       return use->cost_map + i;
2398
2399   for (i = 0; i < s; i++)
2400     if (use->cost_map[i].cand == cand)
2401       return use->cost_map + i;
2402
2403   return NULL;
2404 }
2405
2406 /* Returns estimate on cost of computing SEQ.  */
2407
2408 static unsigned
2409 seq_cost (rtx seq)
2410 {
2411   unsigned cost = 0;
2412   rtx set;
2413
2414   for (; seq; seq = NEXT_INSN (seq))
2415     {
2416       set = single_set (seq);
2417       if (set)
2418         cost += rtx_cost (set, SET);
2419       else
2420         cost++;
2421     }
2422
2423   return cost;
2424 }
2425
2426 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2427 static rtx
2428 produce_memory_decl_rtl (tree obj, int *regno)
2429 {
2430   rtx x;
2431
2432   gcc_assert (obj);
2433   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2434     {
2435       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2436       x = gen_rtx_SYMBOL_REF (Pmode, name);
2437     }
2438   else
2439     x = gen_raw_REG (Pmode, (*regno)++);
2440
2441   return gen_rtx_MEM (DECL_MODE (obj), x);
2442 }
2443
2444 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2445    walk_tree.  DATA contains the actual fake register number.  */
2446
2447 static tree
2448 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2449 {
2450   tree obj = NULL_TREE;
2451   rtx x = NULL_RTX;
2452   int *regno = data;
2453
2454   switch (TREE_CODE (*expr_p))
2455     {
2456     case ADDR_EXPR:
2457       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2458            handled_component_p (*expr_p);
2459            expr_p = &TREE_OPERAND (*expr_p, 0))
2460         continue;
2461       obj = *expr_p;
2462       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2463         x = produce_memory_decl_rtl (obj, regno);
2464       break;
2465
2466     case SSA_NAME:
2467       *ws = 0;
2468       obj = SSA_NAME_VAR (*expr_p);
2469       if (!DECL_RTL_SET_P (obj))
2470         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2471       break;
2472
2473     case VAR_DECL:
2474     case PARM_DECL:
2475     case RESULT_DECL:
2476       *ws = 0;
2477       obj = *expr_p;
2478
2479       if (DECL_RTL_SET_P (obj))
2480         break;
2481
2482       if (DECL_MODE (obj) == BLKmode)
2483         x = produce_memory_decl_rtl (obj, regno);
2484       else
2485         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2486
2487       break;
2488
2489     default:
2490       break;
2491     }
2492
2493   if (x)
2494     {
2495       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2496       SET_DECL_RTL (obj, x);
2497     }
2498
2499   return NULL_TREE;
2500 }
2501
2502 /* Determines cost of the computation of EXPR.  */
2503
2504 static unsigned
2505 computation_cost (tree expr)
2506 {
2507   rtx seq, rslt;
2508   tree type = TREE_TYPE (expr);
2509   unsigned cost;
2510   /* Avoid using hard regs in ways which may be unsupported.  */
2511   int regno = LAST_VIRTUAL_REGISTER + 1;
2512
2513   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2514   start_sequence ();
2515   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2516   seq = get_insns ();
2517   end_sequence ();
2518
2519   cost = seq_cost (seq);
2520   if (MEM_P (rslt))
2521     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2522
2523   return cost;
2524 }
2525
2526 /* Returns variable containing the value of candidate CAND at statement AT.  */
2527
2528 static tree
2529 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2530 {
2531   if (stmt_after_increment (loop, cand, stmt))
2532     return cand->var_after;
2533   else
2534     return cand->var_before;
2535 }
2536
2537 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2538    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2539
2540 int
2541 tree_int_cst_sign_bit (tree t)
2542 {
2543   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2544   unsigned HOST_WIDE_INT w;
2545
2546   if (bitno < HOST_BITS_PER_WIDE_INT)
2547     w = TREE_INT_CST_LOW (t);
2548   else
2549     {
2550       w = TREE_INT_CST_HIGH (t);
2551       bitno -= HOST_BITS_PER_WIDE_INT;
2552     }
2553
2554   return (w >> bitno) & 1;
2555 }
2556
2557 /* If we can prove that TOP = cst * BOT for some constant cst,
2558    store cst to MUL and return true.  Otherwise return false.
2559    The returned value is always sign-extended, regardless of the
2560    signedness of TOP and BOT.  */
2561
2562 static bool
2563 constant_multiple_of (tree top, tree bot, double_int *mul)
2564 {
2565   tree mby;
2566   enum tree_code code;
2567   double_int res, p0, p1;
2568   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2569
2570   STRIP_NOPS (top);
2571   STRIP_NOPS (bot);
2572
2573   if (operand_equal_p (top, bot, 0))
2574     {
2575       *mul = double_int_one;
2576       return true;
2577     }
2578
2579   code = TREE_CODE (top);
2580   switch (code)
2581     {
2582     case MULT_EXPR:
2583       mby = TREE_OPERAND (top, 1);
2584       if (TREE_CODE (mby) != INTEGER_CST)
2585         return false;
2586
2587       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2588         return false;
2589
2590       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2591                               precision);
2592       return true;
2593
2594     case PLUS_EXPR:
2595     case MINUS_EXPR:
2596       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2597           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2598         return false;
2599
2600       if (code == MINUS_EXPR)
2601         p1 = double_int_neg (p1);
2602       *mul = double_int_sext (double_int_add (p0, p1), precision);
2603       return true;
2604
2605     case INTEGER_CST:
2606       if (TREE_CODE (bot) != INTEGER_CST)
2607         return false;
2608
2609       p0 = double_int_sext (tree_to_double_int (bot), precision);
2610       p1 = double_int_sext (tree_to_double_int (top), precision);
2611       if (double_int_zero_p (p1))
2612         return false;
2613       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2614                               precision);
2615       return double_int_zero_p (res);
2616
2617     default:
2618       return false;
2619     }
2620 }
2621
2622 /* Sets COMB to CST.  */
2623
2624 static void
2625 aff_combination_const (struct affine_tree_combination *comb, tree type,
2626                        unsigned HOST_WIDE_INT cst)
2627 {
2628   unsigned prec = TYPE_PRECISION (type);
2629
2630   comb->type = type;
2631   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2632
2633   comb->n = 0;
2634   comb->rest = NULL_TREE;
2635   comb->offset = cst & comb->mask;
2636 }
2637
2638 /* Sets COMB to single element ELT.  */
2639
2640 static void
2641 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2642 {
2643   unsigned prec = TYPE_PRECISION (type);
2644
2645   comb->type = type;
2646   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2647
2648   comb->n = 1;
2649   comb->elts[0] = elt;
2650   comb->coefs[0] = 1;
2651   comb->rest = NULL_TREE;
2652   comb->offset = 0;
2653 }
2654
2655 /* Scales COMB by SCALE.  */
2656
2657 static void
2658 aff_combination_scale (struct affine_tree_combination *comb,
2659                        unsigned HOST_WIDE_INT scale)
2660 {
2661   unsigned i, j;
2662
2663   if (scale == 1)
2664     return;
2665
2666   if (scale == 0)
2667     {
2668       aff_combination_const (comb, comb->type, 0);
2669       return;
2670     }
2671
2672   comb->offset = (scale * comb->offset) & comb->mask;
2673   for (i = 0, j = 0; i < comb->n; i++)
2674     {
2675       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2676       comb->elts[j] = comb->elts[i];
2677       if (comb->coefs[j] != 0)
2678         j++;
2679     }
2680   comb->n = j;
2681
2682   if (comb->rest)
2683     {
2684       if (comb->n < MAX_AFF_ELTS)
2685         {
2686           comb->coefs[comb->n] = scale;
2687           comb->elts[comb->n] = comb->rest;
2688           comb->rest = NULL_TREE;
2689           comb->n++;
2690         }
2691       else
2692         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2693                                   build_int_cst_type (comb->type, scale));
2694     }
2695 }
2696
2697 /* Adds ELT * SCALE to COMB.  */
2698
2699 static void
2700 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2701                          unsigned HOST_WIDE_INT scale)
2702 {
2703   unsigned i;
2704
2705   if (scale == 0)
2706     return;
2707
2708   for (i = 0; i < comb->n; i++)
2709     if (operand_equal_p (comb->elts[i], elt, 0))
2710       {
2711         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2712         if (comb->coefs[i])
2713           return;
2714
2715         comb->n--;
2716         comb->coefs[i] = comb->coefs[comb->n];
2717         comb->elts[i] = comb->elts[comb->n];
2718
2719         if (comb->rest)
2720           {
2721             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2722             comb->coefs[comb->n] = 1;
2723             comb->elts[comb->n] = comb->rest;
2724             comb->rest = NULL_TREE;
2725             comb->n++;
2726           }
2727         return;
2728       }
2729   if (comb->n < MAX_AFF_ELTS)
2730     {
2731       comb->coefs[comb->n] = scale;
2732       comb->elts[comb->n] = elt;
2733       comb->n++;
2734       return;
2735     }
2736
2737   if (scale == 1)
2738     elt = fold_convert (comb->type, elt);
2739   else
2740     elt = fold_build2 (MULT_EXPR, comb->type,
2741                        fold_convert (comb->type, elt),
2742                        build_int_cst_type (comb->type, scale));
2743
2744   if (comb->rest)
2745     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2746   else
2747     comb->rest = elt;
2748 }
2749
2750 /* Adds COMB2 to COMB1.  */
2751
2752 static void
2753 aff_combination_add (struct affine_tree_combination *comb1,
2754                      struct affine_tree_combination *comb2)
2755 {
2756   unsigned i;
2757
2758   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2759   for (i = 0; i < comb2->n; i++)
2760     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2761   if (comb2->rest)
2762     aff_combination_add_elt (comb1, comb2->rest, 1);
2763 }
2764
2765 /* Convert COMB to TYPE.  */
2766
2767 static void
2768 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2769 {
2770   unsigned prec = TYPE_PRECISION (type);
2771   unsigned i;
2772
2773   /* If the precision of both types is the same, it suffices to change the type
2774      of the whole combination -- the elements are allowed to have another type
2775      equivalent wrto STRIP_NOPS.  */
2776   if (prec == TYPE_PRECISION (comb->type))
2777     {
2778       comb->type = type;
2779       return;
2780     }
2781
2782   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2783   comb->offset = comb->offset & comb->mask;
2784
2785   /* The type of the elements can be different from comb->type only as
2786      much as what STRIP_NOPS would remove.  We can just directly cast
2787      to TYPE.  */
2788   for (i = 0; i < comb->n; i++)
2789     comb->elts[i] = fold_convert (type, comb->elts[i]);
2790   if (comb->rest)
2791     comb->rest = fold_convert (type, comb->rest);
2792
2793   comb->type = type;
2794 }
2795
2796 /* Splits EXPR into an affine combination of parts.  */
2797
2798 static void
2799 tree_to_aff_combination (tree expr, tree type,
2800                          struct affine_tree_combination *comb)
2801 {
2802   struct affine_tree_combination tmp;
2803   enum tree_code code;
2804   tree cst, core, toffset;
2805   HOST_WIDE_INT bitpos, bitsize;
2806   enum machine_mode mode;
2807   int unsignedp, volatilep;
2808
2809   STRIP_NOPS (expr);
2810
2811   code = TREE_CODE (expr);
2812   switch (code)
2813     {
2814     case INTEGER_CST:
2815       aff_combination_const (comb, type, int_cst_value (expr));
2816       return;
2817
2818     case PLUS_EXPR:
2819     case MINUS_EXPR:
2820       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2821       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2822       if (code == MINUS_EXPR)
2823         aff_combination_scale (&tmp, -1);
2824       aff_combination_add (comb, &tmp);
2825       return;
2826
2827     case MULT_EXPR:
2828       cst = TREE_OPERAND (expr, 1);
2829       if (TREE_CODE (cst) != INTEGER_CST)
2830         break;
2831       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2832       aff_combination_scale (comb, int_cst_value (cst));
2833       return;
2834
2835     case NEGATE_EXPR:
2836       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2837       aff_combination_scale (comb, -1);
2838       return;
2839
2840     case ADDR_EXPR:
2841       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2842                                   &toffset, &mode, &unsignedp, &volatilep,
2843                                   false);
2844       if (bitpos % BITS_PER_UNIT != 0)
2845         break;
2846       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2847       core = build_fold_addr_expr (core);
2848       if (TREE_CODE (core) == ADDR_EXPR)
2849         aff_combination_add_elt (comb, core, 1);
2850       else
2851         {
2852           tree_to_aff_combination (core, type, &tmp);
2853           aff_combination_add (comb, &tmp);
2854         }
2855       if (toffset)
2856         {
2857           tree_to_aff_combination (toffset, type, &tmp);
2858           aff_combination_add (comb, &tmp);
2859         }
2860       return;
2861
2862     default:
2863       break;
2864     }
2865
2866   aff_combination_elt (comb, type, expr);
2867 }
2868
2869 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2870
2871 static tree
2872 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2873                  unsigned HOST_WIDE_INT mask)
2874 {
2875   enum tree_code code;
2876
2877   scale &= mask;
2878   elt = fold_convert (type, elt);
2879
2880   if (scale == 1)
2881     {
2882       if (!expr)
2883         return elt;
2884
2885       return fold_build2 (PLUS_EXPR, type, expr, elt);
2886     }
2887
2888   if (scale == mask)
2889     {
2890       if (!expr)
2891         return fold_build1 (NEGATE_EXPR, type, elt);
2892
2893       return fold_build2 (MINUS_EXPR, type, expr, elt);
2894     }
2895
2896   if (!expr)
2897     return fold_build2 (MULT_EXPR, type, elt,
2898                         build_int_cst_type (type, scale));
2899
2900   if ((scale | (mask >> 1)) == mask)
2901     {
2902       /* Scale is negative.  */
2903       code = MINUS_EXPR;
2904       scale = (-scale) & mask;
2905     }
2906   else
2907     code = PLUS_EXPR;
2908
2909   elt = fold_build2 (MULT_EXPR, type, elt,
2910                      build_int_cst_type (type, scale));
2911   return fold_build2 (code, type, expr, elt);
2912 }
2913
2914 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2915
2916 static void
2917 unshare_aff_combination (struct affine_tree_combination *comb)
2918 {
2919   unsigned i;
2920
2921   for (i = 0; i < comb->n; i++)
2922     comb->elts[i] = unshare_expr (comb->elts[i]);
2923   if (comb->rest)
2924     comb->rest = unshare_expr (comb->rest);
2925 }
2926
2927 /* Makes tree from the affine combination COMB.  */
2928
2929 static tree
2930 aff_combination_to_tree (struct affine_tree_combination *comb)
2931 {
2932   tree type = comb->type;
2933   tree expr = comb->rest;
2934   unsigned i;
2935   unsigned HOST_WIDE_INT off, sgn;
2936
2937   if (comb->n == 0 && comb->offset == 0)
2938     {
2939       if (expr)
2940         {
2941           /* Handle the special case produced by get_computation_aff when
2942              the type does not fit in HOST_WIDE_INT.  */
2943           return fold_convert (type, expr);
2944         }
2945       else
2946         return build_int_cst (type, 0);
2947     }
2948
2949   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2950
2951   for (i = 0; i < comb->n; i++)
2952     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2953                             comb->mask);
2954
2955   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2956     {
2957       /* Offset is negative.  */
2958       off = (-comb->offset) & comb->mask;
2959       sgn = comb->mask;
2960     }
2961   else
2962     {
2963       off = comb->offset;
2964       sgn = 1;
2965     }
2966   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2967                           comb->mask);
2968 }
2969
2970 /* Folds EXPR using the affine expressions framework.  */
2971
2972 static tree
2973 fold_affine_expr (tree expr)
2974 {
2975   tree type = TREE_TYPE (expr);
2976   struct affine_tree_combination comb;
2977
2978   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2979     return expr;
2980
2981   tree_to_aff_combination (expr, type, &comb);
2982   return aff_combination_to_tree (&comb);
2983 }
2984
2985 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2986    same precision that is at least as wide as the precision of TYPE, stores
2987    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2988    type of A and B.  */
2989
2990 static tree
2991 determine_common_wider_type (tree *a, tree *b)
2992 {
2993   tree wider_type = NULL;
2994   tree suba, subb;
2995   tree atype = TREE_TYPE (*a);
2996
2997   if ((TREE_CODE (*a) == NOP_EXPR
2998        || TREE_CODE (*a) == CONVERT_EXPR))
2999     {
3000       suba = TREE_OPERAND (*a, 0);
3001       wider_type = TREE_TYPE (suba);
3002       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3003         return atype;
3004     }
3005   else
3006     return atype;
3007
3008   if ((TREE_CODE (*b) == NOP_EXPR
3009        || TREE_CODE (*b) == CONVERT_EXPR))
3010     {
3011       subb = TREE_OPERAND (*b, 0);
3012       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3013         return atype;
3014     }
3015   else
3016     return atype;
3017
3018   *a = suba;
3019   *b = subb;
3020   return wider_type;
3021 }
3022
3023 /* Determines the expression by that USE is expressed from induction variable
3024    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3025    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3026
3027 static bool
3028 get_computation_aff (struct loop *loop,
3029                      struct iv_use *use, struct iv_cand *cand, tree at,
3030                      struct affine_tree_combination *aff)
3031 {
3032   tree ubase = use->iv->base;
3033   tree ustep = use->iv->step;
3034   tree cbase = cand->iv->base;
3035   tree cstep = cand->iv->step;
3036   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3037   tree common_type;
3038   tree uutype;
3039   tree expr, delta;
3040   tree ratio;
3041   unsigned HOST_WIDE_INT ustepi, cstepi;
3042   HOST_WIDE_INT ratioi;
3043   struct affine_tree_combination cbase_aff, expr_aff;
3044   tree cstep_orig = cstep, ustep_orig = ustep;
3045   double_int rat;
3046
3047   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3048     {
3049       /* We do not have a precision to express the values of use.  */
3050       return false;
3051     }
3052
3053   expr = var_at_stmt (loop, cand, at);
3054
3055   if (TREE_TYPE (expr) != ctype)
3056     {
3057       /* This may happen with the original ivs.  */
3058       expr = fold_convert (ctype, expr);
3059     }
3060
3061   if (TYPE_UNSIGNED (utype))
3062     uutype = utype;
3063   else
3064     {
3065       uutype = unsigned_type_for (utype);
3066       ubase = fold_convert (uutype, ubase);
3067       ustep = fold_convert (uutype, ustep);
3068     }
3069
3070   if (uutype != ctype)
3071     {
3072       expr = fold_convert (uutype, expr);
3073       cbase = fold_convert (uutype, cbase);
3074       cstep = fold_convert (uutype, cstep);
3075
3076       /* If the conversion is not noop, we must take it into account when
3077          considering the value of the step.  */
3078       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3079         cstep_orig = cstep;
3080     }
3081
3082   if (cst_and_fits_in_hwi (cstep_orig)
3083       && cst_and_fits_in_hwi (ustep_orig))
3084     {
3085       ustepi = int_cst_value (ustep_orig);
3086       cstepi = int_cst_value (cstep_orig);
3087
3088       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3089         {
3090           /* TODO maybe consider case when ustep divides cstep and the ratio is
3091              a power of 2 (so that the division is fast to execute)?  We would
3092              need to be much more careful with overflows etc. then.  */
3093           return false;
3094         }
3095
3096       ratio = build_int_cst_type (uutype, ratioi);
3097     }
3098   else
3099     {
3100       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3101         return false;
3102       ratio = double_int_to_tree (uutype, rat);
3103
3104       /* Ratioi is only used to detect special cases when the multiplicative
3105          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3106          set it to 0.  */
3107       if (double_int_fits_in_shwi_p (rat))
3108         ratioi = double_int_to_shwi (rat);
3109       else
3110         ratioi = 0;
3111     }
3112
3113   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3114      type, we achieve better folding by computing their difference in this
3115      wider type, and cast the result to UUTYPE.  We do not need to worry about
3116      overflows, as all the arithmetics will in the end be performed in UUTYPE
3117      anyway.  */
3118   common_type = determine_common_wider_type (&ubase, &cbase);
3119
3120   /* We may need to shift the value if we are after the increment.  */
3121   if (stmt_after_increment (loop, cand, at))
3122     {
3123       if (uutype != common_type)
3124         cstep = fold_convert (common_type, cstep);
3125       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3126     }
3127
3128   /* use = ubase - ratio * cbase + ratio * var.
3129
3130      In general case ubase + ratio * (var - cbase) could be better (one less
3131      multiplication), but often it is possible to eliminate redundant parts
3132      of computations from (ubase - ratio * cbase) term, and if it does not
3133      happen, fold is able to apply the distributive law to obtain this form
3134      anyway.  */
3135
3136   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3137     {
3138       /* Let's compute in trees and just return the result in AFF.  This case
3139          should not be very common, and fold itself is not that bad either,
3140          so making the aff. functions more complicated to handle this case
3141          is not that urgent.  */
3142       if (ratioi == 1)
3143         {
3144           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3145           if (uutype != common_type)
3146             delta = fold_convert (uutype, delta);
3147           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3148         }
3149       else if (ratioi == -1)
3150         {
3151           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3152           if (uutype != common_type)
3153             delta = fold_convert (uutype, delta);
3154           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3155         }
3156       else
3157         {
3158           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3159           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3160           if (uutype != common_type)
3161             delta = fold_convert (uutype, delta);
3162           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3163           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3164         }
3165
3166       aff->type = uutype;
3167       aff->n = 0;
3168       aff->offset = 0;
3169       aff->mask = 0;
3170       aff->rest = expr;
3171       return true;
3172     }
3173
3174   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3175      possible to compute ratioi.  */
3176   gcc_assert (ratioi);
3177
3178   tree_to_aff_combination (ubase, common_type, aff);
3179   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3180   tree_to_aff_combination (expr, uutype, &expr_aff);
3181   aff_combination_scale (&cbase_aff, -ratioi);
3182   aff_combination_scale (&expr_aff, ratioi);
3183   aff_combination_add (aff, &cbase_aff);
3184   if (common_type != uutype)
3185     aff_combination_convert (uutype, aff);
3186   aff_combination_add (aff, &expr_aff);
3187
3188   return true;
3189 }
3190
3191 /* Determines the expression by that USE is expressed from induction variable
3192    CAND at statement AT in LOOP.  The computation is unshared.  */
3193
3194 static tree
3195 get_computation_at (struct loop *loop,
3196                     struct iv_use *use, struct iv_cand *cand, tree at)
3197 {
3198   struct affine_tree_combination aff;
3199   tree type = TREE_TYPE (use->iv->base);
3200
3201   if (!get_computation_aff (loop, use, cand, at, &aff))
3202     return NULL_TREE;
3203   unshare_aff_combination (&aff);
3204   return fold_convert (type, aff_combination_to_tree (&aff));
3205 }
3206
3207 /* Determines the expression by that USE is expressed from induction variable
3208    CAND in LOOP.  The computation is unshared.  */
3209
3210 static tree
3211 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3212 {
3213   return get_computation_at (loop, use, cand, use->stmt);
3214 }
3215
3216 /* Returns cost of addition in MODE.  */
3217
3218 static unsigned
3219 add_cost (enum machine_mode mode)
3220 {
3221   static unsigned costs[NUM_MACHINE_MODES];
3222   rtx seq;
3223   unsigned cost;
3224
3225   if (costs[mode])
3226     return costs[mode];
3227
3228   start_sequence ();
3229   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3230                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3231                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3232                  NULL_RTX);
3233   seq = get_insns ();
3234   end_sequence ();
3235
3236   cost = seq_cost (seq);
3237   if (!cost)
3238     cost = 1;
3239
3240   costs[mode] = cost;
3241
3242   if (dump_file && (dump_flags & TDF_DETAILS))
3243     fprintf (dump_file, "Addition in %s costs %d\n",
3244              GET_MODE_NAME (mode), cost);
3245   return cost;
3246 }
3247
3248 /* Entry in a hashtable of already known costs for multiplication.  */
3249 struct mbc_entry
3250 {
3251   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3252   enum machine_mode mode;       /* In mode.  */
3253   unsigned cost;                /* The cost.  */
3254 };
3255
3256 /* Counts hash value for the ENTRY.  */
3257
3258 static hashval_t
3259 mbc_entry_hash (const void *entry)
3260 {
3261   const struct mbc_entry *e = entry;
3262
3263   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3264 }
3265
3266 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3267
3268 static int
3269 mbc_entry_eq (const void *entry1, const void *entry2)
3270 {
3271   const struct mbc_entry *e1 = entry1;
3272   const struct mbc_entry *e2 = entry2;
3273
3274   return (e1->mode == e2->mode
3275           && e1->cst == e2->cst);
3276 }
3277
3278 /* Returns cost of multiplication by constant CST in MODE.  */
3279
3280 unsigned
3281 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3282 {
3283   static htab_t costs;
3284   struct mbc_entry **cached, act;
3285   rtx seq;
3286   unsigned cost;
3287
3288   if (!costs)
3289     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3290
3291   act.mode = mode;
3292   act.cst = cst;
3293   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3294   if (*cached)
3295     return (*cached)->cost;
3296
3297   *cached = XNEW (struct mbc_entry);
3298   (*cached)->mode = mode;
3299   (*cached)->cst = cst;
3300
3301   start_sequence ();
3302   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3303                gen_int_mode (cst, mode), NULL_RTX, 0);
3304   seq = get_insns ();
3305   end_sequence ();
3306
3307   cost = seq_cost (seq);
3308
3309   if (dump_file && (dump_flags & TDF_DETAILS))
3310     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3311              (int) cst, GET_MODE_NAME (mode), cost);
3312
3313   (*cached)->cost = cost;
3314
3315   return cost;
3316 }
3317
3318 /* Returns true if multiplying by RATIO is allowed in address.  */
3319
3320 bool
3321 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3322 {
3323 #define MAX_RATIO 128
3324   static sbitmap valid_mult;
3325
3326   if (!valid_mult)
3327     {
3328       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3329       rtx addr;
3330       HOST_WIDE_INT i;
3331
3332       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3333       sbitmap_zero (valid_mult);
3334       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3335       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3336         {
3337           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3338           if (memory_address_p (Pmode, addr))
3339             SET_BIT (valid_mult, i + MAX_RATIO);
3340         }
3341
3342       if (dump_file && (dump_flags & TDF_DETAILS))
3343         {
3344           fprintf (dump_file, "  allowed multipliers:");
3345           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3346             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3347               fprintf (dump_file, " %d", (int) i);
3348           fprintf (dump_file, "\n");
3349           fprintf (dump_file, "\n");
3350         }
3351     }
3352
3353   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3354     return false;
3355
3356   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3357 }
3358
3359 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3360    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3361    variable is omitted.  The created memory accesses MODE.
3362
3363    TODO -- there must be some better way.  This all is quite crude.  */
3364
3365 static unsigned
3366 get_address_cost (bool symbol_present, bool var_present,
3367                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3368 {
3369   static bool initialized = false;
3370   static HOST_WIDE_INT rat, off;
3371   static HOST_WIDE_INT min_offset, max_offset;
3372   static unsigned costs[2][2][2][2];
3373   unsigned cost, acost;
3374   rtx seq, addr, base;
3375   bool offset_p, ratio_p;
3376   rtx reg1;
3377   HOST_WIDE_INT s_offset;
3378   unsigned HOST_WIDE_INT mask;
3379   unsigned bits;
3380
3381   if (!initialized)
3382     {
3383       HOST_WIDE_INT i;
3384       initialized = true;
3385
3386       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3387
3388       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3389       for (i = 1; i <= 1 << 20; i <<= 1)
3390         {
3391           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3392           if (!memory_address_p (Pmode, addr))
3393             break;
3394         }
3395       max_offset = i >> 1;
3396       off = max_offset;
3397
3398       for (i = 1; i <= 1 << 20; i <<= 1)
3399         {
3400           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3401           if (!memory_address_p (Pmode, addr))
3402             break;
3403         }
3404       min_offset = -(i >> 1);
3405
3406       if (dump_file && (dump_flags & TDF_DETAILS))
3407         {
3408           fprintf (dump_file, "get_address_cost:\n");
3409           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3410           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3411         }
3412
3413       rat = 1;
3414       for (i = 2; i <= MAX_RATIO; i++)
3415         if (multiplier_allowed_in_address_p (i))
3416           {
3417             rat = i;
3418             break;
3419           }
3420     }
3421
3422   bits = GET_MODE_BITSIZE (Pmode);
3423   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3424   offset &= mask;
3425   if ((offset >> (bits - 1) & 1))
3426     offset |= ~mask;
3427   s_offset = offset;
3428
3429   cost = 0;
3430   offset_p = (s_offset != 0
3431               && min_offset <= s_offset && s_offset <= max_offset);
3432   ratio_p = (ratio != 1
3433              && multiplier_allowed_in_address_p (ratio));
3434
3435   if (ratio != 1 && !ratio_p)
3436     cost += multiply_by_cost (ratio, Pmode);
3437
3438   if (s_offset && !offset_p && !symbol_present)
3439     {
3440       cost += add_cost (Pmode);
3441       var_present = true;
3442     }
3443
3444   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3445   if (!acost)
3446     {
3447       int old_cse_not_expected;
3448       acost = 0;
3449
3450       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3451       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3452       if (ratio_p)
3453         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3454
3455       if (var_present)
3456         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3457
3458       if (symbol_present)
3459         {
3460           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3461           if (offset_p)
3462             base = gen_rtx_fmt_e (CONST, Pmode,
3463                                   gen_rtx_fmt_ee (PLUS, Pmode,
3464                                                   base,
3465                                                   gen_int_mode (off, Pmode)));
3466         }
3467       else if (offset_p)
3468         base = gen_int_mode (off, Pmode);
3469       else
3470         base = NULL_RTX;
3471
3472       if (base)
3473         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3474
3475       start_sequence ();
3476       /* To avoid splitting addressing modes, pretend that no cse will
3477          follow.  */
3478       old_cse_not_expected = cse_not_expected;
3479       cse_not_expected = true;
3480       addr = memory_address (Pmode, addr);
3481       cse_not_expected = old_cse_not_expected;
3482       seq = get_insns ();
3483       end_sequence ();
3484
3485       acost = seq_cost (seq);
3486       acost += address_cost (addr, Pmode);
3487
3488       if (!acost)
3489         acost = 1;
3490       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3491     }
3492
3493   return cost + acost;
3494 }
3495
3496 /* Estimates cost of forcing expression EXPR into a variable.  */
3497
3498 unsigned
3499 force_expr_to_var_cost (tree expr)
3500 {
3501   static bool costs_initialized = false;
3502   static unsigned integer_cost;
3503   static unsigned symbol_cost;
3504   static unsigned address_cost;
3505   tree op0, op1;
3506   unsigned cost0, cost1, cost;
3507   enum machine_mode mode;
3508
3509   if (!costs_initialized)
3510     {
3511       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3512       rtx x = gen_rtx_MEM (DECL_MODE (var),
3513                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3514       tree addr;
3515       tree type = build_pointer_type (integer_type_node);
3516
3517       integer_cost = computation_cost (build_int_cst (integer_type_node,
3518                                                       2000));
3519
3520       SET_DECL_RTL (var, x);
3521       TREE_STATIC (var) = 1;
3522       addr = build1 (ADDR_EXPR, type, var);
3523       symbol_cost = computation_cost (addr) + 1;
3524
3525       address_cost
3526         = computation_cost (build2 (PLUS_EXPR, type,
3527                                     addr,
3528                                     build_int_cst (type, 2000))) + 1;
3529       if (dump_file && (dump_flags & TDF_DETAILS))
3530         {
3531           fprintf (dump_file, "force_expr_to_var_cost:\n");
3532           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3533           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3534           fprintf (dump_file, "  address %d\n", (int) address_cost);
3535           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3536           fprintf (dump_file, "\n");
3537         }
3538
3539       costs_initialized = true;
3540     }
3541
3542   STRIP_NOPS (expr);
3543
3544   if (SSA_VAR_P (expr))
3545     return 0;
3546
3547   if (TREE_INVARIANT (expr))
3548     {
3549       if (TREE_CODE (expr) == INTEGER_CST)
3550         return integer_cost;
3551
3552       if (TREE_CODE (expr) == ADDR_EXPR)
3553         {
3554           tree obj = TREE_OPERAND (expr, 0);
3555
3556           if (TREE_CODE (obj) == VAR_DECL
3557               || TREE_CODE (obj) == PARM_DECL
3558               || TREE_CODE (obj) == RESULT_DECL)
3559             return symbol_cost;
3560         }
3561
3562       return address_cost;
3563     }
3564
3565   switch (TREE_CODE (expr))
3566     {
3567     case PLUS_EXPR:
3568     case MINUS_EXPR:
3569     case MULT_EXPR:
3570       op0 = TREE_OPERAND (expr, 0);
3571       op1 = TREE_OPERAND (expr, 1);
3572       STRIP_NOPS (op0);
3573       STRIP_NOPS (op1);
3574
3575       if (is_gimple_val (op0))
3576         cost0 = 0;
3577       else
3578         cost0 = force_expr_to_var_cost (op0);
3579
3580       if (is_gimple_val (op1))
3581         cost1 = 0;
3582       else
3583         cost1 = force_expr_to_var_cost (op1);
3584
3585       break;
3586
3587     default:
3588       /* Just an arbitrary value, FIXME.  */
3589       return target_spill_cost;
3590     }
3591
3592   mode = TYPE_MODE (TREE_TYPE (expr));
3593   switch (TREE_CODE (expr))
3594     {
3595     case PLUS_EXPR:
3596     case MINUS_EXPR:
3597       cost = add_cost (mode);
3598       break;
3599
3600     case MULT_EXPR:
3601       if (cst_and_fits_in_hwi (op0))
3602         cost = multiply_by_cost (int_cst_value (op0), mode);
3603       else if (cst_and_fits_in_hwi (op1))
3604         cost = multiply_by_cost (int_cst_value (op1), mode);
3605       else
3606         return target_spill_cost;
3607       break;
3608
3609     default:
3610       gcc_unreachable ();
3611     }
3612
3613   cost += cost0;
3614   cost += cost1;
3615
3616   /* Bound the cost by target_spill_cost.  The parts of complicated
3617      computations often are either loop invariant or at least can
3618      be shared between several iv uses, so letting this grow without
3619      limits would not give reasonable results.  */
3620   return cost < target_spill_cost ? cost : target_spill_cost;
3621 }
3622
3623 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3624    invariants the computation depends on.  */
3625
3626 static unsigned
3627 force_var_cost (struct ivopts_data *data,
3628                 tree expr, bitmap *depends_on)
3629 {
3630   if (depends_on)
3631     {
3632       fd_ivopts_data = data;
3633       walk_tree (&expr, find_depends, depends_on, NULL);
3634     }
3635
3636   return force_expr_to_var_cost (expr);
3637 }
3638
3639 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3640    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3641    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3642    invariants the computation depends on.  */
3643
3644 static unsigned
3645 split_address_cost (struct ivopts_data *data,
3646                     tree addr, bool *symbol_present, bool *var_present,
3647                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3648 {
3649   tree core;
3650   HOST_WIDE_INT bitsize;
3651   HOST_WIDE_INT bitpos;
3652   tree toffset;
3653   enum machine_mode mode;
3654   int unsignedp, volatilep;
3655
3656   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3657                               &unsignedp, &volatilep, false);
3658
3659   if (toffset != 0
3660       || bitpos % BITS_PER_UNIT != 0
3661       || TREE_CODE (core) != VAR_DECL)
3662     {
3663       *symbol_present = false;
3664       *var_present = true;
3665       fd_ivopts_data = data;
3666       walk_tree (&addr, find_depends, depends_on, NULL);
3667       return target_spill_cost;
3668     }
3669
3670   *offset += bitpos / BITS_PER_UNIT;
3671   if (TREE_STATIC (core)
3672       || DECL_EXTERNAL (core))
3673     {
3674       *symbol_present = true;
3675       *var_present = false;
3676       return 0;
3677     }
3678
3679   *symbol_present = false;
3680   *var_present = true;
3681   return 0;
3682 }
3683
3684 /* Estimates cost of expressing difference of addresses E1 - E2 as
3685    var + symbol + offset.  The value of offset is added to OFFSET,
3686    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3687    part is missing.  DEPENDS_ON is a set of the invariants the computation
3688    depends on.  */
3689
3690 static unsigned
3691 ptr_difference_cost (struct ivopts_data *data,
3692                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3693                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3694 {
3695   HOST_WIDE_INT diff = 0;
3696   unsigned cost;
3697
3698   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3699
3700   if (ptr_difference_const (e1, e2, &diff))
3701     {
3702       *offset += diff;
3703       *symbol_present = false;
3704       *var_present = false;
3705       return 0;
3706     }
3707
3708   if (e2 == integer_zero_node)
3709     return split_address_cost (data, TREE_OPERAND (e1, 0),
3710                                symbol_present, var_present, offset, depends_on);
3711
3712   *symbol_present = false;
3713   *var_present = true;
3714
3715   cost = force_var_cost (data, e1, depends_on);
3716   cost += force_var_cost (data, e2, depends_on);
3717   cost += add_cost (Pmode);
3718
3719   return cost;
3720 }
3721
3722 /* Estimates cost of expressing difference E1 - E2 as
3723    var + symbol + offset.  The value of offset is added to OFFSET,
3724    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3725    part is missing.  DEPENDS_ON is a set of the invariants the computation
3726    depends on.  */
3727
3728 static unsigned
3729 difference_cost (struct ivopts_data *data,
3730                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3731                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3732 {
3733   unsigned cost;
3734   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3735   unsigned HOST_WIDE_INT off1, off2;
3736
3737   e1 = strip_offset (e1, &off1);
3738   e2 = strip_offset (e2, &off2);
3739   *offset += off1 - off2;
3740
3741   STRIP_NOPS (e1);
3742   STRIP_NOPS (e2);
3743
3744   if (TREE_CODE (e1) == ADDR_EXPR)
3745     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3746                                 depends_on);
3747   *symbol_present = false;
3748
3749   if (operand_equal_p (e1, e2, 0))
3750     {
3751       *var_present = false;
3752       return 0;
3753     }
3754   *var_present = true;
3755   if (zero_p (e2))
3756     return force_var_cost (data, e1, depends_on);
3757
3758   if (zero_p (e1))
3759     {
3760       cost = force_var_cost (data, e2, depends_on);
3761       cost += multiply_by_cost (-1, mode);
3762
3763       return cost;
3764     }
3765
3766   cost = force_var_cost (data, e1, depends_on);
3767   cost += force_var_cost (data, e2, depends_on);
3768   cost += add_cost (mode);
3769
3770   return cost;
3771 }
3772
3773 /* Determines the cost of the computation by that USE is expressed
3774    from induction variable CAND.  If ADDRESS_P is true, we just need
3775    to create an address from it, otherwise we want to get it into
3776    register.  A set of invariants we depend on is stored in
3777    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3778
3779 static unsigned
3780 get_computation_cost_at (struct ivopts_data *data,
3781                          struct iv_use *use, struct iv_cand *cand,
3782                          bool address_p, bitmap *depends_on, tree at)
3783 {
3784   tree ubase = use->iv->base, ustep = use->iv->step;
3785   tree cbase, cstep;
3786   tree utype = TREE_TYPE (ubase), ctype;
3787   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3788   HOST_WIDE_INT ratio, aratio;
3789   bool var_present, symbol_present;
3790   unsigned cost = 0, n_sums;
3791
3792   *depends_on = NULL;
3793
3794   /* Only consider real candidates.  */
3795   if (!cand->iv)
3796     return INFTY;
3797
3798   cbase = cand->iv->base;
3799   cstep = cand->iv->step;
3800   ctype = TREE_TYPE (cbase);
3801
3802   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3803     {
3804       /* We do not have a precision to express the values of use.  */
3805       return INFTY;
3806     }
3807
3808   if (address_p)
3809     {
3810       /* Do not try to express address of an object with computation based
3811          on address of a different object.  This may cause problems in rtl
3812          level alias analysis (that does not expect this to be happening,
3813          as this is illegal in C), and would be unlikely to be useful
3814          anyway.  */
3815       if (use->iv->base_object
3816           && cand->iv->base_object
3817           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3818         return INFTY;
3819     }
3820
3821   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3822     {
3823       /* TODO -- add direct handling of this case.  */
3824       goto fallback;
3825     }
3826
3827   /* CSTEPI is removed from the offset in case statement is after the
3828      increment.  If the step is not constant, we use zero instead.
3829      This is a bit imprecise (there is the extra addition), but
3830      redundancy elimination is likely to transform the code so that
3831      it uses value of the variable before increment anyway,
3832      so it is not that much unrealistic.  */
3833   if (cst_and_fits_in_hwi (cstep))
3834     cstepi = int_cst_value (cstep);
3835   else
3836     cstepi = 0;
3837
3838   if (cst_and_fits_in_hwi (ustep)
3839       && cst_and_fits_in_hwi (cstep))
3840     {
3841       ustepi = int_cst_value (ustep);
3842
3843       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3844         return INFTY;
3845     }
3846   else
3847     {
3848       double_int rat;
3849
3850       if (!constant_multiple_of (ustep, cstep, &rat))
3851         return INFTY;
3852
3853       if (double_int_fits_in_shwi_p (rat))
3854         ratio = double_int_to_shwi (rat);
3855       else
3856         return INFTY;
3857     }
3858
3859   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3860      or ratio == 1, it is better to handle this like
3861
3862      ubase - ratio * cbase + ratio * var
3863
3864      (also holds in the case ratio == -1, TODO.  */
3865
3866   if (cst_and_fits_in_hwi (cbase))
3867     {
3868       offset = - ratio * int_cst_value (cbase);
3869       cost += difference_cost (data,
3870                                ubase, integer_zero_node,
3871                                &symbol_present, &var_present, &offset,
3872                                depends_on);
3873     }
3874   else if (ratio == 1)
3875     {
3876       cost += difference_cost (data,
3877                                ubase, cbase,
3878                                &symbol_present, &var_present, &offset,
3879                                depends_on);
3880     }
3881   else
3882     {
3883       cost += force_var_cost (data, cbase, depends_on);
3884       cost += add_cost (TYPE_MODE (ctype));
3885       cost += difference_cost (data,
3886                                ubase, integer_zero_node,
3887                                &symbol_present, &var_present, &offset,
3888                                depends_on);
3889     }
3890
3891   /* If we are after the increment, the value of the candidate is higher by
3892      one iteration.  */
3893   if (stmt_after_increment (data->current_loop, cand, at))
3894     offset -= ratio * cstepi;
3895
3896   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3897      (symbol/var/const parts may be omitted).  If we are looking for an address,
3898      find the cost of addressing this.  */
3899   if (address_p)
3900     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3901
3902   /* Otherwise estimate the costs for computing the expression.  */
3903   aratio = ratio > 0 ? ratio : -ratio;
3904   if (!symbol_present && !var_present && !offset)
3905     {
3906       if (ratio != 1)
3907         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3908
3909       return cost;
3910     }
3911
3912   if (aratio != 1)
3913     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3914
3915   n_sums = 1;
3916   if (var_present
3917       /* Symbol + offset should be compile-time computable.  */
3918       && (symbol_present || offset))
3919     n_sums++;
3920
3921   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3922
3923 fallback:
3924   {
3925     /* Just get the expression, expand it and measure the cost.  */
3926     tree comp = get_computation_at (data->current_loop, use, cand, at);
3927
3928     if (!comp)
3929       return INFTY;
3930
3931     if (address_p)
3932       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3933
3934     return computation_cost (comp);
3935   }
3936 }
3937
3938 /* Determines the cost of the computation by that USE is expressed
3939    from induction variable CAND.  If ADDRESS_P is true, we just need
3940    to create an address from it, otherwise we want to get it into
3941    register.  A set of invariants we depend on is stored in
3942    DEPENDS_ON.  */
3943
3944 static unsigned
3945 get_computation_cost (struct ivopts_data *data,
3946                       struct iv_use *use, struct iv_cand *cand,
3947                       bool address_p, bitmap *depends_on)
3948 {
3949   return get_computation_cost_at (data,
3950                                   use, cand, address_p, depends_on, use->stmt);
3951 }
3952
3953 /* Determines cost of basing replacement of USE on CAND in a generic
3954    expression.  */
3955
3956 static bool
3957 determine_use_iv_cost_generic (struct ivopts_data *data,
3958                                struct iv_use *use, struct iv_cand *cand)
3959 {
3960   bitmap depends_on;
3961   unsigned cost;
3962
3963   /* The simple case first -- if we need to express value of the preserved
3964      original biv, the cost is 0.  This also prevents us from counting the
3965      cost of increment twice -- once at this use and once in the cost of
3966      the candidate.  */
3967   if (cand->pos == IP_ORIGINAL
3968       && cand->incremented_at == use->stmt)
3969     {
3970       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3971       return true;
3972     }
3973
3974   cost = get_computation_cost (data, use, cand, false, &depends_on);
3975   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3976
3977   return cost != INFTY;
3978 }
3979
3980 /* Determines cost of basing replacement of USE on CAND in an address.  */
3981
3982 static bool
3983 determine_use_iv_cost_address (struct ivopts_data *data,
3984                                struct iv_use *use, struct iv_cand *cand)
3985 {
3986   bitmap depends_on;
3987   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3988
3989   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3990
3991   return cost != INFTY;
3992 }
3993
3994 /* Computes value of induction variable IV in iteration NITER.  */
3995
3996 static tree
3997 iv_value (struct iv *iv, tree niter)
3998 {
3999   tree val;
4000   tree type = TREE_TYPE (iv->base);
4001
4002   niter = fold_convert (type, niter);
4003   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4004
4005   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4006 }
4007
4008 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4009
4010 static tree
4011 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4012 {
4013   tree val = iv_value (cand->iv, niter);
4014   tree type = TREE_TYPE (cand->iv->base);
4015
4016   if (stmt_after_increment (loop, cand, at))
4017     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4018
4019   return val;
4020 }
4021
4022 /* Returns period of induction variable iv.  */
4023
4024 static tree
4025 iv_period (struct iv *iv)
4026 {
4027   tree step = iv->step, period, type;
4028   tree pow2div;
4029
4030   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4031
4032   /* Period of the iv is gcd (step, type range).  Since type range is power
4033      of two, it suffices to determine the maximum power of two that divides
4034      step.  */
4035   pow2div = num_ending_zeros (step);
4036   type = unsigned_type_for (TREE_TYPE (step));
4037
4038   period = build_low_bits_mask (type,
4039                                 (TYPE_PRECISION (type)
4040                                  - tree_low_cst (pow2div, 1)));
4041
4042   return period;
4043 }
4044
4045 /* Returns the comparison operator used when eliminating the iv USE.  */
4046
4047 static enum tree_code
4048 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4049 {
4050   struct loop *loop = data->current_loop;
4051   basic_block ex_bb;
4052   edge exit;
4053
4054   ex_bb = bb_for_stmt (use->stmt);
4055   exit = EDGE_SUCC (ex_bb, 0);
4056   if (flow_bb_inside_loop_p (loop, exit->dest))
4057     exit = EDGE_SUCC (ex_bb, 1);
4058
4059   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4060 }
4061
4062 /* Check whether it is possible to express the condition in USE by comparison
4063    of candidate CAND.  If so, store the value compared with to BOUND.  */
4064
4065 static bool
4066 may_eliminate_iv (struct ivopts_data *data,
4067                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4068 {
4069   basic_block ex_bb;
4070   edge exit;
4071   tree nit, nit_type;
4072   tree wider_type, period, per_type;
4073   struct loop *loop = data->current_loop;
4074
4075   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4076     return false;
4077
4078   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4079      for other conditions inside loop body.  */
4080   ex_bb = bb_for_stmt (use->stmt);
4081   if (use->stmt != last_stmt (ex_bb)
4082       || TREE_CODE (use->stmt) != COND_EXPR)
4083     return false;
4084   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4085     return false;
4086
4087   exit = EDGE_SUCC (ex_bb, 0);
4088   if (flow_bb_inside_loop_p (loop, exit->dest))
4089     exit = EDGE_SUCC (ex_bb, 1);
4090   if (flow_bb_inside_loop_p (loop, exit->dest))
4091     return false;
4092
4093   nit = niter_for_exit (data, exit);
4094   if (!nit)
4095     return false;
4096
4097   nit_type = TREE_TYPE (nit);
4098
4099   /* Determine whether we may use the variable to test whether niter iterations
4100      elapsed.  This is the case iff the period of the induction variable is
4101      greater than the number of iterations.  */
4102   period = iv_period (cand->iv);
4103   if (!period)
4104     return false;
4105   per_type = TREE_TYPE (period);
4106
4107   wider_type = TREE_TYPE (period);
4108   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4109     wider_type = per_type;
4110   else
4111     wider_type = nit_type;
4112
4113   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4114                                       fold_convert (wider_type, period),
4115                                       fold_convert (wider_type, nit))))
4116     return false;
4117
4118   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4119   return true;
4120 }
4121
4122 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4123
4124 static bool
4125 determine_use_iv_cost_condition (struct ivopts_data *data,
4126                                  struct iv_use *use, struct iv_cand *cand)
4127 {
4128   tree bound = NULL_TREE, op, cond;
4129   bitmap depends_on = NULL;
4130   unsigned cost;
4131
4132   /* Only consider real candidates.  */
4133   if (!cand->iv)
4134     {
4135       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4136       return false;
4137     }
4138
4139   if (may_eliminate_iv (data, use, cand, &bound))
4140     {
4141       cost = force_var_cost (data, bound, &depends_on);
4142
4143       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4144       return cost != INFTY;
4145     }
4146
4147   /* The induction variable elimination failed; just express the original
4148      giv.  If it is compared with an invariant, note that we cannot get
4149      rid of it.  */
4150   cost = get_computation_cost (data, use, cand, false, &depends_on);
4151
4152   cond = *use->op_p;
4153   if (TREE_CODE (cond) != SSA_NAME)
4154     {
4155       op = TREE_OPERAND (cond, 0);
4156       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4157         op = TREE_OPERAND (cond, 1);
4158       if (TREE_CODE (op) == SSA_NAME)
4159         {
4160           op = get_iv (data, op)->base;
4161           fd_ivopts_data = data;
4162           walk_tree (&op, find_depends, &depends_on, NULL);
4163         }
4164     }
4165
4166   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4167   return cost != INFTY;
4168 }
4169
4170 /* Determines cost of basing replacement of USE on CAND.  Returns false
4171    if USE cannot be based on CAND.  */
4172
4173 static bool
4174 determine_use_iv_cost (struct ivopts_data *data,
4175                        struct iv_use *use, struct iv_cand *cand)
4176 {
4177   switch (use->type)
4178     {
4179     case USE_NONLINEAR_EXPR:
4180       return determine_use_iv_cost_generic (data, use, cand);
4181
4182     case USE_ADDRESS:
4183       return determine_use_iv_cost_address (data, use, cand);
4184
4185     case USE_COMPARE:
4186       return determine_use_iv_cost_condition (data, use, cand);
4187
4188     default:
4189       gcc_unreachable ();
4190     }
4191 }
4192
4193 /* Determines costs of basing the use of the iv on an iv candidate.  */
4194
4195 static void
4196 determine_use_iv_costs (struct ivopts_data *data)
4197 {
4198   unsigned i, j;
4199   struct iv_use *use;
4200   struct iv_cand *cand;
4201   bitmap to_clear = BITMAP_ALLOC (NULL);
4202
4203   alloc_use_cost_map (data);
4204
4205   for (i = 0; i < n_iv_uses (data); i++)
4206     {
4207       use = iv_use (data, i);
4208
4209       if (data->consider_all_candidates)
4210         {
4211           for (j = 0; j < n_iv_cands (data); j++)
4212             {
4213               cand = iv_cand (data, j);
4214               determine_use_iv_cost (data, use, cand);
4215             }
4216         }
4217       else
4218         {
4219           bitmap_iterator bi;
4220
4221           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4222             {
4223               cand = iv_cand (data, j);
4224               if (!determine_use_iv_cost (data, use, cand))
4225                 bitmap_set_bit (to_clear, j);
4226             }
4227
4228           /* Remove the candidates for that the cost is infinite from
4229              the list of related candidates.  */
4230           bitmap_and_compl_into (use->related_cands, to_clear);
4231           bitmap_clear (to_clear);
4232         }
4233     }
4234
4235   BITMAP_FREE (to_clear);
4236
4237   if (dump_file && (dump_flags & TDF_DETAILS))
4238     {
4239       fprintf (dump_file, "Use-candidate costs:\n");
4240
4241       for (i = 0; i < n_iv_uses (data); i++)
4242         {
4243           use = iv_use (data, i);
4244
4245           fprintf (dump_file, "Use %d:\n", i);
4246           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4247           for (j = 0; j < use->n_map_members; j++)
4248             {
4249               if (!use->cost_map[j].cand
4250                   || use->cost_map[j].cost == INFTY)
4251                 continue;
4252
4253               fprintf (dump_file, "  %d\t%d\t",
4254                        use->cost_map[j].cand->id,
4255                        use->cost_map[j].cost);
4256               if (use->cost_map[j].depends_on)
4257                 bitmap_print (dump_file,
4258                               use->cost_map[j].depends_on, "","");
4259               fprintf (dump_file, "\n");
4260             }
4261
4262           fprintf (dump_file, "\n");
4263         }
4264       fprintf (dump_file, "\n");
4265     }
4266 }
4267
4268 /* Determines cost of the candidate CAND.  */
4269
4270 static void
4271 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4272 {
4273   unsigned cost_base, cost_step;
4274   tree base;
4275
4276   if (!cand->iv)
4277     {
4278       cand->cost = 0;
4279       return;
4280     }
4281
4282   /* There are two costs associated with the candidate -- its increment
4283      and its initialization.  The second is almost negligible for any loop
4284      that rolls enough, so we take it just very little into account.  */
4285
4286   base = cand->iv->base;
4287   cost_base = force_var_cost (data, base, NULL);
4288   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4289
4290   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4291
4292   /* Prefer the original iv unless we may gain something by replacing it;
4293      this is not really relevant for artificial ivs created by other
4294      passes.  */
4295   if (cand->pos == IP_ORIGINAL
4296       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4297     cand->cost--;
4298
4299   /* Prefer not to insert statements into latch unless there are some
4300      already (so that we do not create unnecessary jumps).  */
4301   if (cand->pos == IP_END
4302       && empty_block_p (ip_end_pos (data->current_loop)))
4303     cand->cost++;
4304 }
4305
4306 /* Determines costs of computation of the candidates.  */
4307
4308 static void
4309 determine_iv_costs (struct ivopts_data *data)
4310 {
4311   unsigned i;
4312
4313   if (dump_file && (dump_flags & TDF_DETAILS))
4314     {
4315       fprintf (dump_file, "Candidate costs:\n");
4316       fprintf (dump_file, "  cand\tcost\n");
4317     }
4318
4319   for (i = 0; i < n_iv_cands (data); i++)
4320     {
4321       struct iv_cand *cand = iv_cand (data, i);
4322
4323       determine_iv_cost (data, cand);
4324
4325       if (dump_file && (dump_flags & TDF_DETAILS))
4326         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4327     }
4328
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4330       fprintf (dump_file, "\n");
4331 }
4332
4333 /* Calculates cost for having SIZE induction variables.  */
4334
4335 static unsigned
4336 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4337 {
4338   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4339 }
4340
4341 /* For each size of the induction variable set determine the penalty.  */
4342
4343 static void
4344 determine_set_costs (struct ivopts_data *data)
4345 {
4346   unsigned j, n;
4347   tree phi, op;
4348   struct loop *loop = data->current_loop;
4349   bitmap_iterator bi;
4350
4351   /* We use the following model (definitely improvable, especially the
4352      cost function -- TODO):
4353
4354      We estimate the number of registers available (using MD data), name it A.
4355
4356      We estimate the number of registers used by the loop, name it U.  This
4357      number is obtained as the number of loop phi nodes (not counting virtual
4358      registers and bivs) + the number of variables from outside of the loop.
4359
4360      We set a reserve R (free regs that are used for temporary computations,
4361      etc.).  For now the reserve is a constant 3.
4362
4363      Let I be the number of induction variables.
4364
4365      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4366         make a lot of ivs without a reason).
4367      -- if A - R < U + I <= A, the cost is I * PRES_COST
4368      -- if U + I > A, the cost is I * PRES_COST and
4369         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4370
4371   if (dump_file && (dump_flags & TDF_DETAILS))
4372     {
4373       fprintf (dump_file, "Global costs:\n");
4374       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4375       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4376       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4377       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4378     }
4379
4380   n = 0;
4381   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4382     {
4383       op = PHI_RESULT (phi);
4384
4385       if (!is_gimple_reg (op))
4386         continue;
4387
4388       if (get_iv (data, op))
4389         continue;
4390
4391       n++;
4392     }
4393
4394   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4395     {
4396       struct version_info *info = ver_info (data, j);
4397
4398       if (info->inv_id && info->has_nonlin_use)
4399         n++;
4400     }
4401
4402   data->regs_used = n;
4403   if (dump_file && (dump_flags & TDF_DETAILS))
4404     fprintf (dump_file, "  regs_used %d\n", n);
4405
4406   if (dump_file && (dump_flags & TDF_DETAILS))
4407     {
4408       fprintf (dump_file, "  cost for size:\n");
4409       fprintf (dump_file, "  ivs\tcost\n");
4410       for (j = 0; j <= 2 * target_avail_regs; j++)
4411         fprintf (dump_file, "  %d\t%d\n", j,
4412                  ivopts_global_cost_for_size (data, j));
4413       fprintf (dump_file, "\n");
4414     }
4415 }
4416
4417 /* Returns true if A is a cheaper cost pair than B.  */
4418
4419 static bool
4420 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4421 {
4422   if (!a)
4423     return false;
4424
4425   if (!b)
4426     return true;
4427
4428   if (a->cost < b->cost)
4429     return true;
4430
4431   if (a->cost > b->cost)
4432     return false;
4433
4434   /* In case the costs are the same, prefer the cheaper candidate.  */
4435   if (a->cand->cost < b->cand->cost)
4436     return true;
4437
4438   return false;
4439 }
4440
4441 /* Computes the cost field of IVS structure.  */
4442
4443 static void
4444 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4445 {
4446   unsigned cost = 0;
4447
4448   cost += ivs->cand_use_cost;
4449   cost += ivs->cand_cost;
4450   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4451
4452   ivs->cost = cost;
4453 }
4454
4455 /* Remove invariants in set INVS to set IVS.  */
4456
4457 static void
4458 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4459 {
4460   bitmap_iterator bi;
4461   unsigned iid;
4462
4463   if (!invs)
4464     return;
4465
4466   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4467     {
4468       ivs->n_invariant_uses[iid]--;
4469       if (ivs->n_invariant_uses[iid] == 0)
4470         ivs->n_regs--;
4471     }
4472 }
4473
4474 /* Set USE not to be expressed by any candidate in IVS.  */
4475
4476 static void
4477 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4478                  struct iv_use *use)
4479 {
4480   unsigned uid = use->id, cid;
4481   struct cost_pair *cp;
4482
4483   cp = ivs->cand_for_use[uid];
4484   if (!cp)
4485     return;
4486   cid = cp->cand->id;
4487
4488   ivs->bad_uses++;
4489   ivs->cand_for_use[uid] = NULL;
4490   ivs->n_cand_uses[cid]--;
4491
4492   if (ivs->n_cand_uses[cid] == 0)
4493     {
4494       bitmap_clear_bit (ivs->cands, cid);
4495       /* Do not count the pseudocandidates.  */
4496       if (cp->cand->iv)
4497         ivs->n_regs--;
4498       ivs->n_cands--;
4499       ivs->cand_cost -= cp->cand->cost;
4500
4501       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4502     }
4503
4504   ivs->cand_use_cost -= cp->cost;
4505
4506   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4507   iv_ca_recount_cost (data, ivs);
4508 }
4509
4510 /* Add invariants in set INVS to set IVS.  */
4511
4512 static void
4513 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4514 {
4515   bitmap_iterator bi;
4516   unsigned iid;
4517
4518   if (!invs)
4519     return;
4520
4521   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4522     {
4523       ivs->n_invariant_uses[iid]++;
4524       if (ivs->n_invariant_uses[iid] == 1)
4525         ivs->n_regs++;
4526     }
4527 }
4528
4529 /* Set cost pair for USE in set IVS to CP.  */
4530
4531 static void
4532 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4533               struct iv_use *use, struct cost_pair *cp)
4534 {
4535   unsigned uid = use->id, cid;
4536
4537   if (ivs->cand_for_use[uid] == cp)
4538     return;
4539
4540   if (ivs->cand_for_use[uid])
4541     iv_ca_set_no_cp (data, ivs, use);
4542
4543   if (cp)
4544     {
4545       cid = cp->cand->id;
4546
4547       ivs->bad_uses--;
4548       ivs->cand_for_use[uid] = cp;
4549       ivs->n_cand_uses[cid]++;
4550       if (ivs->n_cand_uses[cid] == 1)
4551         {
4552           bitmap_set_bit (ivs->cands, cid);
4553           /* Do not count the pseudocandidates.  */
4554           if (cp->cand->iv)
4555             ivs->n_regs++;
4556           ivs->n_cands++;
4557           ivs->cand_cost += cp->cand->cost;
4558
4559           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4560         }
4561
4562       ivs->cand_use_cost += cp->cost;
4563       iv_ca_set_add_invariants (ivs, cp->depends_on);
4564       iv_ca_recount_cost (data, ivs);
4565     }
4566 }
4567
4568 /* Extend set IVS by expressing USE by some of the candidates in it
4569    if possible.  */
4570
4571 static void
4572 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4573                struct iv_use *use)
4574 {
4575   struct cost_pair *best_cp = NULL, *cp;
4576   bitmap_iterator bi;
4577   unsigned i;
4578
4579   gcc_assert (ivs->upto >= use->id);
4580
4581   if (ivs->upto == use->id)
4582     {
4583       ivs->upto++;
4584       ivs->bad_uses++;
4585     }
4586
4587   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4588     {
4589       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4590
4591       if (cheaper_cost_pair (cp, best_cp))
4592         best_cp = cp;
4593     }
4594
4595   iv_ca_set_cp (data, ivs, use, best_cp);
4596 }
4597
4598 /* Get cost for assignment IVS.  */
4599
4600 static unsigned
4601 iv_ca_cost (struct iv_ca *ivs)
4602 {
4603   return (ivs->bad_uses ? INFTY : ivs->cost);
4604 }
4605
4606 /* Returns true if all dependences of CP are among invariants in IVS.  */
4607
4608 static bool
4609 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4610 {
4611   unsigned i;
4612   bitmap_iterator bi;
4613
4614   if (!cp->depends_on)
4615     return true;
4616
4617   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4618     {
4619       if (ivs->n_invariant_uses[i] == 0)
4620         return false;
4621     }
4622
4623   return true;
4624 }
4625
4626 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4627    it before NEXT_CHANGE.  */
4628
4629 static struct iv_ca_delta *
4630 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4631                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4632 {
4633   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4634
4635   change->use = use;
4636   change->old_cp = old_cp;
4637   change->new_cp = new_cp;
4638   change->next_change = next_change;
4639
4640   return change;
4641 }
4642
4643 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4644    are rewritten.  */
4645
4646 static struct iv_ca_delta *
4647 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4648 {
4649   struct iv_ca_delta *last;
4650
4651   if (!l2)
4652     return l1;
4653
4654   if (!l1)
4655     return l2;
4656
4657   for (last = l1; last->next_change; last = last->next_change)
4658     continue;
4659   last->next_change = l2;
4660
4661   return l1;
4662 }
4663
4664 /* Returns candidate by that USE is expressed in IVS.  */
4665
4666 static struct cost_pair *
4667 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4668 {
4669   return ivs->cand_for_use[use->id];
4670 }
4671
4672 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4673
4674 static struct iv_ca_delta *
4675 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4676 {
4677   struct iv_ca_delta *act, *next, *prev = NULL;
4678   struct cost_pair *tmp;
4679
4680   for (act = delta; act; act = next)
4681     {
4682       next = act->next_change;
4683       act->next_change = prev;
4684       prev = act;
4685
4686       tmp = act->old_cp;
4687       act->old_cp = act->new_cp;
4688       act->new_cp = tmp;
4689     }
4690
4691   return prev;
4692 }
4693
4694 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4695    reverted instead.  */
4696
4697 static void
4698 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4699                     struct iv_ca_delta *delta, bool forward)
4700 {
4701   struct cost_pair *from, *to;
4702   struct iv_ca_delta *act;
4703
4704   if (!forward)
4705     delta = iv_ca_delta_reverse (delta);
4706
4707   for (act = delta; act; act = act->next_change)
4708     {
4709       from = act->old_cp;
4710       to = act->new_cp;
4711       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4712       iv_ca_set_cp (data, ivs, act->use, to);
4713     }
4714
4715   if (!forward)
4716     iv_ca_delta_reverse (delta);
4717 }
4718
4719 /* Returns true if CAND is used in IVS.  */
4720
4721 static bool
4722 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4723 {
4724   return ivs->n_cand_uses[cand->id] > 0;
4725 }
4726
4727 /* Returns number of induction variable candidates in the set IVS.  */
4728
4729 static unsigned
4730 iv_ca_n_cands (struct iv_ca *ivs)
4731 {
4732   return ivs->n_cands;
4733 }
4734
4735 /* Free the list of changes DELTA.  */
4736
4737 static void
4738 iv_ca_delta_free (struct iv_ca_delta **delta)
4739 {
4740   struct iv_ca_delta *act, *next;
4741
4742   for (act = *delta; act; act = next)
4743     {
4744       next = act->next_change;
4745       free (act);
4746     }
4747
4748   *delta = NULL;
4749 }
4750
4751 /* Allocates new iv candidates assignment.  */
4752
4753 static struct iv_ca *
4754 iv_ca_new (struct ivopts_data *data)
4755 {
4756   struct iv_ca *nw = XNEW (struct iv_ca);
4757
4758   nw->upto = 0;
4759   nw->bad_uses = 0;
4760   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4761   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4762   nw->cands = BITMAP_ALLOC (NULL);
4763   nw->n_cands = 0;
4764   nw->n_regs = 0;
4765   nw->cand_use_cost = 0;
4766   nw->cand_cost = 0;
4767   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4768   nw->cost = 0;
4769
4770   return nw;
4771 }
4772
4773 /* Free memory occupied by the set IVS.  */
4774
4775 static void
4776 iv_ca_free (struct iv_ca **ivs)
4777 {
4778   free ((*ivs)->cand_for_use);
4779   free ((*ivs)->n_cand_uses);
4780   BITMAP_FREE ((*ivs)->cands);
4781   free ((*ivs)->n_invariant_uses);
4782   free (*ivs);
4783   *ivs = NULL;
4784 }
4785
4786 /* Dumps IVS to FILE.  */
4787
4788 static void
4789 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4790 {
4791   const char *pref = "  invariants ";
4792   unsigned i;
4793
4794   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4795   bitmap_print (file, ivs->cands, "  candidates ","\n");
4796
4797   for (i = 1; i <= data->max_inv_id; i++)
4798     if (ivs->n_invariant_uses[i])
4799       {
4800         fprintf (file, "%s%d", pref, i);
4801         pref = ", ";
4802       }
4803   fprintf (file, "\n");
4804 }
4805
4806 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4807    new set, and store differences in DELTA.  Number of induction variables
4808    in the new set is stored to N_IVS.  */
4809
4810 static unsigned
4811 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4812               struct iv_cand *cand, struct iv_ca_delta **delta,
4813               unsigned *n_ivs)
4814 {
4815   unsigned i, cost;
4816   struct iv_use *use;
4817   struct cost_pair *old_cp, *new_cp;
4818
4819   *delta = NULL;
4820   for (i = 0; i < ivs->upto; i++)
4821     {
4822       use = iv_use (data, i);
4823       old_cp = iv_ca_cand_for_use (ivs, use);
4824
4825       if (old_cp
4826           && old_cp->cand == cand)
4827         continue;
4828
4829       new_cp = get_use_iv_cost (data, use, cand);
4830       if (!new_cp)
4831         continue;
4832
4833       if (!iv_ca_has_deps (ivs, new_cp))
4834         continue;
4835
4836       if (!cheaper_cost_pair (new_cp, old_cp))
4837         continue;
4838
4839       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4840     }
4841
4842   iv_ca_delta_commit (data, ivs, *delta, true);
4843   cost = iv_ca_cost (ivs);
4844   if (n_ivs)
4845     *n_ivs = iv_ca_n_cands (ivs);
4846   iv_ca_delta_commit (data, ivs, *delta, false);
4847
4848   return cost;
4849 }
4850
4851 /* Try narrowing set IVS by removing CAND.  Return the cost of
4852    the new set and store the differences in DELTA.  */
4853
4854 static unsigned
4855 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4856               struct iv_cand *cand, struct iv_ca_delta **delta)
4857 {
4858   unsigned i, ci;
4859   struct iv_use *use;
4860   struct cost_pair *old_cp, *new_cp, *cp;
4861   bitmap_iterator bi;
4862   struct iv_cand *cnd;
4863   unsigned cost;
4864
4865   *delta = NULL;
4866   for (i = 0; i < n_iv_uses (data); i++)
4867     {
4868       use = iv_use (data, i);
4869
4870       old_cp = iv_ca_cand_for_use (ivs, use);
4871       if (old_cp->cand != cand)
4872         continue;
4873
4874       new_cp = NULL;
4875
4876       if (data->consider_all_candidates)
4877         {
4878           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4879             {
4880               if (ci == cand->id)
4881                 continue;
4882
4883               cnd = iv_cand (data, ci);
4884
4885               cp = get_use_iv_cost (data, use, cnd);
4886               if (!cp)
4887                 continue;
4888               if (!iv_ca_has_deps (ivs, cp))
4889                 continue;
4890
4891               if (!cheaper_cost_pair (cp, new_cp))
4892                 continue;
4893
4894               new_cp = cp;
4895             }
4896         }
4897       else
4898         {
4899           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4900             {
4901               if (ci == cand->id)
4902                 continue;
4903
4904               cnd = iv_cand (data, ci);
4905
4906               cp = get_use_iv_cost (data, use, cnd);
4907               if (!cp)
4908                 continue;
4909               if (!iv_ca_has_deps (ivs, cp))
4910                 continue;
4911
4912               if (!cheaper_cost_pair (cp, new_cp))
4913                 continue;
4914
4915               new_cp = cp;
4916             }
4917         }
4918
4919       if (!new_cp)
4920         {
4921           iv_ca_delta_free (delta);
4922           return INFTY;
4923         }
4924
4925       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4926     }
4927
4928   iv_ca_delta_commit (data, ivs, *delta, true);
4929   cost = iv_ca_cost (ivs);
4930   iv_ca_delta_commit (data, ivs, *delta, false);
4931
4932   return cost;
4933 }
4934
4935 /* Try optimizing the set of candidates IVS by removing candidates different
4936    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4937    differences in DELTA.  */
4938
4939 static unsigned
4940 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4941              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4942 {
4943   bitmap_iterator bi;
4944   struct iv_ca_delta *act_delta, *best_delta;
4945   unsigned i, best_cost, acost;
4946   struct iv_cand *cand;
4947
4948   best_delta = NULL;
4949   best_cost = iv_ca_cost (ivs);
4950
4951   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4952     {
4953       cand = iv_cand (data, i);
4954
4955       if (cand == except_cand)
4956         continue;
4957
4958       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4959
4960       if (acost < best_cost)
4961         {
4962           best_cost = acost;
4963           iv_ca_delta_free (&best_delta);
4964           best_delta = act_delta;
4965         }
4966       else
4967         iv_ca_delta_free (&act_delta);
4968     }
4969
4970   if (!best_delta)
4971     {
4972       *delta = NULL;
4973       return best_cost;
4974     }
4975
4976   /* Recurse to possibly remove other unnecessary ivs.  */
4977   iv_ca_delta_commit (data, ivs, best_delta, true);
4978   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4979   iv_ca_delta_commit (data, ivs, best_delta, false);
4980   *delta = iv_ca_delta_join (best_delta, *delta);
4981   return best_cost;
4982 }
4983
4984 /* Tries to extend the sets IVS in the best possible way in order
4985    to express the USE.  */
4986
4987 static bool
4988 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4989                   struct iv_use *use)
4990 {
4991   unsigned best_cost, act_cost;
4992   unsigned i;
4993   bitmap_iterator bi;
4994   struct iv_cand *cand;
4995   struct iv_ca_delta *best_delta = NULL, *act_delta;
4996   struct cost_pair *cp;
4997
4998   iv_ca_add_use (data, ivs, use);
4999   best_cost = iv_ca_cost (ivs);
5000
5001   cp = iv_ca_cand_for_use (ivs, use);
5002   if (cp)
5003     {
5004       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5005       iv_ca_set_no_cp (data, ivs, use);
5006     }
5007
5008   /* First try important candidates.  Only if it fails, try the specific ones.
5009      Rationale -- in loops with many variables the best choice often is to use
5010      just one generic biv.  If we added here many ivs specific to the uses,
5011      the optimization algorithm later would be likely to get stuck in a local
5012      minimum, thus causing us to create too many ivs.  The approach from
5013      few ivs to more seems more likely to be successful -- starting from few
5014      ivs, replacing an expensive use by a specific iv should always be a
5015      win.  */
5016   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5017     {
5018       cand = iv_cand (data, i);
5019
5020       if (iv_ca_cand_used_p (ivs, cand))
5021         continue;
5022
5023       cp = get_use_iv_cost (data, use, cand);
5024       if (!cp)
5025         continue;
5026
5027       iv_ca_set_cp (data, ivs, use, cp);
5028       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5029       iv_ca_set_no_cp (data, ivs, use);
5030       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5031
5032       if (act_cost < best_cost)
5033         {
5034           best_cost = act_cost;
5035
5036           iv_ca_delta_free (&best_delta);
5037           best_delta = act_delta;
5038         }
5039       else
5040         iv_ca_delta_free (&act_delta);
5041     }
5042
5043   if (best_cost == INFTY)
5044     {
5045       for (i = 0; i < use->n_map_members; i++)
5046         {
5047           cp = use->cost_map + i;
5048           cand = cp->cand;
5049           if (!cand)
5050             continue;
5051
5052           /* Already tried this.  */
5053           if (cand->important)
5054             continue;
5055
5056           if (iv_ca_cand_used_p (ivs, cand))
5057             continue;
5058
5059           act_delta = NULL;
5060           iv_ca_set_cp (data, ivs, use, cp);
5061           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5062           iv_ca_set_no_cp (data, ivs, use);
5063           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5064                                        cp, act_delta);
5065
5066           if (act_cost < best_cost)
5067             {
5068               best_cost = act_cost;
5069
5070               if (best_delta)
5071                 iv_ca_delta_free (&best_delta);
5072               best_delta = act_delta;
5073             }
5074           else
5075             iv_ca_delta_free (&act_delta);
5076         }
5077     }
5078
5079   iv_ca_delta_commit (data, ivs, best_delta, true);
5080   iv_ca_delta_free (&best_delta);
5081
5082   return (best_cost != INFTY);
5083 }
5084
5085 /* Finds an initial assignment of candidates to uses.  */
5086
5087 static struct iv_ca *
5088 get_initial_solution (struct ivopts_data *data)
5089 {
5090   struct iv_ca *ivs = iv_ca_new (data);
5091   unsigned i;
5092
5093   for (i = 0; i < n_iv_uses (data); i++)
5094     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5095       {
5096         iv_ca_free (&ivs);
5097         return NULL;
5098       }
5099
5100   return ivs;
5101 }
5102
5103 /* Tries to improve set of induction variables IVS.  */
5104
5105 static bool
5106 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5107 {
5108   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5109   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5110   struct iv_cand *cand;
5111
5112   /* Try extending the set of induction variables by one.  */
5113   for (i = 0; i < n_iv_cands (data); i++)
5114     {
5115       cand = iv_cand (data, i);
5116
5117       if (iv_ca_cand_used_p (ivs, cand))
5118         continue;
5119
5120       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5121       if (!act_delta)
5122         continue;
5123
5124       /* If we successfully added the candidate and the set is small enough,
5125          try optimizing it by removing other candidates.  */
5126       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5127         {
5128           iv_ca_delta_commit (data, ivs, act_delta, true);
5129           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5130           iv_ca_delta_commit (data, ivs, act_delta, false);
5131           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5132         }
5133
5134       if (acost < best_cost)
5135         {
5136           best_cost = acost;
5137           iv_ca_delta_free (&best_delta);
5138           best_delta = act_delta;
5139         }
5140       else
5141         iv_ca_delta_free (&act_delta);
5142     }
5143
5144   if (!best_delta)
5145     {
5146       /* Try removing the candidates from the set instead.  */
5147       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5148
5149       /* Nothing more we can do.  */
5150       if (!best_delta)
5151         return false;
5152     }
5153
5154   iv_ca_delta_commit (data, ivs, best_delta, true);
5155   gcc_assert (best_cost == iv_ca_cost (ivs));
5156   iv_ca_delta_free (&best_delta);
5157   return true;
5158 }
5159
5160 /* Attempts to find the optimal set of induction variables.  We do simple
5161    greedy heuristic -- we try to replace at most one candidate in the selected
5162    solution and remove the unused ivs while this improves the cost.  */
5163
5164 static struct iv_ca *
5165 find_optimal_iv_set (struct ivopts_data *data)
5166 {
5167   unsigned i;
5168   struct iv_ca *set;
5169   struct iv_use *use;
5170
5171   /* Get the initial solution.  */
5172   set = get_initial_solution (data);
5173   if (!set)
5174     {
5175       if (dump_file && (dump_flags & TDF_DETAILS))
5176         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5177       return NULL;
5178     }
5179
5180   if (dump_file && (dump_flags & TDF_DETAILS))
5181     {
5182       fprintf (dump_file, "Initial set of candidates:\n");
5183       iv_ca_dump (data, dump_file, set);
5184     }
5185
5186   while (try_improve_iv_set (data, set))
5187     {
5188       if (dump_file && (dump_flags & TDF_DETAILS))
5189         {
5190           fprintf (dump_file, "Improved to:\n");
5191           iv_ca_dump (data, dump_file, set);
5192         }
5193     }
5194
5195   if (dump_file && (dump_flags & TDF_DETAILS))
5196     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5197
5198   for (i = 0; i < n_iv_uses (data); i++)
5199     {
5200       use = iv_use (data, i);
5201       use->selected = iv_ca_cand_for_use (set, use)->cand;
5202     }
5203
5204   return set;
5205 }
5206
5207 /* Creates a new induction variable corresponding to CAND.  */
5208
5209 static void
5210 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5211 {
5212   block_stmt_iterator incr_pos;
5213   tree base;
5214   bool after = false;
5215
5216   if (!cand->iv)
5217     return;
5218
5219   switch (cand->pos)
5220     {
5221     case IP_NORMAL:
5222       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5223       break;
5224
5225     case IP_END:
5226       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5227       after = true;
5228       break;
5229
5230     case IP_ORIGINAL:
5231       /* Mark that the iv is preserved.  */
5232       name_info (data, cand->var_before)->preserve_biv = true;
5233       name_info (data, cand->var_after)->preserve_biv = true;
5234
5235       /* Rewrite the increment so that it uses var_before directly.  */
5236       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5237
5238       return;
5239     }
5240
5241   gimple_add_tmp_var (cand->var_before);
5242   add_referenced_var (cand->var_before);
5243
5244   base = unshare_expr (cand->iv->base);
5245
5246   create_iv (base, unshare_expr (cand->iv->step),
5247              cand->var_before, data->current_loop,
5248              &incr_pos, after, &cand->var_before, &cand->var_after);
5249 }
5250
5251 /* Creates new induction variables described in SET.  */
5252
5253 static void
5254 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5255 {
5256   unsigned i;
5257   struct iv_cand *cand;
5258   bitmap_iterator bi;
5259
5260   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5261     {
5262       cand = iv_cand (data, i);
5263       create_new_iv (data, cand);
5264     }
5265 }
5266
5267 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5268    is true, remove also the ssa name defined by the statement.  */
5269
5270 static void
5271 remove_statement (tree stmt, bool including_defined_name)
5272 {
5273   if (TREE_CODE (stmt) == PHI_NODE)
5274     {
5275       if (!including_defined_name)
5276         {
5277           /* Prevent the ssa name defined by the statement from being removed.  */
5278           SET_PHI_RESULT (stmt, NULL);
5279         }
5280       remove_phi_node (stmt, NULL_TREE);
5281     }
5282   else
5283     {
5284       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5285
5286       bsi_remove (&bsi, true);
5287     }
5288 }
5289
5290 /* Rewrites USE (definition of iv used in a nonlinear expression)
5291    using candidate CAND.  */
5292
5293 static void
5294 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5295                             struct iv_use *use, struct iv_cand *cand)
5296 {
5297   tree comp;
5298   tree op, stmts, tgt, ass;
5299   block_stmt_iterator bsi, pbsi;
5300
5301   /* An important special case -- if we are asked to express value of
5302      the original iv by itself, just exit; there is no need to
5303      introduce a new computation (that might also need casting the
5304      variable to unsigned and back).  */
5305   if (cand->pos == IP_ORIGINAL
5306       && cand->incremented_at == use->stmt)
5307     {
5308       tree step, ctype, utype;
5309       enum tree_code incr_code = PLUS_EXPR;
5310
5311       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5312       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5313
5314       step = cand->iv->step;
5315       ctype = TREE_TYPE (step);
5316       utype = TREE_TYPE (cand->var_after);
5317       if (TREE_CODE (step) == NEGATE_EXPR)
5318         {
5319           incr_code = MINUS_EXPR;
5320           step = TREE_OPERAND (step, 0);
5321         }
5322
5323       /* Check whether we may leave the computation unchanged.
5324          This is the case only if it does not rely on other
5325          computations in the loop -- otherwise, the computation
5326          we rely upon may be removed in remove_unused_ivs,
5327          thus leading to ICE.  */
5328       op = TREE_OPERAND (use->stmt, 1);
5329       if (TREE_CODE (op) == PLUS_EXPR
5330           || TREE_CODE (op) == MINUS_EXPR)
5331         {
5332           if (TREE_OPERAND (op, 0) == cand->var_before)
5333             op = TREE_OPERAND (op, 1);
5334           else if (TREE_CODE (op) == PLUS_EXPR
5335                    && TREE_OPERAND (op, 1) == cand->var_before)
5336             op = TREE_OPERAND (op, 0);
5337           else
5338             op = NULL_TREE;
5339         }
5340       else
5341         op = NULL_TREE;
5342
5343       if (op
5344           && (TREE_CODE (op) == INTEGER_CST
5345               || operand_equal_p (op, step, 0)))
5346         return;
5347
5348       /* Otherwise, add the necessary computations to express
5349          the iv.  */
5350       op = fold_convert (ctype, cand->var_before);
5351       comp = fold_convert (utype,
5352                            build2 (incr_code, ctype, op,
5353                                    unshare_expr (step)));
5354     }
5355   else
5356     comp = get_computation (data->current_loop, use, cand);
5357
5358   switch (TREE_CODE (use->stmt))
5359     {
5360     case PHI_NODE:
5361       tgt = PHI_RESULT (use->stmt);
5362
5363       /* If we should keep the biv, do not replace it.  */
5364       if (name_info (data, tgt)->preserve_biv)
5365         return;
5366
5367       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5368       while (!bsi_end_p (pbsi)
5369              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5370         {
5371           bsi = pbsi;
5372           bsi_next (&pbsi);
5373         }
5374       break;
5375
5376     case MODIFY_EXPR:
5377       tgt = TREE_OPERAND (use->stmt, 0);
5378       bsi = bsi_for_stmt (use->stmt);
5379       break;
5380
5381     default:
5382       gcc_unreachable ();
5383     }
5384
5385   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5386
5387   if (TREE_CODE (use->stmt) == PHI_NODE)
5388     {
5389       if (stmts)
5390         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5391       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5392       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5393       remove_statement (use->stmt, false);
5394       SSA_NAME_DEF_STMT (tgt) = ass;
5395     }
5396   else
5397     {
5398       if (stmts)
5399         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5400       TREE_OPERAND (use->stmt, 1) = op;
5401     }
5402 }
5403
5404 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5405    for_each_index.  */
5406
5407 static bool
5408 idx_remove_ssa_names (tree base, tree *idx,
5409                       void *data ATTRIBUTE_UNUSED)
5410 {
5411   tree *op;
5412
5413   if (TREE_CODE (*idx) == SSA_NAME)
5414     *idx = SSA_NAME_VAR (*idx);
5415
5416   if (TREE_CODE (base) == ARRAY_REF)
5417     {
5418       op = &TREE_OPERAND (base, 2);
5419       if (*op
5420           && TREE_CODE (*op) == SSA_NAME)
5421         *op = SSA_NAME_VAR (*op);
5422       op = &TREE_OPERAND (base, 3);
5423       if (*op
5424           && TREE_CODE (*op) == SSA_NAME)
5425         *op = SSA_NAME_VAR (*op);
5426     }
5427
5428   return true;
5429 }
5430
5431 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5432
5433 static tree
5434 unshare_and_remove_ssa_names (tree ref)
5435 {
5436   ref = unshare_expr (ref);
5437   for_each_index (&ref, idx_remove_ssa_names, NULL);
5438
5439   return ref;
5440 }
5441
5442 /* Extract the alias analysis info for the memory reference REF.  There are
5443    several ways how this information may be stored and what precisely is
5444    its semantics depending on the type of the reference, but there always is
5445    somewhere hidden one _DECL node that is used to determine the set of
5446    virtual operands for the reference.  The code below deciphers this jungle
5447    and extracts this single useful piece of information.  */
5448
5449 static tree
5450 get_ref_tag (tree ref, tree orig)
5451 {
5452   tree var = get_base_address (ref);
5453   tree aref = NULL_TREE, tag, sv;
5454   HOST_WIDE_INT offset, size, maxsize;
5455
5456   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5457     {
5458       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5459       if (ref)
5460         break;
5461     }
5462
5463   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5464     return unshare_expr (sv);
5465
5466   if (!var)
5467     return NULL_TREE;
5468
5469   if (TREE_CODE (var) == INDIRECT_REF)
5470     {
5471       /* If the base is a dereference of a pointer, first check its name memory
5472          tag.  If it does not have one, use its symbol memory tag.  */
5473       var = TREE_OPERAND (var, 0);
5474       if (TREE_CODE (var) != SSA_NAME)
5475         return NULL_TREE;
5476
5477       if (SSA_NAME_PTR_INFO (var))
5478         {
5479           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5480           if (tag)
5481             return tag;
5482         }
5483
5484       var = SSA_NAME_VAR (var);
5485       tag = var_ann (var)->symbol_mem_tag;
5486       gcc_assert (tag != NULL_TREE);
5487       return tag;
5488     }
5489   else
5490     {
5491       if (!DECL_P (var))
5492         return NULL_TREE;
5493
5494       tag = var_ann (var)->symbol_mem_tag;
5495       if (tag)
5496         return tag;
5497
5498       return var;
5499     }
5500 }
5501
5502 /* Copies the reference information from OLD_REF to NEW_REF.  */
5503
5504 static void
5505 copy_ref_info (tree new_ref, tree old_ref)
5506 {
5507   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5508     copy_mem_ref_info (new_ref, old_ref);
5509   else
5510     {
5511       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5512       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5513     }
5514 }
5515
5516 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5517
5518 static void
5519 rewrite_use_address (struct ivopts_data *data,
5520                      struct iv_use *use, struct iv_cand *cand)
5521 {
5522   struct affine_tree_combination aff;
5523   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5524   tree ref;
5525
5526   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5527   unshare_aff_combination (&aff);
5528
5529   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5530   copy_ref_info (ref, *use->op_p);
5531   *use->op_p = ref;
5532 }
5533
5534 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5535    candidate CAND.  */
5536
5537 static void
5538 rewrite_use_compare (struct ivopts_data *data,
5539                      struct iv_use *use, struct iv_cand *cand)
5540 {
5541   tree comp;
5542   tree *op_p, cond, op, stmts, bound;
5543   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5544   enum tree_code compare;
5545   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5546
5547   bound = cp->value;
5548   if (bound)
5549     {
5550       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5551       tree var_type = TREE_TYPE (var);
5552
5553       compare = iv_elimination_compare (data, use);
5554       bound = fold_convert (var_type, bound);
5555       op = force_gimple_operand (unshare_expr (bound), &stmts,
5556                                  true, NULL_TREE);
5557
5558       if (stmts)
5559         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5560
5561       *use->op_p = build2 (compare, boolean_type_node, var, op);
5562       update_stmt (use->stmt);
5563       return;
5564     }
5565
5566   /* The induction variable elimination failed; just express the original
5567      giv.  */
5568   comp = get_computation (data->current_loop, use, cand);
5569
5570   cond = *use->op_p;
5571   op_p = &TREE_OPERAND (cond, 0);
5572   if (TREE_CODE (*op_p) != SSA_NAME
5573       || zero_p (get_iv (data, *op_p)->step))
5574     op_p = &TREE_OPERAND (cond, 1);
5575
5576   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5577   if (stmts)
5578     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5579
5580   *op_p = op;
5581 }
5582
5583 /* Rewrites USE using candidate CAND.  */
5584
5585 static void
5586 rewrite_use (struct ivopts_data *data,
5587              struct iv_use *use, struct iv_cand *cand)
5588 {
5589   switch (use->type)
5590     {
5591       case USE_NONLINEAR_EXPR:
5592         rewrite_use_nonlinear_expr (data, use, cand);
5593         break;
5594
5595       case USE_ADDRESS:
5596         rewrite_use_address (data, use, cand);
5597         break;
5598
5599       case USE_COMPARE:
5600         rewrite_use_compare (data, use, cand);
5601         break;
5602
5603       default:
5604         gcc_unreachable ();
5605     }
5606   mark_new_vars_to_rename (use->stmt);
5607 }
5608
5609 /* Rewrite the uses using the selected induction variables.  */
5610
5611 static void
5612 rewrite_uses (struct ivopts_data *data)
5613 {
5614   unsigned i;
5615   struct iv_cand *cand;
5616   struct iv_use *use;
5617
5618   for (i = 0; i < n_iv_uses (data); i++)
5619     {
5620       use = iv_use (data, i);
5621       cand = use->selected;
5622       gcc_assert (cand);
5623
5624       rewrite_use (data, use, cand);
5625     }
5626 }
5627
5628 /* Removes the ivs that are not used after rewriting.  */
5629
5630 static void
5631 remove_unused_ivs (struct ivopts_data *data)
5632 {
5633   unsigned j;
5634   bitmap_iterator bi;
5635
5636   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5637     {
5638       struct version_info *info;
5639
5640       info = ver_info (data, j);
5641       if (info->iv
5642           && !zero_p (info->iv->step)
5643           && !info->inv_id
5644           && !info->iv->have_use_for
5645           && !info->preserve_biv)
5646         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5647     }
5648 }
5649
5650 /* Frees data allocated by the optimization of a single loop.  */
5651
5652 static void
5653 free_loop_data (struct ivopts_data *data)
5654 {
5655   unsigned i, j;
5656   bitmap_iterator bi;
5657   tree obj;
5658
5659   htab_empty (data->niters);
5660
5661   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5662     {
5663       struct version_info *info;
5664
5665       info = ver_info (data, i);
5666       if (info->iv)
5667         free (info->iv);
5668       info->iv = NULL;
5669       info->has_nonlin_use = false;
5670       info->preserve_biv = false;
5671       info->inv_id = 0;
5672     }
5673   bitmap_clear (data->relevant);
5674   bitmap_clear (data->important_candidates);
5675
5676   for (i = 0; i < n_iv_uses (data); i++)
5677     {
5678       struct iv_use *use = iv_use (data, i);
5679
5680       free (use->iv);
5681       BITMAP_FREE (use->related_cands);
5682       for (j = 0; j < use->n_map_members; j++)
5683         if (use->cost_map[j].depends_on)
5684           BITMAP_FREE (use->cost_map[j].depends_on);
5685       free (use->cost_map);
5686       free (use);
5687     }
5688   VEC_truncate (iv_use_p, data->iv_uses, 0);
5689
5690   for (i = 0; i < n_iv_cands (data); i++)
5691     {
5692       struct iv_cand *cand = iv_cand (data, i);
5693
5694       if (cand->iv)
5695         free (cand->iv);
5696       if (cand->depends_on)
5697         BITMAP_FREE (cand->depends_on);
5698       free (cand);
5699     }
5700   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5701
5702   if (data->version_info_size < num_ssa_names)
5703     {
5704       data->version_info_size = 2 * num_ssa_names;
5705       free (data->version_info);
5706       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5707     }
5708
5709   data->max_inv_id = 0;
5710
5711   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5712     SET_DECL_RTL (obj, NULL_RTX);
5713
5714   VEC_truncate (tree, decl_rtl_to_reset, 0);
5715 }
5716
5717 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5718    loop tree.  */
5719
5720 static void
5721 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5722 {
5723   free_loop_data (data);
5724   free (data->version_info);
5725   BITMAP_FREE (data->relevant);
5726   BITMAP_FREE (data->important_candidates);
5727   htab_delete (data->niters);
5728
5729   VEC_free (tree, heap, decl_rtl_to_reset);
5730   VEC_free (iv_use_p, heap, data->iv_uses);
5731   VEC_free (iv_cand_p, heap, data->iv_candidates);
5732 }
5733
5734 /* Optimizes the LOOP.  Returns true if anything changed.  */
5735
5736 static bool
5737 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5738 {
5739   bool changed = false;
5740   struct iv_ca *iv_ca;
5741   edge exit;
5742
5743   data->current_loop = loop;
5744
5745   if (dump_file && (dump_flags & TDF_DETAILS))
5746     {
5747       fprintf (dump_file, "Processing loop %d\n", loop->num);
5748
5749       exit = single_dom_exit (loop);
5750       if (exit)
5751         {
5752           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5753                    exit->src->index, exit->dest->index);
5754           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5755           fprintf (dump_file, "\n");
5756         }
5757
5758       fprintf (dump_file, "\n");
5759     }
5760
5761   /* For each ssa name determines whether it behaves as an induction variable
5762      in some loop.  */
5763   if (!find_induction_variables (data))
5764     goto finish;
5765
5766   /* Finds interesting uses (item 1).  */
5767   find_interesting_uses (data);
5768   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5769     goto finish;
5770
5771   /* Finds candidates for the induction variables (item 2).  */
5772   find_iv_candidates (data);
5773
5774   /* Calculates the costs (item 3, part 1).  */
5775   determine_use_iv_costs (data);
5776   determine_iv_costs (data);
5777   determine_set_costs (data);
5778
5779   /* Find the optimal set of induction variables (item 3, part 2).  */
5780   iv_ca = find_optimal_iv_set (data);
5781   if (!iv_ca)
5782     goto finish;
5783   changed = true;
5784
5785   /* Create the new induction variables (item 4, part 1).  */
5786   create_new_ivs (data, iv_ca);
5787   iv_ca_free (&iv_ca);
5788
5789   /* Rewrite the uses (item 4, part 2).  */
5790   rewrite_uses (data);
5791
5792   /* Remove the ivs that are unused after rewriting.  */
5793   remove_unused_ivs (data);
5794
5795   /* We have changed the structure of induction variables; it might happen
5796      that definitions in the scev database refer to some of them that were
5797      eliminated.  */
5798   scev_reset ();
5799
5800 finish:
5801   free_loop_data (data);
5802
5803   return changed;
5804 }
5805
5806 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5807
5808 void
5809 tree_ssa_iv_optimize (struct loops *loops)
5810 {
5811   struct loop *loop;
5812   struct ivopts_data data;
5813
5814   tree_ssa_iv_optimize_init (&data);
5815
5816   /* Optimize the loops starting with the innermost ones.  */
5817   loop = loops->tree_root;
5818   while (loop->inner)
5819     loop = loop->inner;
5820
5821   /* Scan the loops, inner ones first.  */
5822   while (loop != loops->tree_root)
5823     {
5824       if (dump_file && (dump_flags & TDF_DETAILS))
5825         flow_loop_dump (loop, dump_file, NULL, 1);
5826
5827       tree_ssa_iv_optimize_loop (&data, loop);
5828
5829       if (loop->next)
5830         {
5831           loop = loop->next;
5832           while (loop->inner)
5833             loop = loop->inner;
5834         }
5835       else
5836         loop = loop->outer;
5837     }
5838
5839   tree_ssa_iv_optimize_finalize (&data);
5840 }