gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = loop->single_exit;
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 static bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 839     return NULL_TREE;
 840
 841   switch (code)
 842     {
 843     case INTEGER_CST:
 844       return NULL_TREE;
 845
 846     case ADDR_EXPR:
 847       obj = TREE_OPERAND (expr, 0);
 848       base = get_base_address (obj);
 849
 850       if (!base)
 851         return expr;
 852
 853       if (TREE_CODE (base) == INDIRECT_REF)
 854         return determine_base_object (TREE_OPERAND (base, 0));
 855
 856       return fold_convert (ptr_type_node,
 857                            build_fold_addr_expr (base));
 858
 859     case PLUS_EXPR:
 860     case MINUS_EXPR:
 861       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 862       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 863
 864       if (!op1)
 865         return op0;
 866
 867       if (!op0)
 868         return (code == PLUS_EXPR
 869                 ? op1
 870                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 871
 872       return fold_build2 (code, ptr_type_node, op0, op1);
 873
 874     case NOP_EXPR:
 875     case CONVERT_EXPR:
 876       return determine_base_object (TREE_OPERAND (expr, 0));
 877
 878     default:
 879       return fold_convert (ptr_type_node, expr);
 880     }
 881 }
 882
 883 /* Allocates an induction variable with given initial value BASE and step STEP
 884    for loop LOOP.  */
 885
 886 static struct iv *
 887 alloc_iv (tree base, tree step)
 888 {
 889   struct iv *iv = XCNEW (struct iv);
 890
 891   if (step && integer_zerop (step))
 892     step = NULL_TREE;
 893
 894   iv->base = base;
 895   iv->base_object = determine_base_object (base);
 896   iv->step = step;
 897   iv->biv_p = false;
 898   iv->have_use_for = false;
 899   iv->use_id = 0;
 900   iv->ssa_name = NULL_TREE;
 901
 902   return iv;
 903 }
 904
 905 /* Sets STEP and BASE for induction variable IV.  */
 906
 907 static void
 908 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 909 {
 910   struct version_info *info = name_info (data, iv);
 911
 912   gcc_assert (!info->iv);
 913
 914   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 915   info->iv = alloc_iv (base, step);
 916   info->iv->ssa_name = iv;
 917 }
 918
 919 /* Finds induction variable declaration for VAR.  */
 920
 921 static struct iv *
 922 get_iv (struct ivopts_data *data, tree var)
 923 {
 924   basic_block bb;
 925
 926   if (!name_info (data, var)->iv)
 927     {
 928       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 929
 930       if (!bb
 931           || !flow_bb_inside_loop_p (data->current_loop, bb))
 932         set_iv (data, var, var, NULL_TREE);
 933     }
 934
 935   return name_info (data, var)->iv;
 936 }
 937
 938 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 939    not define a simple affine biv with nonzero step.  */
 940
 941 static tree
 942 determine_biv_step (tree phi)
 943 {
 944   struct loop *loop = bb_for_stmt (phi)->loop_father;
 945   tree name = PHI_RESULT (phi);
 946   affine_iv iv;
 947
 948   if (!is_gimple_reg (name))
 949     return NULL_TREE;
 950
 951   if (!simple_iv (loop, phi, name, &iv, true))
 952     return NULL_TREE;
 953
 954   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 955 }
 956
 957 /* Finds basic ivs.  */
 958
 959 static bool
 960 find_bivs (struct ivopts_data *data)
 961 {
 962   tree phi, step, type, base;
 963   bool found = false;
 964   struct loop *loop = data->current_loop;
 965
 966   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 967     {
 968       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 969         continue;
 970
 971       step = determine_biv_step (phi);
 972       if (!step)
 973         continue;
 974
 975       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 976       base = expand_simple_operations (base);
 977       if (contains_abnormal_ssa_name_p (base)
 978           || contains_abnormal_ssa_name_p (step))
 979         continue;
 980
 981       type = TREE_TYPE (PHI_RESULT (phi));
 982       base = fold_convert (type, base);
 983       if (step)
 984         step = fold_convert (type, step);
 985
 986       set_iv (data, PHI_RESULT (phi), base, step);
 987       found = true;
 988     }
 989
 990   return found;
 991 }
 992
 993 /* Marks basic ivs.  */
 994
 995 static void
 996 mark_bivs (struct ivopts_data *data)
 997 {
 998   tree phi, var;
 999   struct iv *iv, *incr_iv;
1000   struct loop *loop = data->current_loop;
1001   basic_block incr_bb;
1002
1003   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1004     {
1005       iv = get_iv (data, PHI_RESULT (phi));
1006       if (!iv)
1007         continue;
1008
1009       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010       incr_iv = get_iv (data, var);
1011       if (!incr_iv)
1012         continue;
1013
1014       /* If the increment is in the subloop, ignore it.  */
1015       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016       if (incr_bb->loop_father != data->current_loop
1017           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018         continue;
1019
1020       iv->biv_p = true;
1021       incr_iv->biv_p = true;
1022     }
1023 }
1024
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026    parameters to IV.  */
1027
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1030 {
1031   tree lhs;
1032   struct loop *loop = data->current_loop;
1033
1034   iv->base = NULL_TREE;
1035   iv->step = NULL_TREE;
1036
1037   if (TREE_CODE (stmt) != MODIFY_EXPR)
1038     return false;
1039
1040   lhs = TREE_OPERAND (stmt, 0);
1041   if (TREE_CODE (lhs) != SSA_NAME)
1042     return false;
1043
1044   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1045     return false;
1046   iv->base = expand_simple_operations (iv->base);
1047
1048   if (contains_abnormal_ssa_name_p (iv->base)
1049       || contains_abnormal_ssa_name_p (iv->step))
1050     return false;
1051
1052   return true;
1053 }
1054
1055 /* Finds general ivs in statement STMT.  */
1056
1057 static void
1058 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1059 {
1060   affine_iv iv;
1061
1062   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1063     return;
1064
1065   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1066 }
1067
1068 /* Finds general ivs in basic block BB.  */
1069
1070 static void
1071 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1072 {
1073   block_stmt_iterator bsi;
1074
1075   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1076     find_givs_in_stmt (data, bsi_stmt (bsi));
1077 }
1078
1079 /* Finds general ivs.  */
1080
1081 static void
1082 find_givs (struct ivopts_data *data)
1083 {
1084   struct loop *loop = data->current_loop;
1085   basic_block *body = get_loop_body_in_dom_order (loop);
1086   unsigned i;
1087
1088   for (i = 0; i < loop->num_nodes; i++)
1089     find_givs_in_bb (data, body[i]);
1090   free (body);
1091 }
1092
1093 /* For each ssa name defined in LOOP determines whether it is an induction
1094    variable and if so, its initial value and step.  */
1095
1096 static bool
1097 find_induction_variables (struct ivopts_data *data)
1098 {
1099   unsigned i;
1100   bitmap_iterator bi;
1101
1102   if (!find_bivs (data))
1103     return false;
1104
1105   find_givs (data);
1106   mark_bivs (data);
1107
1108   if (dump_file && (dump_flags & TDF_DETAILS))
1109     {
1110       tree niter = niter_for_single_dom_exit (data);
1111
1112       if (niter)
1113         {
1114           fprintf (dump_file, "  number of iterations ");
1115           print_generic_expr (dump_file, niter, TDF_SLIM);
1116           fprintf (dump_file, "\n\n");
1117         };
1118
1119       fprintf (dump_file, "Induction variables:\n\n");
1120
1121       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1122         {
1123           if (ver_info (data, i)->iv)
1124             dump_iv (dump_file, ver_info (data, i)->iv);
1125         }
1126     }
1127
1128   return true;
1129 }
1130
1131 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1132
1133 static struct iv_use *
1134 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1135             tree stmt, enum use_type use_type)
1136 {
1137   struct iv_use *use = XCNEW (struct iv_use);
1138
1139   use->id = n_iv_uses (data);
1140   use->type = use_type;
1141   use->iv = iv;
1142   use->stmt = stmt;
1143   use->op_p = use_p;
1144   use->related_cands = BITMAP_ALLOC (NULL);
1145
1146   /* To avoid showing ssa name in the dumps, if it was not reset by the
1147      caller.  */
1148   iv->ssa_name = NULL_TREE;
1149
1150   if (dump_file && (dump_flags & TDF_DETAILS))
1151     dump_use (dump_file, use);
1152
1153   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1154
1155   return use;
1156 }
1157
1158 /* Checks whether OP is a loop-level invariant and if so, records it.
1159    NONLINEAR_USE is true if the invariant is used in a way we do not
1160    handle specially.  */
1161
1162 static void
1163 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1164 {
1165   basic_block bb;
1166   struct version_info *info;
1167
1168   if (TREE_CODE (op) != SSA_NAME
1169       || !is_gimple_reg (op))
1170     return;
1171
1172   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1173   if (bb
1174       && flow_bb_inside_loop_p (data->current_loop, bb))
1175     return;
1176
1177   info = name_info (data, op);
1178   info->name = op;
1179   info->has_nonlin_use |= nonlinear_use;
1180   if (!info->inv_id)
1181     info->inv_id = ++data->max_inv_id;
1182   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1183 }
1184
1185 /* Checks whether the use OP is interesting and if so, records it.  */
1186
1187 static struct iv_use *
1188 find_interesting_uses_op (struct ivopts_data *data, tree op)
1189 {
1190   struct iv *iv;
1191   struct iv *civ;
1192   tree stmt;
1193   struct iv_use *use;
1194
1195   if (TREE_CODE (op) != SSA_NAME)
1196     return NULL;
1197
1198   iv = get_iv (data, op);
1199   if (!iv)
1200     return NULL;
1201
1202   if (iv->have_use_for)
1203     {
1204       use = iv_use (data, iv->use_id);
1205
1206       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1207       return use;
1208     }
1209
1210   if (zero_p (iv->step))
1211     {
1212       record_invariant (data, op, true);
1213       return NULL;
1214     }
1215   iv->have_use_for = true;
1216
1217   civ = XNEW (struct iv);
1218   *civ = *iv;
1219
1220   stmt = SSA_NAME_DEF_STMT (op);
1221   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1222               || TREE_CODE (stmt) == MODIFY_EXPR);
1223
1224   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1225   iv->use_id = use->id;
1226
1227   return use;
1228 }
1229
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231    and if so, records it.  */
1232
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1235 {
1236   tree *op0_p;
1237   tree *op1_p;
1238   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1239   struct iv const_iv;
1240   tree zero = integer_zero_node;
1241
1242   const_iv.step = NULL_TREE;
1243
1244   if (TREE_CODE (*cond_p) != SSA_NAME
1245       && !COMPARISON_CLASS_P (*cond_p))
1246     return;
1247
1248   if (TREE_CODE (*cond_p) == SSA_NAME)
1249     {
1250       op0_p = cond_p;
1251       op1_p = &zero;
1252     }
1253   else
1254     {
1255       op0_p = &TREE_OPERAND (*cond_p, 0);
1256       op1_p = &TREE_OPERAND (*cond_p, 1);
1257     }
1258
1259   if (TREE_CODE (*op0_p) == SSA_NAME)
1260     iv0 = get_iv (data, *op0_p);
1261   else
1262     iv0 = &const_iv;
1263
1264   if (TREE_CODE (*op1_p) == SSA_NAME)
1265     iv1 = get_iv (data, *op1_p);
1266   else
1267     iv1 = &const_iv;
1268
1269   if (/* When comparing with non-invariant value, we may not do any senseful
1270          induction variable elimination.  */
1271       (!iv0 || !iv1)
1272       /* Eliminating condition based on two ivs would be nontrivial.
1273          ??? TODO -- it is not really important to handle this case.  */
1274       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1275     {
1276       find_interesting_uses_op (data, *op0_p);
1277       find_interesting_uses_op (data, *op1_p);
1278       return;
1279     }
1280
1281   if (zero_p (iv0->step) && zero_p (iv1->step))
1282     {
1283       /* If both are invariants, this is a work for unswitching.  */
1284       return;
1285     }
1286
1287   civ = XNEW (struct iv);
1288   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1289   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1290 }
1291
1292 /* Returns true if expression EXPR is obviously invariant in LOOP,
1293    i.e. if all its operands are defined outside of the LOOP.  */
1294
1295 bool
1296 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1297 {
1298   basic_block def_bb;
1299   unsigned i, len;
1300
1301   if (is_gimple_min_invariant (expr))
1302     return true;
1303
1304   if (TREE_CODE (expr) == SSA_NAME)
1305     {
1306       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1307       if (def_bb
1308           && flow_bb_inside_loop_p (loop, def_bb))
1309         return false;
1310
1311       return true;
1312     }
1313
1314   if (!EXPR_P (expr))
1315     return false;
1316
1317   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1318   for (i = 0; i < len; i++)
1319     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1320       return false;
1321
1322   return true;
1323 }
1324
1325 /* Cumulates the steps of indices into DATA and replaces their values with the
1326    initial ones.  Returns false when the value of the index cannot be determined.
1327    Callback for for_each_index.  */
1328
1329 struct ifs_ivopts_data
1330 {
1331   struct ivopts_data *ivopts_data;
1332   tree stmt;
1333   tree *step_p;
1334 };
1335
1336 static bool
1337 idx_find_step (tree base, tree *idx, void *data)
1338 {
1339   struct ifs_ivopts_data *dta = data;
1340   struct iv *iv;
1341   tree step, iv_step, lbound, off;
1342   struct loop *loop = dta->ivopts_data->current_loop;
1343
1344   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1345       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1346     return false;
1347
1348   /* If base is a component ref, require that the offset of the reference
1349      be invariant.  */
1350   if (TREE_CODE (base) == COMPONENT_REF)
1351     {
1352       off = component_ref_field_offset (base);
1353       return expr_invariant_in_loop_p (loop, off);
1354     }
1355
1356   /* If base is array, first check whether we will be able to move the
1357      reference out of the loop (in order to take its address in strength
1358      reduction).  In order for this to work we need both lower bound
1359      and step to be loop invariants.  */
1360   if (TREE_CODE (base) == ARRAY_REF)
1361     {
1362       step = array_ref_element_size (base);
1363       lbound = array_ref_low_bound (base);
1364
1365       if (!expr_invariant_in_loop_p (loop, step)
1366           || !expr_invariant_in_loop_p (loop, lbound))
1367         return false;
1368     }
1369
1370   if (TREE_CODE (*idx) != SSA_NAME)
1371     return true;
1372
1373   iv = get_iv (dta->ivopts_data, *idx);
1374   if (!iv)
1375     return false;
1376
1377   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1378           *&x[0], which is not folded and does not trigger the
1379           ARRAY_REF path below.  */
1380   *idx = iv->base;
1381
1382   if (!iv->step)
1383     return true;
1384
1385   if (TREE_CODE (base) == ARRAY_REF)
1386     {
1387       step = array_ref_element_size (base);
1388
1389       /* We only handle addresses whose step is an integer constant.  */
1390       if (TREE_CODE (step) != INTEGER_CST)
1391         return false;
1392     }
1393   else
1394     /* The step for pointer arithmetics already is 1 byte.  */
1395     step = build_int_cst (sizetype, 1);
1396
1397   /* FIXME: convert_step should not be used outside chrec_convert: fix
1398      this by calling chrec_convert.  */
1399   iv_step = convert_step (dta->ivopts_data->current_loop,
1400                           sizetype, iv->base, iv->step, dta->stmt);
1401
1402   if (!iv_step)
1403     {
1404       /* The index might wrap.  */
1405       return false;
1406     }
1407
1408   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1409
1410   if (!*dta->step_p)
1411     *dta->step_p = step;
1412   else
1413     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1414
1415   return true;
1416 }
1417
1418 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1419    object is passed to it in DATA.  */
1420
1421 static bool
1422 idx_record_use (tree base, tree *idx,
1423                 void *data)
1424 {
1425   find_interesting_uses_op (data, *idx);
1426   if (TREE_CODE (base) == ARRAY_REF)
1427     {
1428       find_interesting_uses_op (data, array_ref_element_size (base));
1429       find_interesting_uses_op (data, array_ref_low_bound (base));
1430     }
1431   return true;
1432 }
1433
1434 /* Returns true if memory reference REF may be unaligned.  */
1435
1436 static bool
1437 may_be_unaligned_p (tree ref)
1438 {
1439   tree base;
1440   tree base_type;
1441   HOST_WIDE_INT bitsize;
1442   HOST_WIDE_INT bitpos;
1443   tree toffset;
1444   enum machine_mode mode;
1445   int unsignedp, volatilep;
1446   unsigned base_align;
1447
1448   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1449      thus they are not misaligned.  */
1450   if (TREE_CODE (ref) == TARGET_MEM_REF)
1451     return false;
1452
1453   /* The test below is basically copy of what expr.c:normal_inner_ref
1454      does to check whether the object must be loaded by parts when
1455      STRICT_ALIGNMENT is true.  */
1456   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1457                               &unsignedp, &volatilep, true);
1458   base_type = TREE_TYPE (base);
1459   base_align = TYPE_ALIGN (base_type);
1460
1461   if (mode != BLKmode
1462       && (base_align < GET_MODE_ALIGNMENT (mode)
1463           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1464           || bitpos % BITS_PER_UNIT != 0))
1465     return true;
1466
1467   return false;
1468 }
1469
1470 /* Finds addresses in *OP_P inside STMT.  */
1471
1472 static void
1473 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1474 {
1475   tree base = *op_p, step = NULL;
1476   struct iv *civ;
1477   struct ifs_ivopts_data ifs_ivopts_data;
1478
1479   /* Do not play with volatile memory references.  A bit too conservative,
1480      perhaps, but safe.  */
1481   if (stmt_ann (stmt)->has_volatile_ops)
1482     goto fail;
1483
1484   /* Ignore bitfields for now.  Not really something terribly complicated
1485      to handle.  TODO.  */
1486   if (TREE_CODE (base) == BIT_FIELD_REF
1487       || (TREE_CODE (base) == COMPONENT_REF
1488           && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
1489     goto fail;
1490
1491   if (STRICT_ALIGNMENT
1492       && may_be_unaligned_p (base))
1493     goto fail;
1494
1495   base = unshare_expr (base);
1496
1497   if (TREE_CODE (base) == TARGET_MEM_REF)
1498     {
1499       tree type = build_pointer_type (TREE_TYPE (base));
1500       tree astep;
1501
1502       if (TMR_BASE (base)
1503           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1504         {
1505           civ = get_iv (data, TMR_BASE (base));
1506           if (!civ)
1507             goto fail;
1508
1509           TMR_BASE (base) = civ->base;
1510           step = civ->step;
1511         }
1512       if (TMR_INDEX (base)
1513           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1514         {
1515           civ = get_iv (data, TMR_INDEX (base));
1516           if (!civ)
1517             goto fail;
1518
1519           TMR_INDEX (base) = civ->base;
1520           astep = civ->step;
1521
1522           if (astep)
1523             {
1524               if (TMR_STEP (base))
1525                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1526
1527               if (step)
1528                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1529               else
1530                 step = astep;
1531             }
1532         }
1533
1534       if (zero_p (step))
1535         goto fail;
1536       base = tree_mem_ref_addr (type, base);
1537     }
1538   else
1539     {
1540       ifs_ivopts_data.ivopts_data = data;
1541       ifs_ivopts_data.stmt = stmt;
1542       ifs_ivopts_data.step_p = &step;
1543       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1544           || zero_p (step))
1545         goto fail;
1546
1547       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1548       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1549
1550       base = build_fold_addr_expr (base);
1551
1552       /* Substituting bases of IVs into the base expression might
1553          have caused folding opportunities.  */
1554       if (TREE_CODE (base) == ADDR_EXPR)
1555         {
1556           tree *ref = &TREE_OPERAND (base, 0);
1557           while (handled_component_p (*ref))
1558             ref = &TREE_OPERAND (*ref, 0);
1559           if (TREE_CODE (*ref) == INDIRECT_REF)
1560             *ref = fold_indirect_ref (*ref);
1561         }
1562     }
1563
1564   civ = alloc_iv (base, step);
1565   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1566   return;
1567
1568 fail:
1569   for_each_index (op_p, idx_record_use, data);
1570 }
1571
1572 /* Finds and records invariants used in STMT.  */
1573
1574 static void
1575 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1576 {
1577   ssa_op_iter iter;
1578   use_operand_p use_p;
1579   tree op;
1580
1581   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1582     {
1583       op = USE_FROM_PTR (use_p);
1584       record_invariant (data, op, false);
1585     }
1586 }
1587
1588 /* Finds interesting uses of induction variables in the statement STMT.  */
1589
1590 static void
1591 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1592 {
1593   struct iv *iv;
1594   tree op, lhs, rhs;
1595   ssa_op_iter iter;
1596   use_operand_p use_p;
1597
1598   find_invariants_stmt (data, stmt);
1599
1600   if (TREE_CODE (stmt) == COND_EXPR)
1601     {
1602       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1603       return;
1604     }
1605
1606   if (TREE_CODE (stmt) == MODIFY_EXPR)
1607     {
1608       lhs = TREE_OPERAND (stmt, 0);
1609       rhs = TREE_OPERAND (stmt, 1);
1610
1611       if (TREE_CODE (lhs) == SSA_NAME)
1612         {
1613           /* If the statement defines an induction variable, the uses are not
1614              interesting by themselves.  */
1615
1616           iv = get_iv (data, lhs);
1617
1618           if (iv && !zero_p (iv->step))
1619             return;
1620         }
1621
1622       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1623         {
1624         case tcc_comparison:
1625           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1626           return;
1627
1628         case tcc_reference:
1629           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1630           if (REFERENCE_CLASS_P (lhs))
1631             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1632           return;
1633
1634         default: ;
1635         }
1636
1637       if (REFERENCE_CLASS_P (lhs)
1638           && is_gimple_val (rhs))
1639         {
1640           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1641           find_interesting_uses_op (data, rhs);
1642           return;
1643         }
1644
1645       /* TODO -- we should also handle address uses of type
1646
1647          memory = call (whatever);
1648
1649          and
1650
1651          call (memory).  */
1652     }
1653
1654   if (TREE_CODE (stmt) == PHI_NODE
1655       && bb_for_stmt (stmt) == data->current_loop->header)
1656     {
1657       lhs = PHI_RESULT (stmt);
1658       iv = get_iv (data, lhs);
1659
1660       if (iv && !zero_p (iv->step))
1661         return;
1662     }
1663
1664   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1665     {
1666       op = USE_FROM_PTR (use_p);
1667
1668       if (TREE_CODE (op) != SSA_NAME)
1669         continue;
1670
1671       iv = get_iv (data, op);
1672       if (!iv)
1673         continue;
1674
1675       find_interesting_uses_op (data, op);
1676     }
1677 }
1678
1679 /* Finds interesting uses of induction variables outside of loops
1680    on loop exit edge EXIT.  */
1681
1682 static void
1683 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1684 {
1685   tree phi, def;
1686
1687   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1688     {
1689       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1690       find_interesting_uses_op (data, def);
1691     }
1692 }
1693
1694 /* Finds uses of the induction variables that are interesting.  */
1695
1696 static void
1697 find_interesting_uses (struct ivopts_data *data)
1698 {
1699   basic_block bb;
1700   block_stmt_iterator bsi;
1701   tree phi;
1702   basic_block *body = get_loop_body (data->current_loop);
1703   unsigned i;
1704   struct version_info *info;
1705   edge e;
1706
1707   if (dump_file && (dump_flags & TDF_DETAILS))
1708     fprintf (dump_file, "Uses:\n\n");
1709
1710   for (i = 0; i < data->current_loop->num_nodes; i++)
1711     {
1712       edge_iterator ei;
1713       bb = body[i];
1714
1715       FOR_EACH_EDGE (e, ei, bb->succs)
1716         if (e->dest != EXIT_BLOCK_PTR
1717             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1718           find_interesting_uses_outside (data, e);
1719
1720       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1721         find_interesting_uses_stmt (data, phi);
1722       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1723         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1724     }
1725
1726   if (dump_file && (dump_flags & TDF_DETAILS))
1727     {
1728       bitmap_iterator bi;
1729
1730       fprintf (dump_file, "\n");
1731
1732       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1733         {
1734           info = ver_info (data, i);
1735           if (info->inv_id)
1736             {
1737               fprintf (dump_file, "  ");
1738               print_generic_expr (dump_file, info->name, TDF_SLIM);
1739               fprintf (dump_file, " is invariant (%d)%s\n",
1740                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1741             }
1742         }
1743
1744       fprintf (dump_file, "\n");
1745     }
1746
1747   free (body);
1748 }
1749
1750 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1751    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1752    we are at the top-level of the processed address.  */
1753
1754 static tree
1755 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1756                 unsigned HOST_WIDE_INT *offset)
1757 {
1758   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1759   enum tree_code code;
1760   tree type, orig_type = TREE_TYPE (expr);
1761   unsigned HOST_WIDE_INT off0, off1, st;
1762   tree orig_expr = expr;
1763
1764   STRIP_NOPS (expr);
1765
1766   type = TREE_TYPE (expr);
1767   code = TREE_CODE (expr);
1768   *offset = 0;
1769
1770   switch (code)
1771     {
1772     case INTEGER_CST:
1773       if (!cst_and_fits_in_hwi (expr)
1774           || zero_p (expr))
1775         return orig_expr;
1776
1777       *offset = int_cst_value (expr);
1778       return build_int_cst (orig_type, 0);
1779
1780     case PLUS_EXPR:
1781     case MINUS_EXPR:
1782       op0 = TREE_OPERAND (expr, 0);
1783       op1 = TREE_OPERAND (expr, 1);
1784
1785       op0 = strip_offset_1 (op0, false, false, &off0);
1786       op1 = strip_offset_1 (op1, false, false, &off1);
1787
1788       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1789       if (op0 == TREE_OPERAND (expr, 0)
1790           && op1 == TREE_OPERAND (expr, 1))
1791         return orig_expr;
1792
1793       if (zero_p (op1))
1794         expr = op0;
1795       else if (zero_p (op0))
1796         {
1797           if (code == PLUS_EXPR)
1798             expr = op1;
1799           else
1800             expr = fold_build1 (NEGATE_EXPR, type, op1);
1801         }
1802       else
1803         expr = fold_build2 (code, type, op0, op1);
1804
1805       return fold_convert (orig_type, expr);
1806
1807     case ARRAY_REF:
1808       if (!inside_addr)
1809         return orig_expr;
1810
1811       step = array_ref_element_size (expr);
1812       if (!cst_and_fits_in_hwi (step))
1813         break;
1814
1815       st = int_cst_value (step);
1816       op1 = TREE_OPERAND (expr, 1);
1817       op1 = strip_offset_1 (op1, false, false, &off1);
1818       *offset = off1 * st;
1819
1820       if (top_compref
1821           && zero_p (op1))
1822         {
1823           /* Strip the component reference completely.  */
1824           op0 = TREE_OPERAND (expr, 0);
1825           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1826           *offset += off0;
1827           return op0;
1828         }
1829       break;
1830
1831     case COMPONENT_REF:
1832       if (!inside_addr)
1833         return orig_expr;
1834
1835       tmp = component_ref_field_offset (expr);
1836       if (top_compref
1837           && cst_and_fits_in_hwi (tmp))
1838         {
1839           /* Strip the component reference completely.  */
1840           op0 = TREE_OPERAND (expr, 0);
1841           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1842           *offset = off0 + int_cst_value (tmp);
1843           return op0;
1844         }
1845       break;
1846
1847     case ADDR_EXPR:
1848       op0 = TREE_OPERAND (expr, 0);
1849       op0 = strip_offset_1 (op0, true, true, &off0);
1850       *offset += off0;
1851
1852       if (op0 == TREE_OPERAND (expr, 0))
1853         return orig_expr;
1854
1855       expr = build_fold_addr_expr (op0);
1856       return fold_convert (orig_type, expr);
1857
1858     case INDIRECT_REF:
1859       inside_addr = false;
1860       break;
1861
1862     default:
1863       return orig_expr;
1864     }
1865
1866   /* Default handling of expressions for that we want to recurse into
1867      the first operand.  */
1868   op0 = TREE_OPERAND (expr, 0);
1869   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1870   *offset += off0;
1871
1872   if (op0 == TREE_OPERAND (expr, 0)
1873       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1874     return orig_expr;
1875
1876   expr = copy_node (expr);
1877   TREE_OPERAND (expr, 0) = op0;
1878   if (op1)
1879     TREE_OPERAND (expr, 1) = op1;
1880
1881   /* Inside address, we might strip the top level component references,
1882      thus changing type of the expression.  Handling of ADDR_EXPR
1883      will fix that.  */
1884   expr = fold_convert (orig_type, expr);
1885
1886   return expr;
1887 }
1888
1889 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1890
1891 static tree
1892 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1893 {
1894   return strip_offset_1 (expr, false, false, offset);
1895 }
1896
1897 /* Returns variant of TYPE that can be used as base for different uses.
1898    For integer types, we return unsigned variant of the type, which
1899    avoids problems with overflows.  For pointer types, we return void *.  */
1900
1901 static tree
1902 generic_type_for (tree type)
1903 {
1904   if (POINTER_TYPE_P (type))
1905     return ptr_type_node;
1906
1907   if (TYPE_UNSIGNED (type))
1908     return type;
1909
1910   return unsigned_type_for (type);
1911 }
1912
1913 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1914    the bitmap to that we should store it.  */
1915
1916 static struct ivopts_data *fd_ivopts_data;
1917 static tree
1918 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1919 {
1920   bitmap *depends_on = data;
1921   struct version_info *info;
1922
1923   if (TREE_CODE (*expr_p) != SSA_NAME)
1924     return NULL_TREE;
1925   info = name_info (fd_ivopts_data, *expr_p);
1926
1927   if (!info->inv_id || info->has_nonlin_use)
1928     return NULL_TREE;
1929
1930   if (!*depends_on)
1931     *depends_on = BITMAP_ALLOC (NULL);
1932   bitmap_set_bit (*depends_on, info->inv_id);
1933
1934   return NULL_TREE;
1935 }
1936
1937 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1938    position to POS.  If USE is not NULL, the candidate is set as related to
1939    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1940    replacement of the final value of the iv by a direct computation.  */
1941
1942 static struct iv_cand *
1943 add_candidate_1 (struct ivopts_data *data,
1944                  tree base, tree step, bool important, enum iv_position pos,
1945                  struct iv_use *use, tree incremented_at)
1946 {
1947   unsigned i;
1948   struct iv_cand *cand = NULL;
1949   tree type, orig_type;
1950
1951   if (base)
1952     {
1953       orig_type = TREE_TYPE (base);
1954       type = generic_type_for (orig_type);
1955       if (type != orig_type)
1956         {
1957           base = fold_convert (type, base);
1958           if (step)
1959             step = fold_convert (type, step);
1960         }
1961     }
1962
1963   for (i = 0; i < n_iv_cands (data); i++)
1964     {
1965       cand = iv_cand (data, i);
1966
1967       if (cand->pos != pos)
1968         continue;
1969
1970       if (cand->incremented_at != incremented_at)
1971         continue;
1972
1973       if (!cand->iv)
1974         {
1975           if (!base && !step)
1976             break;
1977
1978           continue;
1979         }
1980
1981       if (!base && !step)
1982         continue;
1983
1984       if (!operand_equal_p (base, cand->iv->base, 0))
1985         continue;
1986
1987       if (zero_p (cand->iv->step))
1988         {
1989           if (zero_p (step))
1990             break;
1991         }
1992       else
1993         {
1994           if (step && operand_equal_p (step, cand->iv->step, 0))
1995             break;
1996         }
1997     }
1998
1999   if (i == n_iv_cands (data))
2000     {
2001       cand = XCNEW (struct iv_cand);
2002       cand->id = i;
2003
2004       if (!base && !step)
2005         cand->iv = NULL;
2006       else
2007         cand->iv = alloc_iv (base, step);
2008
2009       cand->pos = pos;
2010       if (pos != IP_ORIGINAL && cand->iv)
2011         {
2012           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2013           cand->var_after = cand->var_before;
2014         }
2015       cand->important = important;
2016       cand->incremented_at = incremented_at;
2017       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2018
2019       if (step
2020           && TREE_CODE (step) != INTEGER_CST)
2021         {
2022           fd_ivopts_data = data;
2023           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2024         }
2025
2026       if (dump_file && (dump_flags & TDF_DETAILS))
2027         dump_cand (dump_file, cand);
2028     }
2029
2030   if (important && !cand->important)
2031     {
2032       cand->important = true;
2033       if (dump_file && (dump_flags & TDF_DETAILS))
2034         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2035     }
2036
2037   if (use)
2038     {
2039       bitmap_set_bit (use->related_cands, i);
2040       if (dump_file && (dump_flags & TDF_DETAILS))
2041         fprintf (dump_file, "Candidate %d is related to use %d\n",
2042                  cand->id, use->id);
2043     }
2044
2045   return cand;
2046 }
2047
2048 /* Returns true if incrementing the induction variable at the end of the LOOP
2049    is allowed.
2050
2051    The purpose is to avoid splitting latch edge with a biv increment, thus
2052    creating a jump, possibly confusing other optimization passes and leaving
2053    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2054    is not available (so we do not have a better alternative), or if the latch
2055    edge is already nonempty.  */
2056
2057 static bool
2058 allow_ip_end_pos_p (struct loop *loop)
2059 {
2060   if (!ip_normal_pos (loop))
2061     return true;
2062
2063   if (!empty_block_p (ip_end_pos (loop)))
2064     return true;
2065
2066   return false;
2067 }
2068
2069 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2070    position to POS.  If USE is not NULL, the candidate is set as related to
2071    it.  The candidate computation is scheduled on all available positions.  */
2072
2073 static void
2074 add_candidate (struct ivopts_data *data,
2075                tree base, tree step, bool important, struct iv_use *use)
2076 {
2077   if (ip_normal_pos (data->current_loop))
2078     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2079   if (ip_end_pos (data->current_loop)
2080       && allow_ip_end_pos_p (data->current_loop))
2081     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2082 }
2083
2084 /* Add a standard "0 + 1 * iteration" iv candidate for a
2085    type with SIZE bits.  */
2086
2087 static void
2088 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2089                                      unsigned int size)
2090 {
2091   tree type = lang_hooks.types.type_for_size (size, true);
2092   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2093                  true, NULL);
2094 }
2095
2096 /* Adds standard iv candidates.  */
2097
2098 static void
2099 add_standard_iv_candidates (struct ivopts_data *data)
2100 {
2101   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2102
2103   /* The same for a double-integer type if it is still fast enough.  */
2104   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2105     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2106 }
2107
2108
2109 /* Adds candidates bases on the old induction variable IV.  */
2110
2111 static void
2112 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2113 {
2114   tree phi, def;
2115   struct iv_cand *cand;
2116
2117   add_candidate (data, iv->base, iv->step, true, NULL);
2118
2119   /* The same, but with initial value zero.  */
2120   add_candidate (data,
2121                  build_int_cst (TREE_TYPE (iv->base), 0),
2122                  iv->step, true, NULL);
2123
2124   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2125   if (TREE_CODE (phi) == PHI_NODE)
2126     {
2127       /* Additionally record the possibility of leaving the original iv
2128          untouched.  */
2129       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2130       cand = add_candidate_1 (data,
2131                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2132                               SSA_NAME_DEF_STMT (def));
2133       cand->var_before = iv->ssa_name;
2134       cand->var_after = def;
2135     }
2136 }
2137
2138 /* Adds candidates based on the old induction variables.  */
2139
2140 static void
2141 add_old_ivs_candidates (struct ivopts_data *data)
2142 {
2143   unsigned i;
2144   struct iv *iv;
2145   bitmap_iterator bi;
2146
2147   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2148     {
2149       iv = ver_info (data, i)->iv;
2150       if (iv && iv->biv_p && !zero_p (iv->step))
2151         add_old_iv_candidates (data, iv);
2152     }
2153 }
2154
2155 /* Adds candidates based on the value of the induction variable IV and USE.  */
2156
2157 static void
2158 add_iv_value_candidates (struct ivopts_data *data,
2159                          struct iv *iv, struct iv_use *use)
2160 {
2161   unsigned HOST_WIDE_INT offset;
2162   tree base;
2163
2164   add_candidate (data, iv->base, iv->step, false, use);
2165
2166   /* The same, but with initial value zero.  Make such variable important,
2167      since it is generic enough so that possibly many uses may be based
2168      on it.  */
2169   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2170                  iv->step, true, use);
2171
2172   /* Third, try removing the constant offset.  */
2173   base = strip_offset (iv->base, &offset);
2174   if (offset)
2175     add_candidate (data, base, iv->step, false, use);
2176 }
2177
2178 /* Adds candidates based on the uses.  */
2179
2180 static void
2181 add_derived_ivs_candidates (struct ivopts_data *data)
2182 {
2183   unsigned i;
2184
2185   for (i = 0; i < n_iv_uses (data); i++)
2186     {
2187       struct iv_use *use = iv_use (data, i);
2188
2189       if (!use)
2190         continue;
2191
2192       switch (use->type)
2193         {
2194         case USE_NONLINEAR_EXPR:
2195         case USE_COMPARE:
2196         case USE_ADDRESS:
2197           /* Just add the ivs based on the value of the iv used here.  */
2198           add_iv_value_candidates (data, use->iv, use);
2199           break;
2200
2201         default:
2202           gcc_unreachable ();
2203         }
2204     }
2205 }
2206
2207 /* Record important candidates and add them to related_cands bitmaps
2208    if needed.  */
2209
2210 static void
2211 record_important_candidates (struct ivopts_data *data)
2212 {
2213   unsigned i;
2214   struct iv_use *use;
2215
2216   for (i = 0; i < n_iv_cands (data); i++)
2217     {
2218       struct iv_cand *cand = iv_cand (data, i);
2219
2220       if (cand->important)
2221         bitmap_set_bit (data->important_candidates, i);
2222     }
2223
2224   data->consider_all_candidates = (n_iv_cands (data)
2225                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2226
2227   if (data->consider_all_candidates)
2228     {
2229       /* We will not need "related_cands" bitmaps in this case,
2230          so release them to decrease peak memory consumption.  */
2231       for (i = 0; i < n_iv_uses (data); i++)
2232         {
2233           use = iv_use (data, i);
2234           BITMAP_FREE (use->related_cands);
2235         }
2236     }
2237   else
2238     {
2239       /* Add important candidates to the related_cands bitmaps.  */
2240       for (i = 0; i < n_iv_uses (data); i++)
2241         bitmap_ior_into (iv_use (data, i)->related_cands,
2242                          data->important_candidates);
2243     }
2244 }
2245
2246 /* Finds the candidates for the induction variables.  */
2247
2248 static void
2249 find_iv_candidates (struct ivopts_data *data)
2250 {
2251   /* Add commonly used ivs.  */
2252   add_standard_iv_candidates (data);
2253
2254   /* Add old induction variables.  */
2255   add_old_ivs_candidates (data);
2256
2257   /* Add induction variables derived from uses.  */
2258   add_derived_ivs_candidates (data);
2259
2260   /* Record the important candidates.  */
2261   record_important_candidates (data);
2262 }
2263
2264 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2265    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2266    we allocate a simple list to every use.  */
2267
2268 static void
2269 alloc_use_cost_map (struct ivopts_data *data)
2270 {
2271   unsigned i, size, s, j;
2272
2273   for (i = 0; i < n_iv_uses (data); i++)
2274     {
2275       struct iv_use *use = iv_use (data, i);
2276       bitmap_iterator bi;
2277
2278       if (data->consider_all_candidates)
2279         size = n_iv_cands (data);
2280       else
2281         {
2282           s = 0;
2283           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2284             {
2285               s++;
2286             }
2287
2288           /* Round up to the power of two, so that moduling by it is fast.  */
2289           for (size = 1; size < s; size <<= 1)
2290             continue;
2291         }
2292
2293       use->n_map_members = size;
2294       use->cost_map = XCNEWVEC (struct cost_pair, size);
2295     }
2296 }
2297
2298 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2299    on invariants DEPENDS_ON and that the value used in expressing it
2300    is VALUE.*/
2301
2302 static void
2303 set_use_iv_cost (struct ivopts_data *data,
2304                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2305                  bitmap depends_on, tree value)
2306 {
2307   unsigned i, s;
2308
2309   if (cost == INFTY)
2310     {
2311       BITMAP_FREE (depends_on);
2312       return;
2313     }
2314
2315   if (data->consider_all_candidates)
2316     {
2317       use->cost_map[cand->id].cand = cand;
2318       use->cost_map[cand->id].cost = cost;
2319       use->cost_map[cand->id].depends_on = depends_on;
2320       use->cost_map[cand->id].value = value;
2321       return;
2322     }
2323
2324   /* n_map_members is a power of two, so this computes modulo.  */
2325   s = cand->id & (use->n_map_members - 1);
2326   for (i = s; i < use->n_map_members; i++)
2327     if (!use->cost_map[i].cand)
2328       goto found;
2329   for (i = 0; i < s; i++)
2330     if (!use->cost_map[i].cand)
2331       goto found;
2332
2333   gcc_unreachable ();
2334
2335 found:
2336   use->cost_map[i].cand = cand;
2337   use->cost_map[i].cost = cost;
2338   use->cost_map[i].depends_on = depends_on;
2339   use->cost_map[i].value = value;
2340 }
2341
2342 /* Gets cost of (USE, CANDIDATE) pair.  */
2343
2344 static struct cost_pair *
2345 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2346                  struct iv_cand *cand)
2347 {
2348   unsigned i, s;
2349   struct cost_pair *ret;
2350
2351   if (!cand)
2352     return NULL;
2353
2354   if (data->consider_all_candidates)
2355     {
2356       ret = use->cost_map + cand->id;
2357       if (!ret->cand)
2358         return NULL;
2359
2360       return ret;
2361     }
2362
2363   /* n_map_members is a power of two, so this computes modulo.  */
2364   s = cand->id & (use->n_map_members - 1);
2365   for (i = s; i < use->n_map_members; i++)
2366     if (use->cost_map[i].cand == cand)
2367       return use->cost_map + i;
2368
2369   for (i = 0; i < s; i++)
2370     if (use->cost_map[i].cand == cand)
2371       return use->cost_map + i;
2372
2373   return NULL;
2374 }
2375
2376 /* Returns estimate on cost of computing SEQ.  */
2377
2378 static unsigned
2379 seq_cost (rtx seq)
2380 {
2381   unsigned cost = 0;
2382   rtx set;
2383
2384   for (; seq; seq = NEXT_INSN (seq))
2385     {
2386       set = single_set (seq);
2387       if (set)
2388         cost += rtx_cost (set, SET);
2389       else
2390         cost++;
2391     }
2392
2393   return cost;
2394 }
2395
2396 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2397 static rtx
2398 produce_memory_decl_rtl (tree obj, int *regno)
2399 {
2400   rtx x;
2401
2402   gcc_assert (obj);
2403   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2404     {
2405       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2406       x = gen_rtx_SYMBOL_REF (Pmode, name);
2407     }
2408   else
2409     x = gen_raw_REG (Pmode, (*regno)++);
2410
2411   return gen_rtx_MEM (DECL_MODE (obj), x);
2412 }
2413
2414 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2415    walk_tree.  DATA contains the actual fake register number.  */
2416
2417 static tree
2418 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2419 {
2420   tree obj = NULL_TREE;
2421   rtx x = NULL_RTX;
2422   int *regno = data;
2423
2424   switch (TREE_CODE (*expr_p))
2425     {
2426     case ADDR_EXPR:
2427       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2428            handled_component_p (*expr_p);
2429            expr_p = &TREE_OPERAND (*expr_p, 0))
2430         continue;
2431       obj = *expr_p;
2432       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2433         x = produce_memory_decl_rtl (obj, regno);
2434       break;
2435
2436     case SSA_NAME:
2437       *ws = 0;
2438       obj = SSA_NAME_VAR (*expr_p);
2439       if (!DECL_RTL_SET_P (obj))
2440         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2441       break;
2442
2443     case VAR_DECL:
2444     case PARM_DECL:
2445     case RESULT_DECL:
2446       *ws = 0;
2447       obj = *expr_p;
2448
2449       if (DECL_RTL_SET_P (obj))
2450         break;
2451
2452       if (DECL_MODE (obj) == BLKmode)
2453         x = produce_memory_decl_rtl (obj, regno);
2454       else
2455         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2456
2457       break;
2458
2459     default:
2460       break;
2461     }
2462
2463   if (x)
2464     {
2465       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2466       SET_DECL_RTL (obj, x);
2467     }
2468
2469   return NULL_TREE;
2470 }
2471
2472 /* Determines cost of the computation of EXPR.  */
2473
2474 static unsigned
2475 computation_cost (tree expr)
2476 {
2477   rtx seq, rslt;
2478   tree type = TREE_TYPE (expr);
2479   unsigned cost;
2480   /* Avoid using hard regs in ways which may be unsupported.  */
2481   int regno = LAST_VIRTUAL_REGISTER + 1;
2482
2483   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2484   start_sequence ();
2485   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2486   seq = get_insns ();
2487   end_sequence ();
2488
2489   cost = seq_cost (seq);
2490   if (MEM_P (rslt))
2491     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2492
2493   return cost;
2494 }
2495
2496 /* Returns variable containing the value of candidate CAND at statement AT.  */
2497
2498 static tree
2499 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2500 {
2501   if (stmt_after_increment (loop, cand, stmt))
2502     return cand->var_after;
2503   else
2504     return cand->var_before;
2505 }
2506
2507 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2508    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2509
2510 int
2511 tree_int_cst_sign_bit (tree t)
2512 {
2513   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2514   unsigned HOST_WIDE_INT w;
2515
2516   if (bitno < HOST_BITS_PER_WIDE_INT)
2517     w = TREE_INT_CST_LOW (t);
2518   else
2519     {
2520       w = TREE_INT_CST_HIGH (t);
2521       bitno -= HOST_BITS_PER_WIDE_INT;
2522     }
2523
2524   return (w >> bitno) & 1;
2525 }
2526
2527 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2528    return cst.  Otherwise return NULL_TREE.  */
2529
2530 static tree
2531 constant_multiple_of (tree type, tree top, tree bot)
2532 {
2533   tree res, mby, p0, p1;
2534   enum tree_code code;
2535   bool negate;
2536
2537   STRIP_NOPS (top);
2538   STRIP_NOPS (bot);
2539
2540   if (operand_equal_p (top, bot, 0))
2541     return build_int_cst (type, 1);
2542
2543   code = TREE_CODE (top);
2544   switch (code)
2545     {
2546     case MULT_EXPR:
2547       mby = TREE_OPERAND (top, 1);
2548       if (TREE_CODE (mby) != INTEGER_CST)
2549         return NULL_TREE;
2550
2551       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2552       if (!res)
2553         return NULL_TREE;
2554
2555       return fold_binary_to_constant (MULT_EXPR, type, res,
2556                                       fold_convert (type, mby));
2557
2558     case PLUS_EXPR:
2559     case MINUS_EXPR:
2560       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2561       if (!p0)
2562         return NULL_TREE;
2563       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2564       if (!p1)
2565         return NULL_TREE;
2566
2567       return fold_binary_to_constant (code, type, p0, p1);
2568
2569     case INTEGER_CST:
2570       if (TREE_CODE (bot) != INTEGER_CST)
2571         return NULL_TREE;
2572
2573       bot = fold_convert (type, bot);
2574       top = fold_convert (type, top);
2575
2576       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2577          the result afterwards.  */
2578       if (tree_int_cst_sign_bit (bot))
2579         {
2580           negate = true;
2581           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2582         }
2583       else
2584         negate = false;
2585
2586       /* Ditto for TOP.  */
2587       if (tree_int_cst_sign_bit (top))
2588         {
2589           negate = !negate;
2590           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2591         }
2592
2593       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2594         return NULL_TREE;
2595
2596       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2597       if (negate)
2598         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2599       return res;
2600
2601     default:
2602       return NULL_TREE;
2603     }
2604 }
2605
2606 /* Sets COMB to CST.  */
2607
2608 static void
2609 aff_combination_const (struct affine_tree_combination *comb, tree type,
2610                        unsigned HOST_WIDE_INT cst)
2611 {
2612   unsigned prec = TYPE_PRECISION (type);
2613
2614   comb->type = type;
2615   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2616
2617   comb->n = 0;
2618   comb->rest = NULL_TREE;
2619   comb->offset = cst & comb->mask;
2620 }
2621
2622 /* Sets COMB to single element ELT.  */
2623
2624 static void
2625 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2626 {
2627   unsigned prec = TYPE_PRECISION (type);
2628
2629   comb->type = type;
2630   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2631
2632   comb->n = 1;
2633   comb->elts[0] = elt;
2634   comb->coefs[0] = 1;
2635   comb->rest = NULL_TREE;
2636   comb->offset = 0;
2637 }
2638
2639 /* Scales COMB by SCALE.  */
2640
2641 static void
2642 aff_combination_scale (struct affine_tree_combination *comb,
2643                        unsigned HOST_WIDE_INT scale)
2644 {
2645   unsigned i, j;
2646
2647   if (scale == 1)
2648     return;
2649
2650   if (scale == 0)
2651     {
2652       aff_combination_const (comb, comb->type, 0);
2653       return;
2654     }
2655
2656   comb->offset = (scale * comb->offset) & comb->mask;
2657   for (i = 0, j = 0; i < comb->n; i++)
2658     {
2659       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2660       comb->elts[j] = comb->elts[i];
2661       if (comb->coefs[j] != 0)
2662         j++;
2663     }
2664   comb->n = j;
2665
2666   if (comb->rest)
2667     {
2668       if (comb->n < MAX_AFF_ELTS)
2669         {
2670           comb->coefs[comb->n] = scale;
2671           comb->elts[comb->n] = comb->rest;
2672           comb->rest = NULL_TREE;
2673           comb->n++;
2674         }
2675       else
2676         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2677                                   build_int_cst_type (comb->type, scale));
2678     }
2679 }
2680
2681 /* Adds ELT * SCALE to COMB.  */
2682
2683 static void
2684 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2685                          unsigned HOST_WIDE_INT scale)
2686 {
2687   unsigned i;
2688
2689   if (scale == 0)
2690     return;
2691
2692   for (i = 0; i < comb->n; i++)
2693     if (operand_equal_p (comb->elts[i], elt, 0))
2694       {
2695         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2696         if (comb->coefs[i])
2697           return;
2698
2699         comb->n--;
2700         comb->coefs[i] = comb->coefs[comb->n];
2701         comb->elts[i] = comb->elts[comb->n];
2702
2703         if (comb->rest)
2704           {
2705             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2706             comb->coefs[comb->n] = 1;
2707             comb->elts[comb->n] = comb->rest;
2708             comb->rest = NULL_TREE;
2709             comb->n++;
2710           }
2711         return;
2712       }
2713   if (comb->n < MAX_AFF_ELTS)
2714     {
2715       comb->coefs[comb->n] = scale;
2716       comb->elts[comb->n] = elt;
2717       comb->n++;
2718       return;
2719     }
2720
2721   if (scale == 1)
2722     elt = fold_convert (comb->type, elt);
2723   else
2724     elt = fold_build2 (MULT_EXPR, comb->type,
2725                        fold_convert (comb->type, elt),
2726                        build_int_cst_type (comb->type, scale));
2727
2728   if (comb->rest)
2729     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2730   else
2731     comb->rest = elt;
2732 }
2733
2734 /* Adds COMB2 to COMB1.  */
2735
2736 static void
2737 aff_combination_add (struct affine_tree_combination *comb1,
2738                      struct affine_tree_combination *comb2)
2739 {
2740   unsigned i;
2741
2742   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2743   for (i = 0; i < comb2->n; i++)
2744     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2745   if (comb2->rest)
2746     aff_combination_add_elt (comb1, comb2->rest, 1);
2747 }
2748
2749 /* Splits EXPR into an affine combination of parts.  */
2750
2751 static void
2752 tree_to_aff_combination (tree expr, tree type,
2753                          struct affine_tree_combination *comb)
2754 {
2755   struct affine_tree_combination tmp;
2756   enum tree_code code;
2757   tree cst, core, toffset;
2758   HOST_WIDE_INT bitpos, bitsize;
2759   enum machine_mode mode;
2760   int unsignedp, volatilep;
2761
2762   STRIP_NOPS (expr);
2763
2764   code = TREE_CODE (expr);
2765   switch (code)
2766     {
2767     case INTEGER_CST:
2768       aff_combination_const (comb, type, int_cst_value (expr));
2769       return;
2770
2771     case PLUS_EXPR:
2772     case MINUS_EXPR:
2773       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2774       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2775       if (code == MINUS_EXPR)
2776         aff_combination_scale (&tmp, -1);
2777       aff_combination_add (comb, &tmp);
2778       return;
2779
2780     case MULT_EXPR:
2781       cst = TREE_OPERAND (expr, 1);
2782       if (TREE_CODE (cst) != INTEGER_CST)
2783         break;
2784       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2785       aff_combination_scale (comb, int_cst_value (cst));
2786       return;
2787
2788     case NEGATE_EXPR:
2789       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2790       aff_combination_scale (comb, -1);
2791       return;
2792
2793     case ADDR_EXPR:
2794       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2795                                   &toffset, &mode, &unsignedp, &volatilep,
2796                                   false);
2797       if (bitpos % BITS_PER_UNIT != 0)
2798         break;
2799       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2800       core = build_fold_addr_expr (core);
2801       if (TREE_CODE (core) == ADDR_EXPR)
2802         aff_combination_add_elt (comb, core, 1);
2803       else
2804         {
2805           tree_to_aff_combination (core, type, &tmp);
2806           aff_combination_add (comb, &tmp);
2807         }
2808       if (toffset)
2809         {
2810           tree_to_aff_combination (toffset, type, &tmp);
2811           aff_combination_add (comb, &tmp);
2812         }
2813       return;
2814
2815     default:
2816       break;
2817     }
2818
2819   aff_combination_elt (comb, type, expr);
2820 }
2821
2822 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2823
2824 static tree
2825 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2826                  unsigned HOST_WIDE_INT mask)
2827 {
2828   enum tree_code code;
2829
2830   scale &= mask;
2831   elt = fold_convert (type, elt);
2832
2833   if (scale == 1)
2834     {
2835       if (!expr)
2836         return elt;
2837
2838       return fold_build2 (PLUS_EXPR, type, expr, elt);
2839     }
2840
2841   if (scale == mask)
2842     {
2843       if (!expr)
2844         return fold_build1 (NEGATE_EXPR, type, elt);
2845
2846       return fold_build2 (MINUS_EXPR, type, expr, elt);
2847     }
2848
2849   if (!expr)
2850     return fold_build2 (MULT_EXPR, type, elt,
2851                         build_int_cst_type (type, scale));
2852
2853   if ((scale | (mask >> 1)) == mask)
2854     {
2855       /* Scale is negative.  */
2856       code = MINUS_EXPR;
2857       scale = (-scale) & mask;
2858     }
2859   else
2860     code = PLUS_EXPR;
2861
2862   elt = fold_build2 (MULT_EXPR, type, elt,
2863                      build_int_cst_type (type, scale));
2864   return fold_build2 (code, type, expr, elt);
2865 }
2866
2867 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2868
2869 static void
2870 unshare_aff_combination (struct affine_tree_combination *comb)
2871 {
2872   unsigned i;
2873
2874   for (i = 0; i < comb->n; i++)
2875     comb->elts[i] = unshare_expr (comb->elts[i]);
2876   if (comb->rest)
2877     comb->rest = unshare_expr (comb->rest);
2878 }
2879
2880 /* Makes tree from the affine combination COMB.  */
2881
2882 static tree
2883 aff_combination_to_tree (struct affine_tree_combination *comb)
2884 {
2885   tree type = comb->type;
2886   tree expr = comb->rest;
2887   unsigned i;
2888   unsigned HOST_WIDE_INT off, sgn;
2889
2890   /* Handle the special case produced by get_computation_aff when
2891      the type does not fit in HOST_WIDE_INT.  */
2892   if (comb->n == 0 && comb->offset == 0)
2893     return fold_convert (type, expr);
2894
2895   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2896
2897   for (i = 0; i < comb->n; i++)
2898     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2899                             comb->mask);
2900
2901   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2902     {
2903       /* Offset is negative.  */
2904       off = (-comb->offset) & comb->mask;
2905       sgn = comb->mask;
2906     }
2907   else
2908     {
2909       off = comb->offset;
2910       sgn = 1;
2911     }
2912   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2913                           comb->mask);
2914 }
2915
2916 /* Determines the expression by that USE is expressed from induction variable
2917    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2918    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2919
2920 static bool
2921 get_computation_aff (struct loop *loop,
2922                      struct iv_use *use, struct iv_cand *cand, tree at,
2923                      struct affine_tree_combination *aff)
2924 {
2925   tree ubase = use->iv->base;
2926   tree ustep = use->iv->step;
2927   tree cbase = cand->iv->base;
2928   tree cstep = cand->iv->step;
2929   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2930   tree uutype;
2931   tree expr, delta;
2932   tree ratio;
2933   unsigned HOST_WIDE_INT ustepi, cstepi;
2934   HOST_WIDE_INT ratioi;
2935   struct affine_tree_combination cbase_aff, expr_aff;
2936   tree cstep_orig = cstep, ustep_orig = ustep;
2937
2938   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2939     {
2940       /* We do not have a precision to express the values of use.  */
2941       return false;
2942     }
2943
2944   expr = var_at_stmt (loop, cand, at);
2945
2946   if (TREE_TYPE (expr) != ctype)
2947     {
2948       /* This may happen with the original ivs.  */
2949       expr = fold_convert (ctype, expr);
2950     }
2951
2952   if (TYPE_UNSIGNED (utype))
2953     uutype = utype;
2954   else
2955     {
2956       uutype = unsigned_type_for (utype);
2957       ubase = fold_convert (uutype, ubase);
2958       ustep = fold_convert (uutype, ustep);
2959     }
2960
2961   if (uutype != ctype)
2962     {
2963       expr = fold_convert (uutype, expr);
2964       cbase = fold_convert (uutype, cbase);
2965       cstep = fold_convert (uutype, cstep);
2966
2967       /* If the conversion is not noop, we must take it into account when
2968          considering the value of the step.  */
2969       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2970         cstep_orig = cstep;
2971     }
2972
2973   if (cst_and_fits_in_hwi (cstep_orig)
2974       && cst_and_fits_in_hwi (ustep_orig))
2975     {
2976       ustepi = int_cst_value (ustep_orig);
2977       cstepi = int_cst_value (cstep_orig);
2978
2979       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2980         {
2981           /* TODO maybe consider case when ustep divides cstep and the ratio is
2982              a power of 2 (so that the division is fast to execute)?  We would
2983              need to be much more careful with overflows etc. then.  */
2984           return false;
2985         }
2986
2987       ratio = build_int_cst_type (uutype, ratioi);
2988     }
2989   else
2990     {
2991       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2992       if (!ratio)
2993         return false;
2994
2995       /* Ratioi is only used to detect special cases when the multiplicative
2996          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2997          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
2998          to integer_onep/integer_all_onesp, since the former ignores
2999          TREE_OVERFLOW.  */
3000       if (cst_and_fits_in_hwi (ratio))
3001         ratioi = int_cst_value (ratio);
3002       else if (integer_onep (ratio))
3003         ratioi = 1;
3004       else if (integer_all_onesp (ratio))
3005         ratioi = -1;
3006       else
3007         ratioi = 0;
3008     }
3009
3010   /* We may need to shift the value if we are after the increment.  */
3011   if (stmt_after_increment (loop, cand, at))
3012     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3013
3014   /* use = ubase - ratio * cbase + ratio * var.
3015
3016      In general case ubase + ratio * (var - cbase) could be better (one less
3017      multiplication), but often it is possible to eliminate redundant parts
3018      of computations from (ubase - ratio * cbase) term, and if it does not
3019      happen, fold is able to apply the distributive law to obtain this form
3020      anyway.  */
3021
3022   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3023     {
3024       /* Let's compute in trees and just return the result in AFF.  This case
3025          should not be very common, and fold itself is not that bad either,
3026          so making the aff. functions more complicated to handle this case
3027          is not that urgent.  */
3028       if (ratioi == 1)
3029         {
3030           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3031           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3032         }
3033       else if (ratioi == -1)
3034         {
3035           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3036           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3037         }
3038       else
3039         {
3040           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3041           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3042           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3043           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3044         }
3045
3046       aff->type = uutype;
3047       aff->n = 0;
3048       aff->offset = 0;
3049       aff->mask = 0;
3050       aff->rest = expr;
3051       return true;
3052     }
3053
3054   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3055      possible to compute ratioi.  */
3056   gcc_assert (ratioi);
3057
3058   tree_to_aff_combination (ubase, uutype, aff);
3059   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3060   tree_to_aff_combination (expr, uutype, &expr_aff);
3061   aff_combination_scale (&cbase_aff, -ratioi);
3062   aff_combination_scale (&expr_aff, ratioi);
3063   aff_combination_add (aff, &cbase_aff);
3064   aff_combination_add (aff, &expr_aff);
3065
3066   return true;
3067 }
3068
3069 /* Determines the expression by that USE is expressed from induction variable
3070    CAND at statement AT in LOOP.  The computation is unshared.  */
3071
3072 static tree
3073 get_computation_at (struct loop *loop,
3074                     struct iv_use *use, struct iv_cand *cand, tree at)
3075 {
3076   struct affine_tree_combination aff;
3077   tree type = TREE_TYPE (use->iv->base);
3078
3079   if (!get_computation_aff (loop, use, cand, at, &aff))
3080     return NULL_TREE;
3081   unshare_aff_combination (&aff);
3082   return fold_convert (type, aff_combination_to_tree (&aff));
3083 }
3084
3085 /* Determines the expression by that USE is expressed from induction variable
3086    CAND in LOOP.  The computation is unshared.  */
3087
3088 static tree
3089 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3090 {
3091   return get_computation_at (loop, use, cand, use->stmt);
3092 }
3093
3094 /* Returns cost of addition in MODE.  */
3095
3096 static unsigned
3097 add_cost (enum machine_mode mode)
3098 {
3099   static unsigned costs[NUM_MACHINE_MODES];
3100   rtx seq;
3101   unsigned cost;
3102
3103   if (costs[mode])
3104     return costs[mode];
3105
3106   start_sequence ();
3107   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3108                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3109                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3110                  NULL_RTX);
3111   seq = get_insns ();
3112   end_sequence ();
3113
3114   cost = seq_cost (seq);
3115   if (!cost)
3116     cost = 1;
3117
3118   costs[mode] = cost;
3119
3120   if (dump_file && (dump_flags & TDF_DETAILS))
3121     fprintf (dump_file, "Addition in %s costs %d\n",
3122              GET_MODE_NAME (mode), cost);
3123   return cost;
3124 }
3125
3126 /* Entry in a hashtable of already known costs for multiplication.  */
3127 struct mbc_entry
3128 {
3129   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3130   enum machine_mode mode;       /* In mode.  */
3131   unsigned cost;                /* The cost.  */
3132 };
3133
3134 /* Counts hash value for the ENTRY.  */
3135
3136 static hashval_t
3137 mbc_entry_hash (const void *entry)
3138 {
3139   const struct mbc_entry *e = entry;
3140
3141   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3142 }
3143
3144 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3145
3146 static int
3147 mbc_entry_eq (const void *entry1, const void *entry2)
3148 {
3149   const struct mbc_entry *e1 = entry1;
3150   const struct mbc_entry *e2 = entry2;
3151
3152   return (e1->mode == e2->mode
3153           && e1->cst == e2->cst);
3154 }
3155
3156 /* Returns cost of multiplication by constant CST in MODE.  */
3157
3158 unsigned
3159 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3160 {
3161   static htab_t costs;
3162   struct mbc_entry **cached, act;
3163   rtx seq;
3164   unsigned cost;
3165
3166   if (!costs)
3167     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3168
3169   act.mode = mode;
3170   act.cst = cst;
3171   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3172   if (*cached)
3173     return (*cached)->cost;
3174
3175   *cached = XNEW (struct mbc_entry);
3176   (*cached)->mode = mode;
3177   (*cached)->cst = cst;
3178
3179   start_sequence ();
3180   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3181                gen_int_mode (cst, mode), NULL_RTX, 0);
3182   seq = get_insns ();
3183   end_sequence ();
3184
3185   cost = seq_cost (seq);
3186
3187   if (dump_file && (dump_flags & TDF_DETAILS))
3188     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3189              (int) cst, GET_MODE_NAME (mode), cost);
3190
3191   (*cached)->cost = cost;
3192
3193   return cost;
3194 }
3195
3196 /* Returns true if multiplying by RATIO is allowed in address.  */
3197
3198 bool
3199 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3200 {
3201 #define MAX_RATIO 128
3202   static sbitmap valid_mult;
3203
3204   if (!valid_mult)
3205     {
3206       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3207       rtx addr;
3208       HOST_WIDE_INT i;
3209
3210       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3211       sbitmap_zero (valid_mult);
3212       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3213       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214         {
3215           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3216           if (memory_address_p (Pmode, addr))
3217             SET_BIT (valid_mult, i + MAX_RATIO);
3218         }
3219
3220       if (dump_file && (dump_flags & TDF_DETAILS))
3221         {
3222           fprintf (dump_file, "  allowed multipliers:");
3223           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3224             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3225               fprintf (dump_file, " %d", (int) i);
3226           fprintf (dump_file, "\n");
3227           fprintf (dump_file, "\n");
3228         }
3229     }
3230
3231   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3232     return false;
3233
3234   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3235 }
3236
3237 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3238    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3239    variable is omitted.  The created memory accesses MODE.
3240
3241    TODO -- there must be some better way.  This all is quite crude.  */
3242
3243 static unsigned
3244 get_address_cost (bool symbol_present, bool var_present,
3245                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3246 {
3247   static bool initialized = false;
3248   static HOST_WIDE_INT rat, off;
3249   static HOST_WIDE_INT min_offset, max_offset;
3250   static unsigned costs[2][2][2][2];
3251   unsigned cost, acost;
3252   rtx seq, addr, base;
3253   bool offset_p, ratio_p;
3254   rtx reg1;
3255   HOST_WIDE_INT s_offset;
3256   unsigned HOST_WIDE_INT mask;
3257   unsigned bits;
3258
3259   if (!initialized)
3260     {
3261       HOST_WIDE_INT i;
3262       initialized = true;
3263
3264       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3265
3266       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3267       for (i = 1; i <= 1 << 20; i <<= 1)
3268         {
3269           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3270           if (!memory_address_p (Pmode, addr))
3271             break;
3272         }
3273       max_offset = i >> 1;
3274       off = max_offset;
3275
3276       for (i = 1; i <= 1 << 20; i <<= 1)
3277         {
3278           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3279           if (!memory_address_p (Pmode, addr))
3280             break;
3281         }
3282       min_offset = -(i >> 1);
3283
3284       if (dump_file && (dump_flags & TDF_DETAILS))
3285         {
3286           fprintf (dump_file, "get_address_cost:\n");
3287           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3288           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3289         }
3290
3291       rat = 1;
3292       for (i = 2; i <= MAX_RATIO; i++)
3293         if (multiplier_allowed_in_address_p (i))
3294           {
3295             rat = i;
3296             break;
3297           }
3298     }
3299
3300   bits = GET_MODE_BITSIZE (Pmode);
3301   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3302   offset &= mask;
3303   if ((offset >> (bits - 1) & 1))
3304     offset |= ~mask;
3305   s_offset = offset;
3306
3307   cost = 0;
3308   offset_p = (s_offset != 0
3309               && min_offset <= s_offset && s_offset <= max_offset);
3310   ratio_p = (ratio != 1
3311              && multiplier_allowed_in_address_p (ratio));
3312
3313   if (ratio != 1 && !ratio_p)
3314     cost += multiply_by_cost (ratio, Pmode);
3315
3316   if (s_offset && !offset_p && !symbol_present)
3317     {
3318       cost += add_cost (Pmode);
3319       var_present = true;
3320     }
3321
3322   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3323   if (!acost)
3324     {
3325       int old_cse_not_expected;
3326       acost = 0;
3327
3328       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3329       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3330       if (ratio_p)
3331         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3332
3333       if (var_present)
3334         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3335
3336       if (symbol_present)
3337         {
3338           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3339           if (offset_p)
3340             base = gen_rtx_fmt_e (CONST, Pmode,
3341                                   gen_rtx_fmt_ee (PLUS, Pmode,
3342                                                   base,
3343                                                   gen_int_mode (off, Pmode)));
3344         }
3345       else if (offset_p)
3346         base = gen_int_mode (off, Pmode);
3347       else
3348         base = NULL_RTX;
3349
3350       if (base)
3351         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3352
3353       start_sequence ();
3354       /* To avoid splitting addressing modes, pretend that no cse will
3355          follow.  */
3356       old_cse_not_expected = cse_not_expected;
3357       cse_not_expected = true;
3358       addr = memory_address (Pmode, addr);
3359       cse_not_expected = old_cse_not_expected;
3360       seq = get_insns ();
3361       end_sequence ();
3362
3363       acost = seq_cost (seq);
3364       acost += address_cost (addr, Pmode);
3365
3366       if (!acost)
3367         acost = 1;
3368       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3369     }
3370
3371   return cost + acost;
3372 }
3373
3374 /* Estimates cost of forcing expression EXPR into a variable.  */
3375
3376 unsigned
3377 force_expr_to_var_cost (tree expr)
3378 {
3379   static bool costs_initialized = false;
3380   static unsigned integer_cost;
3381   static unsigned symbol_cost;
3382   static unsigned address_cost;
3383   tree op0, op1;
3384   unsigned cost0, cost1, cost;
3385   enum machine_mode mode;
3386
3387   if (!costs_initialized)
3388     {
3389       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3390       rtx x = gen_rtx_MEM (DECL_MODE (var),
3391                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3392       tree addr;
3393       tree type = build_pointer_type (integer_type_node);
3394
3395       integer_cost = computation_cost (build_int_cst (integer_type_node,
3396                                                       2000));
3397
3398       SET_DECL_RTL (var, x);
3399       TREE_STATIC (var) = 1;
3400       addr = build1 (ADDR_EXPR, type, var);
3401       symbol_cost = computation_cost (addr) + 1;
3402
3403       address_cost
3404         = computation_cost (build2 (PLUS_EXPR, type,
3405                                     addr,
3406                                     build_int_cst (type, 2000))) + 1;
3407       if (dump_file && (dump_flags & TDF_DETAILS))
3408         {
3409           fprintf (dump_file, "force_expr_to_var_cost:\n");
3410           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3411           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3412           fprintf (dump_file, "  address %d\n", (int) address_cost);
3413           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3414           fprintf (dump_file, "\n");
3415         }
3416
3417       costs_initialized = true;
3418     }
3419
3420   STRIP_NOPS (expr);
3421
3422   if (SSA_VAR_P (expr))
3423     return 0;
3424
3425   if (TREE_INVARIANT (expr))
3426     {
3427       if (TREE_CODE (expr) == INTEGER_CST)
3428         return integer_cost;
3429
3430       if (TREE_CODE (expr) == ADDR_EXPR)
3431         {
3432           tree obj = TREE_OPERAND (expr, 0);
3433
3434           if (TREE_CODE (obj) == VAR_DECL
3435               || TREE_CODE (obj) == PARM_DECL
3436               || TREE_CODE (obj) == RESULT_DECL)
3437             return symbol_cost;
3438         }
3439
3440       return address_cost;
3441     }
3442
3443   switch (TREE_CODE (expr))
3444     {
3445     case PLUS_EXPR:
3446     case MINUS_EXPR:
3447     case MULT_EXPR:
3448       op0 = TREE_OPERAND (expr, 0);
3449       op1 = TREE_OPERAND (expr, 1);
3450       STRIP_NOPS (op0);
3451       STRIP_NOPS (op1);
3452
3453       if (is_gimple_val (op0))
3454         cost0 = 0;
3455       else
3456         cost0 = force_expr_to_var_cost (op0);
3457
3458       if (is_gimple_val (op1))
3459         cost1 = 0;
3460       else
3461         cost1 = force_expr_to_var_cost (op1);
3462
3463       break;
3464
3465     default:
3466       /* Just an arbitrary value, FIXME.  */
3467       return target_spill_cost;
3468     }
3469
3470   mode = TYPE_MODE (TREE_TYPE (expr));
3471   switch (TREE_CODE (expr))
3472     {
3473     case PLUS_EXPR:
3474     case MINUS_EXPR:
3475       cost = add_cost (mode);
3476       break;
3477
3478     case MULT_EXPR:
3479       if (cst_and_fits_in_hwi (op0))
3480         cost = multiply_by_cost (int_cst_value (op0), mode);
3481       else if (cst_and_fits_in_hwi (op1))
3482         cost = multiply_by_cost (int_cst_value (op1), mode);
3483       else
3484         return target_spill_cost;
3485       break;
3486
3487     default:
3488       gcc_unreachable ();
3489     }
3490
3491   cost += cost0;
3492   cost += cost1;
3493
3494   /* Bound the cost by target_spill_cost.  The parts of complicated
3495      computations often are either loop invariant or at least can
3496      be shared between several iv uses, so letting this grow without
3497      limits would not give reasonable results.  */
3498   return cost < target_spill_cost ? cost : target_spill_cost;
3499 }
3500
3501 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3502    invariants the computation depends on.  */
3503
3504 static unsigned
3505 force_var_cost (struct ivopts_data *data,
3506                 tree expr, bitmap *depends_on)
3507 {
3508   if (depends_on)
3509     {
3510       fd_ivopts_data = data;
3511       walk_tree (&expr, find_depends, depends_on, NULL);
3512     }
3513
3514   return force_expr_to_var_cost (expr);
3515 }
3516
3517 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3518    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3519    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3520    invariants the computation depends on.  */
3521
3522 static unsigned
3523 split_address_cost (struct ivopts_data *data,
3524                     tree addr, bool *symbol_present, bool *var_present,
3525                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3526 {
3527   tree core;
3528   HOST_WIDE_INT bitsize;
3529   HOST_WIDE_INT bitpos;
3530   tree toffset;
3531   enum machine_mode mode;
3532   int unsignedp, volatilep;
3533
3534   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3535                               &unsignedp, &volatilep, false);
3536
3537   if (toffset != 0
3538       || bitpos % BITS_PER_UNIT != 0
3539       || TREE_CODE (core) != VAR_DECL)
3540     {
3541       *symbol_present = false;
3542       *var_present = true;
3543       fd_ivopts_data = data;
3544       walk_tree (&addr, find_depends, depends_on, NULL);
3545       return target_spill_cost;
3546     }
3547
3548   *offset += bitpos / BITS_PER_UNIT;
3549   if (TREE_STATIC (core)
3550       || DECL_EXTERNAL (core))
3551     {
3552       *symbol_present = true;
3553       *var_present = false;
3554       return 0;
3555     }
3556
3557   *symbol_present = false;
3558   *var_present = true;
3559   return 0;
3560 }
3561
3562 /* Estimates cost of expressing difference of addresses E1 - E2 as
3563    var + symbol + offset.  The value of offset is added to OFFSET,
3564    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3565    part is missing.  DEPENDS_ON is a set of the invariants the computation
3566    depends on.  */
3567
3568 static unsigned
3569 ptr_difference_cost (struct ivopts_data *data,
3570                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3571                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3572 {
3573   HOST_WIDE_INT diff = 0;
3574   unsigned cost;
3575
3576   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3577
3578   if (ptr_difference_const (e1, e2, &diff))
3579     {
3580       *offset += diff;
3581       *symbol_present = false;
3582       *var_present = false;
3583       return 0;
3584     }
3585
3586   if (e2 == integer_zero_node)
3587     return split_address_cost (data, TREE_OPERAND (e1, 0),
3588                                symbol_present, var_present, offset, depends_on);
3589
3590   *symbol_present = false;
3591   *var_present = true;
3592
3593   cost = force_var_cost (data, e1, depends_on);
3594   cost += force_var_cost (data, e2, depends_on);
3595   cost += add_cost (Pmode);
3596
3597   return cost;
3598 }
3599
3600 /* Estimates cost of expressing difference E1 - E2 as
3601    var + symbol + offset.  The value of offset is added to OFFSET,
3602    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3603    part is missing.  DEPENDS_ON is a set of the invariants the computation
3604    depends on.  */
3605
3606 static unsigned
3607 difference_cost (struct ivopts_data *data,
3608                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3609                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3610 {
3611   unsigned cost;
3612   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3613   unsigned HOST_WIDE_INT off1, off2;
3614
3615   e1 = strip_offset (e1, &off1);
3616   e2 = strip_offset (e2, &off2);
3617   *offset += off1 - off2;
3618
3619   STRIP_NOPS (e1);
3620   STRIP_NOPS (e2);
3621
3622   if (TREE_CODE (e1) == ADDR_EXPR)
3623     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3624                                 depends_on);
3625   *symbol_present = false;
3626
3627   if (operand_equal_p (e1, e2, 0))
3628     {
3629       *var_present = false;
3630       return 0;
3631     }
3632   *var_present = true;
3633   if (zero_p (e2))
3634     return force_var_cost (data, e1, depends_on);
3635
3636   if (zero_p (e1))
3637     {
3638       cost = force_var_cost (data, e2, depends_on);
3639       cost += multiply_by_cost (-1, mode);
3640
3641       return cost;
3642     }
3643
3644   cost = force_var_cost (data, e1, depends_on);
3645   cost += force_var_cost (data, e2, depends_on);
3646   cost += add_cost (mode);
3647
3648   return cost;
3649 }
3650
3651 /* Determines the cost of the computation by that USE is expressed
3652    from induction variable CAND.  If ADDRESS_P is true, we just need
3653    to create an address from it, otherwise we want to get it into
3654    register.  A set of invariants we depend on is stored in
3655    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3656
3657 static unsigned
3658 get_computation_cost_at (struct ivopts_data *data,
3659                          struct iv_use *use, struct iv_cand *cand,
3660                          bool address_p, bitmap *depends_on, tree at)
3661 {
3662   tree ubase = use->iv->base, ustep = use->iv->step;
3663   tree cbase, cstep;
3664   tree utype = TREE_TYPE (ubase), ctype;
3665   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3666   HOST_WIDE_INT ratio, aratio;
3667   bool var_present, symbol_present;
3668   unsigned cost = 0, n_sums;
3669
3670   *depends_on = NULL;
3671
3672   /* Only consider real candidates.  */
3673   if (!cand->iv)
3674     return INFTY;
3675
3676   cbase = cand->iv->base;
3677   cstep = cand->iv->step;
3678   ctype = TREE_TYPE (cbase);
3679
3680   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3681     {
3682       /* We do not have a precision to express the values of use.  */
3683       return INFTY;
3684     }
3685
3686   if (address_p)
3687     {
3688       /* Do not try to express address of an object with computation based
3689          on address of a different object.  This may cause problems in rtl
3690          level alias analysis (that does not expect this to be happening,
3691          as this is illegal in C), and would be unlikely to be useful
3692          anyway.  */
3693       if (use->iv->base_object
3694           && cand->iv->base_object
3695           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3696         return INFTY;
3697     }
3698
3699   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3700     {
3701       /* TODO -- add direct handling of this case.  */
3702       goto fallback;
3703     }
3704
3705   /* CSTEPI is removed from the offset in case statement is after the
3706      increment.  If the step is not constant, we use zero instead.
3707      This is a bit imprecise (there is the extra addition), but
3708      redundancy elimination is likely to transform the code so that
3709      it uses value of the variable before increment anyway,
3710      so it is not that much unrealistic.  */
3711   if (cst_and_fits_in_hwi (cstep))
3712     cstepi = int_cst_value (cstep);
3713   else
3714     cstepi = 0;
3715
3716   if (cst_and_fits_in_hwi (ustep)
3717       && cst_and_fits_in_hwi (cstep))
3718     {
3719       ustepi = int_cst_value (ustep);
3720
3721       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3722         return INFTY;
3723     }
3724   else
3725     {
3726       tree rat;
3727
3728       rat = constant_multiple_of (utype, ustep, cstep);
3729
3730       if (!rat)
3731         return INFTY;
3732
3733       if (cst_and_fits_in_hwi (rat))
3734         ratio = int_cst_value (rat);
3735       else if (integer_onep (rat))
3736         ratio = 1;
3737       else if (integer_all_onesp (rat))
3738         ratio = -1;
3739       else
3740         return INFTY;
3741     }
3742
3743   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3744      or ratio == 1, it is better to handle this like
3745
3746      ubase - ratio * cbase + ratio * var
3747
3748      (also holds in the case ratio == -1, TODO.  */
3749
3750   if (cst_and_fits_in_hwi (cbase))
3751     {
3752       offset = - ratio * int_cst_value (cbase);
3753       cost += difference_cost (data,
3754                                ubase, integer_zero_node,
3755                                &symbol_present, &var_present, &offset,
3756                                depends_on);
3757     }
3758   else if (ratio == 1)
3759     {
3760       cost += difference_cost (data,
3761                                ubase, cbase,
3762                                &symbol_present, &var_present, &offset,
3763                                depends_on);
3764     }
3765   else
3766     {
3767       cost += force_var_cost (data, cbase, depends_on);
3768       cost += add_cost (TYPE_MODE (ctype));
3769       cost += difference_cost (data,
3770                                ubase, integer_zero_node,
3771                                &symbol_present, &var_present, &offset,
3772                                depends_on);
3773     }
3774
3775   /* If we are after the increment, the value of the candidate is higher by
3776      one iteration.  */
3777   if (stmt_after_increment (data->current_loop, cand, at))
3778     offset -= ratio * cstepi;
3779
3780   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3781      (symbol/var/const parts may be omitted).  If we are looking for an address,
3782      find the cost of addressing this.  */
3783   if (address_p)
3784     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3785
3786   /* Otherwise estimate the costs for computing the expression.  */
3787   aratio = ratio > 0 ? ratio : -ratio;
3788   if (!symbol_present && !var_present && !offset)
3789     {
3790       if (ratio != 1)
3791         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3792
3793       return cost;
3794     }
3795
3796   if (aratio != 1)
3797     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3798
3799   n_sums = 1;
3800   if (var_present
3801       /* Symbol + offset should be compile-time computable.  */
3802       && (symbol_present || offset))
3803     n_sums++;
3804
3805   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3806
3807 fallback:
3808   {
3809     /* Just get the expression, expand it and measure the cost.  */
3810     tree comp = get_computation_at (data->current_loop, use, cand, at);
3811
3812     if (!comp)
3813       return INFTY;
3814
3815     if (address_p)
3816       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3817
3818     return computation_cost (comp);
3819   }
3820 }
3821
3822 /* Determines the cost of the computation by that USE is expressed
3823    from induction variable CAND.  If ADDRESS_P is true, we just need
3824    to create an address from it, otherwise we want to get it into
3825    register.  A set of invariants we depend on is stored in
3826    DEPENDS_ON.  */
3827
3828 static unsigned
3829 get_computation_cost (struct ivopts_data *data,
3830                       struct iv_use *use, struct iv_cand *cand,
3831                       bool address_p, bitmap *depends_on)
3832 {
3833   return get_computation_cost_at (data,
3834                                   use, cand, address_p, depends_on, use->stmt);
3835 }
3836
3837 /* Determines cost of basing replacement of USE on CAND in a generic
3838    expression.  */
3839
3840 static bool
3841 determine_use_iv_cost_generic (struct ivopts_data *data,
3842                                struct iv_use *use, struct iv_cand *cand)
3843 {
3844   bitmap depends_on;
3845   unsigned cost;
3846
3847   /* The simple case first -- if we need to express value of the preserved
3848      original biv, the cost is 0.  This also prevents us from counting the
3849      cost of increment twice -- once at this use and once in the cost of
3850      the candidate.  */
3851   if (cand->pos == IP_ORIGINAL
3852       && cand->incremented_at == use->stmt)
3853     {
3854       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3855       return true;
3856     }
3857
3858   cost = get_computation_cost (data, use, cand, false, &depends_on);
3859   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3860
3861   return cost != INFTY;
3862 }
3863
3864 /* Determines cost of basing replacement of USE on CAND in an address.  */
3865
3866 static bool
3867 determine_use_iv_cost_address (struct ivopts_data *data,
3868                                struct iv_use *use, struct iv_cand *cand)
3869 {
3870   bitmap depends_on;
3871   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3872
3873   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3874
3875   return cost != INFTY;
3876 }
3877
3878 /* Computes value of induction variable IV in iteration NITER.  */
3879
3880 static tree
3881 iv_value (struct iv *iv, tree niter)
3882 {
3883   tree val;
3884   tree type = TREE_TYPE (iv->base);
3885
3886   niter = fold_convert (type, niter);
3887   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3888
3889   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3890 }
3891
3892 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3893
3894 static tree
3895 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3896 {
3897   tree val = iv_value (cand->iv, niter);
3898   tree type = TREE_TYPE (cand->iv->base);
3899
3900   if (stmt_after_increment (loop, cand, at))
3901     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3902
3903   return val;
3904 }
3905
3906 /* Returns period of induction variable iv.  */
3907
3908 static tree
3909 iv_period (struct iv *iv)
3910 {
3911   tree step = iv->step, period, type;
3912   tree pow2div;
3913
3914   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3915
3916   /* Period of the iv is gcd (step, type range).  Since type range is power
3917      of two, it suffices to determine the maximum power of two that divides
3918      step.  */
3919   pow2div = num_ending_zeros (step);
3920   type = unsigned_type_for (TREE_TYPE (step));
3921
3922   period = build_low_bits_mask (type,
3923                                 (TYPE_PRECISION (type)
3924                                  - tree_low_cst (pow2div, 1)));
3925
3926   return period;
3927 }
3928
3929 /* Returns the comparison operator used when eliminating the iv USE.  */
3930
3931 static enum tree_code
3932 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3933 {
3934   struct loop *loop = data->current_loop;
3935   basic_block ex_bb;
3936   edge exit;
3937
3938   ex_bb = bb_for_stmt (use->stmt);
3939   exit = EDGE_SUCC (ex_bb, 0);
3940   if (flow_bb_inside_loop_p (loop, exit->dest))
3941     exit = EDGE_SUCC (ex_bb, 1);
3942
3943   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3944 }
3945
3946 /* Check whether it is possible to express the condition in USE by comparison
3947    of candidate CAND.  If so, store the value compared with to BOUND.  */
3948
3949 static bool
3950 may_eliminate_iv (struct ivopts_data *data,
3951                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3952 {
3953   basic_block ex_bb;
3954   edge exit;
3955   tree nit, nit_type;
3956   tree wider_type, period, per_type;
3957   struct loop *loop = data->current_loop;
3958
3959   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3960     return false;
3961
3962   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3963      for other conditions inside loop body.  */
3964   ex_bb = bb_for_stmt (use->stmt);
3965   if (use->stmt != last_stmt (ex_bb)
3966       || TREE_CODE (use->stmt) != COND_EXPR)
3967     return false;
3968   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3969     return false;
3970
3971   exit = EDGE_SUCC (ex_bb, 0);
3972   if (flow_bb_inside_loop_p (loop, exit->dest))
3973     exit = EDGE_SUCC (ex_bb, 1);
3974   if (flow_bb_inside_loop_p (loop, exit->dest))
3975     return false;
3976
3977   nit = niter_for_exit (data, exit);
3978   if (!nit)
3979     return false;
3980
3981   nit_type = TREE_TYPE (nit);
3982
3983   /* Determine whether we may use the variable to test whether niter iterations
3984      elapsed.  This is the case iff the period of the induction variable is
3985      greater than the number of iterations.  */
3986   period = iv_period (cand->iv);
3987   if (!period)
3988     return false;
3989   per_type = TREE_TYPE (period);
3990
3991   wider_type = TREE_TYPE (period);
3992   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3993     wider_type = per_type;
3994   else
3995     wider_type = nit_type;
3996
3997   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3998                                       fold_convert (wider_type, period),
3999                                       fold_convert (wider_type, nit))))
4000     return false;
4001
4002   *bound = cand_value_at (loop, cand, use->stmt, nit);
4003   return true;
4004 }
4005
4006 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4007
4008 static bool
4009 determine_use_iv_cost_condition (struct ivopts_data *data,
4010                                  struct iv_use *use, struct iv_cand *cand)
4011 {
4012   tree bound = NULL_TREE, op, cond;
4013   bitmap depends_on = NULL;
4014   unsigned cost;
4015
4016   /* Only consider real candidates.  */
4017   if (!cand->iv)
4018     {
4019       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4020       return false;
4021     }
4022
4023   if (may_eliminate_iv (data, use, cand, &bound))
4024     {
4025       cost = force_var_cost (data, bound, &depends_on);
4026
4027       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4028       return cost != INFTY;
4029     }
4030
4031   /* The induction variable elimination failed; just express the original
4032      giv.  If it is compared with an invariant, note that we cannot get
4033      rid of it.  */
4034   cost = get_computation_cost (data, use, cand, false, &depends_on);
4035
4036   cond = *use->op_p;
4037   if (TREE_CODE (cond) != SSA_NAME)
4038     {
4039       op = TREE_OPERAND (cond, 0);
4040       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4041         op = TREE_OPERAND (cond, 1);
4042       if (TREE_CODE (op) == SSA_NAME)
4043         {
4044           op = get_iv (data, op)->base;
4045           fd_ivopts_data = data;
4046           walk_tree (&op, find_depends, &depends_on, NULL);
4047         }
4048     }
4049
4050   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4051   return cost != INFTY;
4052 }
4053
4054 /* Determines cost of basing replacement of USE on CAND.  Returns false
4055    if USE cannot be based on CAND.  */
4056
4057 static bool
4058 determine_use_iv_cost (struct ivopts_data *data,
4059                        struct iv_use *use, struct iv_cand *cand)
4060 {
4061   switch (use->type)
4062     {
4063     case USE_NONLINEAR_EXPR:
4064       return determine_use_iv_cost_generic (data, use, cand);
4065
4066     case USE_ADDRESS:
4067       return determine_use_iv_cost_address (data, use, cand);
4068
4069     case USE_COMPARE:
4070       return determine_use_iv_cost_condition (data, use, cand);
4071
4072     default:
4073       gcc_unreachable ();
4074     }
4075 }
4076
4077 /* Determines costs of basing the use of the iv on an iv candidate.  */
4078
4079 static void
4080 determine_use_iv_costs (struct ivopts_data *data)
4081 {
4082   unsigned i, j;
4083   struct iv_use *use;
4084   struct iv_cand *cand;
4085   bitmap to_clear = BITMAP_ALLOC (NULL);
4086
4087   alloc_use_cost_map (data);
4088
4089   for (i = 0; i < n_iv_uses (data); i++)
4090     {
4091       use = iv_use (data, i);
4092
4093       if (data->consider_all_candidates)
4094         {
4095           for (j = 0; j < n_iv_cands (data); j++)
4096             {
4097               cand = iv_cand (data, j);
4098               determine_use_iv_cost (data, use, cand);
4099             }
4100         }
4101       else
4102         {
4103           bitmap_iterator bi;
4104
4105           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4106             {
4107               cand = iv_cand (data, j);
4108               if (!determine_use_iv_cost (data, use, cand))
4109                 bitmap_set_bit (to_clear, j);
4110             }
4111
4112           /* Remove the candidates for that the cost is infinite from
4113              the list of related candidates.  */
4114           bitmap_and_compl_into (use->related_cands, to_clear);
4115           bitmap_clear (to_clear);
4116         }
4117     }
4118
4119   BITMAP_FREE (to_clear);
4120
4121   if (dump_file && (dump_flags & TDF_DETAILS))
4122     {
4123       fprintf (dump_file, "Use-candidate costs:\n");
4124
4125       for (i = 0; i < n_iv_uses (data); i++)
4126         {
4127           use = iv_use (data, i);
4128
4129           fprintf (dump_file, "Use %d:\n", i);
4130           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4131           for (j = 0; j < use->n_map_members; j++)
4132             {
4133               if (!use->cost_map[j].cand
4134                   || use->cost_map[j].cost == INFTY)
4135                 continue;
4136
4137               fprintf (dump_file, "  %d\t%d\t",
4138                        use->cost_map[j].cand->id,
4139                        use->cost_map[j].cost);
4140               if (use->cost_map[j].depends_on)
4141                 bitmap_print (dump_file,
4142                               use->cost_map[j].depends_on, "","");
4143               fprintf (dump_file, "\n");
4144             }
4145
4146           fprintf (dump_file, "\n");
4147         }
4148       fprintf (dump_file, "\n");
4149     }
4150 }
4151
4152 /* Determines cost of the candidate CAND.  */
4153
4154 static void
4155 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4156 {
4157   unsigned cost_base, cost_step;
4158   tree base;
4159
4160   if (!cand->iv)
4161     {
4162       cand->cost = 0;
4163       return;
4164     }
4165
4166   /* There are two costs associated with the candidate -- its increment
4167      and its initialization.  The second is almost negligible for any loop
4168      that rolls enough, so we take it just very little into account.  */
4169
4170   base = cand->iv->base;
4171   cost_base = force_var_cost (data, base, NULL);
4172   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4173
4174   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4175
4176   /* Prefer the original iv unless we may gain something by replacing it;
4177      this is not really relevant for artificial ivs created by other
4178      passes.  */
4179   if (cand->pos == IP_ORIGINAL
4180       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4181     cand->cost--;
4182
4183   /* Prefer not to insert statements into latch unless there are some
4184      already (so that we do not create unnecessary jumps).  */
4185   if (cand->pos == IP_END
4186       && empty_block_p (ip_end_pos (data->current_loop)))
4187     cand->cost++;
4188 }
4189
4190 /* Determines costs of computation of the candidates.  */
4191
4192 static void
4193 determine_iv_costs (struct ivopts_data *data)
4194 {
4195   unsigned i;
4196
4197   if (dump_file && (dump_flags & TDF_DETAILS))
4198     {
4199       fprintf (dump_file, "Candidate costs:\n");
4200       fprintf (dump_file, "  cand\tcost\n");
4201     }
4202
4203   for (i = 0; i < n_iv_cands (data); i++)
4204     {
4205       struct iv_cand *cand = iv_cand (data, i);
4206
4207       determine_iv_cost (data, cand);
4208
4209       if (dump_file && (dump_flags & TDF_DETAILS))
4210         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4211     }
4212
4213 if (dump_file && (dump_flags & TDF_DETAILS))
4214       fprintf (dump_file, "\n");
4215 }
4216
4217 /* Calculates cost for having SIZE induction variables.  */
4218
4219 static unsigned
4220 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4221 {
4222   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4223 }
4224
4225 /* For each size of the induction variable set determine the penalty.  */
4226
4227 static void
4228 determine_set_costs (struct ivopts_data *data)
4229 {
4230   unsigned j, n;
4231   tree phi, op;
4232   struct loop *loop = data->current_loop;
4233   bitmap_iterator bi;
4234
4235   /* We use the following model (definitely improvable, especially the
4236      cost function -- TODO):
4237
4238      We estimate the number of registers available (using MD data), name it A.
4239
4240      We estimate the number of registers used by the loop, name it U.  This
4241      number is obtained as the number of loop phi nodes (not counting virtual
4242      registers and bivs) + the number of variables from outside of the loop.
4243
4244      We set a reserve R (free regs that are used for temporary computations,
4245      etc.).  For now the reserve is a constant 3.
4246
4247      Let I be the number of induction variables.
4248
4249      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4250         make a lot of ivs without a reason).
4251      -- if A - R < U + I <= A, the cost is I * PRES_COST
4252      -- if U + I > A, the cost is I * PRES_COST and
4253         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4254
4255   if (dump_file && (dump_flags & TDF_DETAILS))
4256     {
4257       fprintf (dump_file, "Global costs:\n");
4258       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4259       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4260       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4261       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4262     }
4263
4264   n = 0;
4265   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4266     {
4267       op = PHI_RESULT (phi);
4268
4269       if (!is_gimple_reg (op))
4270         continue;
4271
4272       if (get_iv (data, op))
4273         continue;
4274
4275       n++;
4276     }
4277
4278   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4279     {
4280       struct version_info *info = ver_info (data, j);
4281
4282       if (info->inv_id && info->has_nonlin_use)
4283         n++;
4284     }
4285
4286   data->regs_used = n;
4287   if (dump_file && (dump_flags & TDF_DETAILS))
4288     fprintf (dump_file, "  regs_used %d\n", n);
4289
4290   if (dump_file && (dump_flags & TDF_DETAILS))
4291     {
4292       fprintf (dump_file, "  cost for size:\n");
4293       fprintf (dump_file, "  ivs\tcost\n");
4294       for (j = 0; j <= 2 * target_avail_regs; j++)
4295         fprintf (dump_file, "  %d\t%d\n", j,
4296                  ivopts_global_cost_for_size (data, j));
4297       fprintf (dump_file, "\n");
4298     }
4299 }
4300
4301 /* Returns true if A is a cheaper cost pair than B.  */
4302
4303 static bool
4304 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4305 {
4306   if (!a)
4307     return false;
4308
4309   if (!b)
4310     return true;
4311
4312   if (a->cost < b->cost)
4313     return true;
4314
4315   if (a->cost > b->cost)
4316     return false;
4317
4318   /* In case the costs are the same, prefer the cheaper candidate.  */
4319   if (a->cand->cost < b->cand->cost)
4320     return true;
4321
4322   return false;
4323 }
4324
4325 /* Computes the cost field of IVS structure.  */
4326
4327 static void
4328 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4329 {
4330   unsigned cost = 0;
4331
4332   cost += ivs->cand_use_cost;
4333   cost += ivs->cand_cost;
4334   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4335
4336   ivs->cost = cost;
4337 }
4338
4339 /* Remove invariants in set INVS to set IVS.  */
4340
4341 static void
4342 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4343 {
4344   bitmap_iterator bi;
4345   unsigned iid;
4346
4347   if (!invs)
4348     return;
4349
4350   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4351     {
4352       ivs->n_invariant_uses[iid]--;
4353       if (ivs->n_invariant_uses[iid] == 0)
4354         ivs->n_regs--;
4355     }
4356 }
4357
4358 /* Set USE not to be expressed by any candidate in IVS.  */
4359
4360 static void
4361 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4362                  struct iv_use *use)
4363 {
4364   unsigned uid = use->id, cid;
4365   struct cost_pair *cp;
4366
4367   cp = ivs->cand_for_use[uid];
4368   if (!cp)
4369     return;
4370   cid = cp->cand->id;
4371
4372   ivs->bad_uses++;
4373   ivs->cand_for_use[uid] = NULL;
4374   ivs->n_cand_uses[cid]--;
4375
4376   if (ivs->n_cand_uses[cid] == 0)
4377     {
4378       bitmap_clear_bit (ivs->cands, cid);
4379       /* Do not count the pseudocandidates.  */
4380       if (cp->cand->iv)
4381         ivs->n_regs--;
4382       ivs->n_cands--;
4383       ivs->cand_cost -= cp->cand->cost;
4384
4385       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4386     }
4387
4388   ivs->cand_use_cost -= cp->cost;
4389
4390   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4391   iv_ca_recount_cost (data, ivs);
4392 }
4393
4394 /* Add invariants in set INVS to set IVS.  */
4395
4396 static void
4397 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4398 {
4399   bitmap_iterator bi;
4400   unsigned iid;
4401
4402   if (!invs)
4403     return;
4404
4405   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4406     {
4407       ivs->n_invariant_uses[iid]++;
4408       if (ivs->n_invariant_uses[iid] == 1)
4409         ivs->n_regs++;
4410     }
4411 }
4412
4413 /* Set cost pair for USE in set IVS to CP.  */
4414
4415 static void
4416 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4417               struct iv_use *use, struct cost_pair *cp)
4418 {
4419   unsigned uid = use->id, cid;
4420
4421   if (ivs->cand_for_use[uid] == cp)
4422     return;
4423
4424   if (ivs->cand_for_use[uid])
4425     iv_ca_set_no_cp (data, ivs, use);
4426
4427   if (cp)
4428     {
4429       cid = cp->cand->id;
4430
4431       ivs->bad_uses--;
4432       ivs->cand_for_use[uid] = cp;
4433       ivs->n_cand_uses[cid]++;
4434       if (ivs->n_cand_uses[cid] == 1)
4435         {
4436           bitmap_set_bit (ivs->cands, cid);
4437           /* Do not count the pseudocandidates.  */
4438           if (cp->cand->iv)
4439             ivs->n_regs++;
4440           ivs->n_cands++;
4441           ivs->cand_cost += cp->cand->cost;
4442
4443           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4444         }
4445
4446       ivs->cand_use_cost += cp->cost;
4447       iv_ca_set_add_invariants (ivs, cp->depends_on);
4448       iv_ca_recount_cost (data, ivs);
4449     }
4450 }
4451
4452 /* Extend set IVS by expressing USE by some of the candidates in it
4453    if possible.  */
4454
4455 static void
4456 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4457                struct iv_use *use)
4458 {
4459   struct cost_pair *best_cp = NULL, *cp;
4460   bitmap_iterator bi;
4461   unsigned i;
4462
4463   gcc_assert (ivs->upto >= use->id);
4464
4465   if (ivs->upto == use->id)
4466     {
4467       ivs->upto++;
4468       ivs->bad_uses++;
4469     }
4470
4471   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4472     {
4473       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4474
4475       if (cheaper_cost_pair (cp, best_cp))
4476         best_cp = cp;
4477     }
4478
4479   iv_ca_set_cp (data, ivs, use, best_cp);
4480 }
4481
4482 /* Get cost for assignment IVS.  */
4483
4484 static unsigned
4485 iv_ca_cost (struct iv_ca *ivs)
4486 {
4487   return (ivs->bad_uses ? INFTY : ivs->cost);
4488 }
4489
4490 /* Returns true if all dependences of CP are among invariants in IVS.  */
4491
4492 static bool
4493 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4494 {
4495   unsigned i;
4496   bitmap_iterator bi;
4497
4498   if (!cp->depends_on)
4499     return true;
4500
4501   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4502     {
4503       if (ivs->n_invariant_uses[i] == 0)
4504         return false;
4505     }
4506
4507   return true;
4508 }
4509
4510 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4511    it before NEXT_CHANGE.  */
4512
4513 static struct iv_ca_delta *
4514 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4515                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4516 {
4517   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4518
4519   change->use = use;
4520   change->old_cp = old_cp;
4521   change->new_cp = new_cp;
4522   change->next_change = next_change;
4523
4524   return change;
4525 }
4526
4527 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4528    are rewritten.  */
4529
4530 static struct iv_ca_delta *
4531 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4532 {
4533   struct iv_ca_delta *last;
4534
4535   if (!l2)
4536     return l1;
4537
4538   if (!l1)
4539     return l2;
4540
4541   for (last = l1; last->next_change; last = last->next_change)
4542     continue;
4543   last->next_change = l2;
4544
4545   return l1;
4546 }
4547
4548 /* Returns candidate by that USE is expressed in IVS.  */
4549
4550 static struct cost_pair *
4551 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4552 {
4553   return ivs->cand_for_use[use->id];
4554 }
4555
4556 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4557
4558 static struct iv_ca_delta *
4559 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4560 {
4561   struct iv_ca_delta *act, *next, *prev = NULL;
4562   struct cost_pair *tmp;
4563
4564   for (act = delta; act; act = next)
4565     {
4566       next = act->next_change;
4567       act->next_change = prev;
4568       prev = act;
4569
4570       tmp = act->old_cp;
4571       act->old_cp = act->new_cp;
4572       act->new_cp = tmp;
4573     }
4574
4575   return prev;
4576 }
4577
4578 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4579    reverted instead.  */
4580
4581 static void
4582 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4583                     struct iv_ca_delta *delta, bool forward)
4584 {
4585   struct cost_pair *from, *to;
4586   struct iv_ca_delta *act;
4587
4588   if (!forward)
4589     delta = iv_ca_delta_reverse (delta);
4590
4591   for (act = delta; act; act = act->next_change)
4592     {
4593       from = act->old_cp;
4594       to = act->new_cp;
4595       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4596       iv_ca_set_cp (data, ivs, act->use, to);
4597     }
4598
4599   if (!forward)
4600     iv_ca_delta_reverse (delta);
4601 }
4602
4603 /* Returns true if CAND is used in IVS.  */
4604
4605 static bool
4606 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4607 {
4608   return ivs->n_cand_uses[cand->id] > 0;
4609 }
4610
4611 /* Returns number of induction variable candidates in the set IVS.  */
4612
4613 static unsigned
4614 iv_ca_n_cands (struct iv_ca *ivs)
4615 {
4616   return ivs->n_cands;
4617 }
4618
4619 /* Free the list of changes DELTA.  */
4620
4621 static void
4622 iv_ca_delta_free (struct iv_ca_delta **delta)
4623 {
4624   struct iv_ca_delta *act, *next;
4625
4626   for (act = *delta; act; act = next)
4627     {
4628       next = act->next_change;
4629       free (act);
4630     }
4631
4632   *delta = NULL;
4633 }
4634
4635 /* Allocates new iv candidates assignment.  */
4636
4637 static struct iv_ca *
4638 iv_ca_new (struct ivopts_data *data)
4639 {
4640   struct iv_ca *nw = XNEW (struct iv_ca);
4641
4642   nw->upto = 0;
4643   nw->bad_uses = 0;
4644   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4645   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4646   nw->cands = BITMAP_ALLOC (NULL);
4647   nw->n_cands = 0;
4648   nw->n_regs = 0;
4649   nw->cand_use_cost = 0;
4650   nw->cand_cost = 0;
4651   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4652   nw->cost = 0;
4653
4654   return nw;
4655 }
4656
4657 /* Free memory occupied by the set IVS.  */
4658
4659 static void
4660 iv_ca_free (struct iv_ca **ivs)
4661 {
4662   free ((*ivs)->cand_for_use);
4663   free ((*ivs)->n_cand_uses);
4664   BITMAP_FREE ((*ivs)->cands);
4665   free ((*ivs)->n_invariant_uses);
4666   free (*ivs);
4667   *ivs = NULL;
4668 }
4669
4670 /* Dumps IVS to FILE.  */
4671
4672 static void
4673 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4674 {
4675   const char *pref = "  invariants ";
4676   unsigned i;
4677
4678   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4679   bitmap_print (file, ivs->cands, "  candidates ","\n");
4680
4681   for (i = 1; i <= data->max_inv_id; i++)
4682     if (ivs->n_invariant_uses[i])
4683       {
4684         fprintf (file, "%s%d", pref, i);
4685         pref = ", ";
4686       }
4687   fprintf (file, "\n");
4688 }
4689
4690 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4691    new set, and store differences in DELTA.  Number of induction variables
4692    in the new set is stored to N_IVS.  */
4693
4694 static unsigned
4695 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4696               struct iv_cand *cand, struct iv_ca_delta **delta,
4697               unsigned *n_ivs)
4698 {
4699   unsigned i, cost;
4700   struct iv_use *use;
4701   struct cost_pair *old_cp, *new_cp;
4702
4703   *delta = NULL;
4704   for (i = 0; i < ivs->upto; i++)
4705     {
4706       use = iv_use (data, i);
4707       old_cp = iv_ca_cand_for_use (ivs, use);
4708
4709       if (old_cp
4710           && old_cp->cand == cand)
4711         continue;
4712
4713       new_cp = get_use_iv_cost (data, use, cand);
4714       if (!new_cp)
4715         continue;
4716
4717       if (!iv_ca_has_deps (ivs, new_cp))
4718         continue;
4719
4720       if (!cheaper_cost_pair (new_cp, old_cp))
4721         continue;
4722
4723       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4724     }
4725
4726   iv_ca_delta_commit (data, ivs, *delta, true);
4727   cost = iv_ca_cost (ivs);
4728   if (n_ivs)
4729     *n_ivs = iv_ca_n_cands (ivs);
4730   iv_ca_delta_commit (data, ivs, *delta, false);
4731
4732   return cost;
4733 }
4734
4735 /* Try narrowing set IVS by removing CAND.  Return the cost of
4736    the new set and store the differences in DELTA.  */
4737
4738 static unsigned
4739 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4740               struct iv_cand *cand, struct iv_ca_delta **delta)
4741 {
4742   unsigned i, ci;
4743   struct iv_use *use;
4744   struct cost_pair *old_cp, *new_cp, *cp;
4745   bitmap_iterator bi;
4746   struct iv_cand *cnd;
4747   unsigned cost;
4748
4749   *delta = NULL;
4750   for (i = 0; i < n_iv_uses (data); i++)
4751     {
4752       use = iv_use (data, i);
4753
4754       old_cp = iv_ca_cand_for_use (ivs, use);
4755       if (old_cp->cand != cand)
4756         continue;
4757
4758       new_cp = NULL;
4759
4760       if (data->consider_all_candidates)
4761         {
4762           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4763             {
4764               if (ci == cand->id)
4765                 continue;
4766
4767               cnd = iv_cand (data, ci);
4768
4769               cp = get_use_iv_cost (data, use, cnd);
4770               if (!cp)
4771                 continue;
4772               if (!iv_ca_has_deps (ivs, cp))
4773                 continue;
4774
4775               if (!cheaper_cost_pair (cp, new_cp))
4776                 continue;
4777
4778               new_cp = cp;
4779             }
4780         }
4781       else
4782         {
4783           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4784             {
4785               if (ci == cand->id)
4786                 continue;
4787
4788               cnd = iv_cand (data, ci);
4789
4790               cp = get_use_iv_cost (data, use, cnd);
4791               if (!cp)
4792                 continue;
4793               if (!iv_ca_has_deps (ivs, cp))
4794                 continue;
4795
4796               if (!cheaper_cost_pair (cp, new_cp))
4797                 continue;
4798
4799               new_cp = cp;
4800             }
4801         }
4802
4803       if (!new_cp)
4804         {
4805           iv_ca_delta_free (delta);
4806           return INFTY;
4807         }
4808
4809       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4810     }
4811
4812   iv_ca_delta_commit (data, ivs, *delta, true);
4813   cost = iv_ca_cost (ivs);
4814   iv_ca_delta_commit (data, ivs, *delta, false);
4815
4816   return cost;
4817 }
4818
4819 /* Try optimizing the set of candidates IVS by removing candidates different
4820    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4821    differences in DELTA.  */
4822
4823 static unsigned
4824 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4825              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4826 {
4827   bitmap_iterator bi;
4828   struct iv_ca_delta *act_delta, *best_delta;
4829   unsigned i, best_cost, acost;
4830   struct iv_cand *cand;
4831
4832   best_delta = NULL;
4833   best_cost = iv_ca_cost (ivs);
4834
4835   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4836     {
4837       cand = iv_cand (data, i);
4838
4839       if (cand == except_cand)
4840         continue;
4841
4842       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4843
4844       if (acost < best_cost)
4845         {
4846           best_cost = acost;
4847           iv_ca_delta_free (&best_delta);
4848           best_delta = act_delta;
4849         }
4850       else
4851         iv_ca_delta_free (&act_delta);
4852     }
4853
4854   if (!best_delta)
4855     {
4856       *delta = NULL;
4857       return best_cost;
4858     }
4859
4860   /* Recurse to possibly remove other unnecessary ivs.  */
4861   iv_ca_delta_commit (data, ivs, best_delta, true);
4862   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4863   iv_ca_delta_commit (data, ivs, best_delta, false);
4864   *delta = iv_ca_delta_join (best_delta, *delta);
4865   return best_cost;
4866 }
4867
4868 /* Tries to extend the sets IVS in the best possible way in order
4869    to express the USE.  */
4870
4871 static bool
4872 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4873                   struct iv_use *use)
4874 {
4875   unsigned best_cost, act_cost;
4876   unsigned i;
4877   bitmap_iterator bi;
4878   struct iv_cand *cand;
4879   struct iv_ca_delta *best_delta = NULL, *act_delta;
4880   struct cost_pair *cp;
4881
4882   iv_ca_add_use (data, ivs, use);
4883   best_cost = iv_ca_cost (ivs);
4884
4885   cp = iv_ca_cand_for_use (ivs, use);
4886   if (cp)
4887     {
4888       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4889       iv_ca_set_no_cp (data, ivs, use);
4890     }
4891
4892   /* First try important candidates.  Only if it fails, try the specific ones.
4893      Rationale -- in loops with many variables the best choice often is to use
4894      just one generic biv.  If we added here many ivs specific to the uses,
4895      the optimization algorithm later would be likely to get stuck in a local
4896      minimum, thus causing us to create too many ivs.  The approach from
4897      few ivs to more seems more likely to be successful -- starting from few
4898      ivs, replacing an expensive use by a specific iv should always be a
4899      win.  */
4900   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4901     {
4902       cand = iv_cand (data, i);
4903
4904       if (iv_ca_cand_used_p (ivs, cand))
4905         continue;
4906
4907       cp = get_use_iv_cost (data, use, cand);
4908       if (!cp)
4909         continue;
4910
4911       iv_ca_set_cp (data, ivs, use, cp);
4912       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4913       iv_ca_set_no_cp (data, ivs, use);
4914       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4915
4916       if (act_cost < best_cost)
4917         {
4918           best_cost = act_cost;
4919
4920           iv_ca_delta_free (&best_delta);
4921           best_delta = act_delta;
4922         }
4923       else
4924         iv_ca_delta_free (&act_delta);
4925     }
4926
4927   if (best_cost == INFTY)
4928     {
4929       for (i = 0; i < use->n_map_members; i++)
4930         {
4931           cp = use->cost_map + i;
4932           cand = cp->cand;
4933           if (!cand)
4934             continue;
4935
4936           /* Already tried this.  */
4937           if (cand->important)
4938             continue;
4939
4940           if (iv_ca_cand_used_p (ivs, cand))
4941             continue;
4942
4943           act_delta = NULL;
4944           iv_ca_set_cp (data, ivs, use, cp);
4945           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4946           iv_ca_set_no_cp (data, ivs, use);
4947           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4948                                        cp, act_delta);
4949
4950           if (act_cost < best_cost)
4951             {
4952               best_cost = act_cost;
4953
4954               if (best_delta)
4955                 iv_ca_delta_free (&best_delta);
4956               best_delta = act_delta;
4957             }
4958           else
4959             iv_ca_delta_free (&act_delta);
4960         }
4961     }
4962
4963   iv_ca_delta_commit (data, ivs, best_delta, true);
4964   iv_ca_delta_free (&best_delta);
4965
4966   return (best_cost != INFTY);
4967 }
4968
4969 /* Finds an initial assignment of candidates to uses.  */
4970
4971 static struct iv_ca *
4972 get_initial_solution (struct ivopts_data *data)
4973 {
4974   struct iv_ca *ivs = iv_ca_new (data);
4975   unsigned i;
4976
4977   for (i = 0; i < n_iv_uses (data); i++)
4978     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4979       {
4980         iv_ca_free (&ivs);
4981         return NULL;
4982       }
4983
4984   return ivs;
4985 }
4986
4987 /* Tries to improve set of induction variables IVS.  */
4988
4989 static bool
4990 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4991 {
4992   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4993   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4994   struct iv_cand *cand;
4995
4996   /* Try extending the set of induction variables by one.  */
4997   for (i = 0; i < n_iv_cands (data); i++)
4998     {
4999       cand = iv_cand (data, i);
5000
5001       if (iv_ca_cand_used_p (ivs, cand))
5002         continue;
5003
5004       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5005       if (!act_delta)
5006         continue;
5007
5008       /* If we successfully added the candidate and the set is small enough,
5009          try optimizing it by removing other candidates.  */
5010       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5011         {
5012           iv_ca_delta_commit (data, ivs, act_delta, true);
5013           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5014           iv_ca_delta_commit (data, ivs, act_delta, false);
5015           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5016         }
5017
5018       if (acost < best_cost)
5019         {
5020           best_cost = acost;
5021           iv_ca_delta_free (&best_delta);
5022           best_delta = act_delta;
5023         }
5024       else
5025         iv_ca_delta_free (&act_delta);
5026     }
5027
5028   if (!best_delta)
5029     {
5030       /* Try removing the candidates from the set instead.  */
5031       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5032
5033       /* Nothing more we can do.  */
5034       if (!best_delta)
5035         return false;
5036     }
5037
5038   iv_ca_delta_commit (data, ivs, best_delta, true);
5039   gcc_assert (best_cost == iv_ca_cost (ivs));
5040   iv_ca_delta_free (&best_delta);
5041   return true;
5042 }
5043
5044 /* Attempts to find the optimal set of induction variables.  We do simple
5045    greedy heuristic -- we try to replace at most one candidate in the selected
5046    solution and remove the unused ivs while this improves the cost.  */
5047
5048 static struct iv_ca *
5049 find_optimal_iv_set (struct ivopts_data *data)
5050 {
5051   unsigned i;
5052   struct iv_ca *set;
5053   struct iv_use *use;
5054
5055   /* Get the initial solution.  */
5056   set = get_initial_solution (data);
5057   if (!set)
5058     {
5059       if (dump_file && (dump_flags & TDF_DETAILS))
5060         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5061       return NULL;
5062     }
5063
5064   if (dump_file && (dump_flags & TDF_DETAILS))
5065     {
5066       fprintf (dump_file, "Initial set of candidates:\n");
5067       iv_ca_dump (data, dump_file, set);
5068     }
5069
5070   while (try_improve_iv_set (data, set))
5071     {
5072       if (dump_file && (dump_flags & TDF_DETAILS))
5073         {
5074           fprintf (dump_file, "Improved to:\n");
5075           iv_ca_dump (data, dump_file, set);
5076         }
5077     }
5078
5079   if (dump_file && (dump_flags & TDF_DETAILS))
5080     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5081
5082   for (i = 0; i < n_iv_uses (data); i++)
5083     {
5084       use = iv_use (data, i);
5085       use->selected = iv_ca_cand_for_use (set, use)->cand;
5086     }
5087
5088   return set;
5089 }
5090
5091 /* Creates a new induction variable corresponding to CAND.  */
5092
5093 static void
5094 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5095 {
5096   block_stmt_iterator incr_pos;
5097   tree base;
5098   bool after = false;
5099
5100   if (!cand->iv)
5101     return;
5102
5103   switch (cand->pos)
5104     {
5105     case IP_NORMAL:
5106       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5107       break;
5108
5109     case IP_END:
5110       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5111       after = true;
5112       break;
5113
5114     case IP_ORIGINAL:
5115       /* Mark that the iv is preserved.  */
5116       name_info (data, cand->var_before)->preserve_biv = true;
5117       name_info (data, cand->var_after)->preserve_biv = true;
5118
5119       /* Rewrite the increment so that it uses var_before directly.  */
5120       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5121
5122       return;
5123     }
5124
5125   gimple_add_tmp_var (cand->var_before);
5126   add_referenced_tmp_var (cand->var_before);
5127
5128   base = unshare_expr (cand->iv->base);
5129
5130   create_iv (base, unshare_expr (cand->iv->step),
5131              cand->var_before, data->current_loop,
5132              &incr_pos, after, &cand->var_before, &cand->var_after);
5133 }
5134
5135 /* Creates new induction variables described in SET.  */
5136
5137 static void
5138 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5139 {
5140   unsigned i;
5141   struct iv_cand *cand;
5142   bitmap_iterator bi;
5143
5144   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5145     {
5146       cand = iv_cand (data, i);
5147       create_new_iv (data, cand);
5148     }
5149 }
5150
5151 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5152    is true, remove also the ssa name defined by the statement.  */
5153
5154 static void
5155 remove_statement (tree stmt, bool including_defined_name)
5156 {
5157   if (TREE_CODE (stmt) == PHI_NODE)
5158     {
5159       if (!including_defined_name)
5160         {
5161           /* Prevent the ssa name defined by the statement from being removed.  */
5162           SET_PHI_RESULT (stmt, NULL);
5163         }
5164       remove_phi_node (stmt, NULL_TREE);
5165     }
5166   else
5167     {
5168       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5169
5170       bsi_remove (&bsi, true);
5171     }
5172 }
5173
5174 /* Rewrites USE (definition of iv used in a nonlinear expression)
5175    using candidate CAND.  */
5176
5177 static void
5178 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5179                             struct iv_use *use, struct iv_cand *cand)
5180 {
5181   tree comp;
5182   tree op, stmts, tgt, ass;
5183   block_stmt_iterator bsi, pbsi;
5184
5185   /* An important special case -- if we are asked to express value of
5186      the original iv by itself, just exit; there is no need to
5187      introduce a new computation (that might also need casting the
5188      variable to unsigned and back).  */
5189   if (cand->pos == IP_ORIGINAL
5190       && cand->incremented_at == use->stmt)
5191     {
5192       tree step, ctype, utype;
5193       enum tree_code incr_code = PLUS_EXPR;
5194
5195       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5196       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5197
5198       step = cand->iv->step;
5199       ctype = TREE_TYPE (step);
5200       utype = TREE_TYPE (cand->var_after);
5201       if (TREE_CODE (step) == NEGATE_EXPR)
5202         {
5203           incr_code = MINUS_EXPR;
5204           step = TREE_OPERAND (step, 0);
5205         }
5206
5207       /* Check whether we may leave the computation unchanged.
5208          This is the case only if it does not rely on other
5209          computations in the loop -- otherwise, the computation
5210          we rely upon may be removed in remove_unused_ivs,
5211          thus leading to ICE.  */
5212       op = TREE_OPERAND (use->stmt, 1);
5213       if (TREE_CODE (op) == PLUS_EXPR
5214           || TREE_CODE (op) == MINUS_EXPR)
5215         {
5216           if (TREE_OPERAND (op, 0) == cand->var_before)
5217             op = TREE_OPERAND (op, 1);
5218           else if (TREE_CODE (op) == PLUS_EXPR
5219                    && TREE_OPERAND (op, 1) == cand->var_before)
5220             op = TREE_OPERAND (op, 0);
5221           else
5222             op = NULL_TREE;
5223         }
5224       else
5225         op = NULL_TREE;
5226
5227       if (op
5228           && (TREE_CODE (op) == INTEGER_CST
5229               || operand_equal_p (op, step, 0)))
5230         return;
5231
5232       /* Otherwise, add the necessary computations to express
5233          the iv.  */
5234       op = fold_convert (ctype, cand->var_before);
5235       comp = fold_convert (utype,
5236                            build2 (incr_code, ctype, op,
5237                                    unshare_expr (step)));
5238     }
5239   else
5240     comp = get_computation (data->current_loop, use, cand);
5241
5242   switch (TREE_CODE (use->stmt))
5243     {
5244     case PHI_NODE:
5245       tgt = PHI_RESULT (use->stmt);
5246
5247       /* If we should keep the biv, do not replace it.  */
5248       if (name_info (data, tgt)->preserve_biv)
5249         return;
5250
5251       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5252       while (!bsi_end_p (pbsi)
5253              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5254         {
5255           bsi = pbsi;
5256           bsi_next (&pbsi);
5257         }
5258       break;
5259
5260     case MODIFY_EXPR:
5261       tgt = TREE_OPERAND (use->stmt, 0);
5262       bsi = bsi_for_stmt (use->stmt);
5263       break;
5264
5265     default:
5266       gcc_unreachable ();
5267     }
5268
5269   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5270
5271   if (TREE_CODE (use->stmt) == PHI_NODE)
5272     {
5273       if (stmts)
5274         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5275       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5276       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5277       remove_statement (use->stmt, false);
5278       SSA_NAME_DEF_STMT (tgt) = ass;
5279     }
5280   else
5281     {
5282       if (stmts)
5283         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5284       TREE_OPERAND (use->stmt, 1) = op;
5285     }
5286 }
5287
5288 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5289    for_each_index.  */
5290
5291 static bool
5292 idx_remove_ssa_names (tree base, tree *idx,
5293                       void *data ATTRIBUTE_UNUSED)
5294 {
5295   tree *op;
5296
5297   if (TREE_CODE (*idx) == SSA_NAME)
5298     *idx = SSA_NAME_VAR (*idx);
5299
5300   if (TREE_CODE (base) == ARRAY_REF)
5301     {
5302       op = &TREE_OPERAND (base, 2);
5303       if (*op
5304           && TREE_CODE (*op) == SSA_NAME)
5305         *op = SSA_NAME_VAR (*op);
5306       op = &TREE_OPERAND (base, 3);
5307       if (*op
5308           && TREE_CODE (*op) == SSA_NAME)
5309         *op = SSA_NAME_VAR (*op);
5310     }
5311
5312   return true;
5313 }
5314
5315 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5316
5317 static tree
5318 unshare_and_remove_ssa_names (tree ref)
5319 {
5320   ref = unshare_expr (ref);
5321   for_each_index (&ref, idx_remove_ssa_names, NULL);
5322
5323   return ref;
5324 }
5325
5326 /* Extract the alias analysis info for the memory reference REF.  There are
5327    several ways how this information may be stored and what precisely is
5328    its semantics depending on the type of the reference, but there always is
5329    somewhere hidden one _DECL node that is used to determine the set of
5330    virtual operands for the reference.  The code below deciphers this jungle
5331    and extracts this single useful piece of information.  */
5332
5333 static tree
5334 get_ref_tag (tree ref, tree orig)
5335 {
5336   tree var = get_base_address (ref);
5337   tree aref = NULL_TREE, tag, sv;
5338   HOST_WIDE_INT offset, size, maxsize;
5339
5340   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5341     {
5342       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5343       if (ref)
5344         break;
5345     }
5346
5347   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5348     return unshare_expr (sv);
5349
5350   if (!var)
5351     return NULL_TREE;
5352
5353   if (TREE_CODE (var) == INDIRECT_REF)
5354     {
5355       /* If the base is a dereference of a pointer, first check its name memory
5356          tag.  If it does not have one, use its symbol memory tag.  */
5357       var = TREE_OPERAND (var, 0);
5358       if (TREE_CODE (var) != SSA_NAME)
5359         return NULL_TREE;
5360
5361       if (SSA_NAME_PTR_INFO (var))
5362         {
5363           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5364           if (tag)
5365             return tag;
5366         }
5367
5368       var = SSA_NAME_VAR (var);
5369       tag = var_ann (var)->symbol_mem_tag;
5370       gcc_assert (tag != NULL_TREE);
5371       return tag;
5372     }
5373   else
5374     {
5375       if (!DECL_P (var))
5376         return NULL_TREE;
5377
5378       tag = var_ann (var)->symbol_mem_tag;
5379       if (tag)
5380         return tag;
5381
5382       return var;
5383     }
5384 }
5385
5386 /* Copies the reference information from OLD_REF to NEW_REF.  */
5387
5388 static void
5389 copy_ref_info (tree new_ref, tree old_ref)
5390 {
5391   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5392     copy_mem_ref_info (new_ref, old_ref);
5393   else
5394     {
5395       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5396       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5397     }
5398 }
5399
5400 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5401
5402 static void
5403 rewrite_use_address (struct ivopts_data *data,
5404                      struct iv_use *use, struct iv_cand *cand)
5405 {
5406   struct affine_tree_combination aff;
5407   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5408   tree ref;
5409
5410   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5411   unshare_aff_combination (&aff);
5412
5413   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5414   copy_ref_info (ref, *use->op_p);
5415   *use->op_p = ref;
5416 }
5417
5418 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5419    candidate CAND.  */
5420
5421 static void
5422 rewrite_use_compare (struct ivopts_data *data,
5423                      struct iv_use *use, struct iv_cand *cand)
5424 {
5425   tree comp;
5426   tree *op_p, cond, op, stmts, bound;
5427   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5428   enum tree_code compare;
5429   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5430
5431   bound = cp->value;
5432   if (bound)
5433     {
5434       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5435       tree var_type = TREE_TYPE (var);
5436
5437       compare = iv_elimination_compare (data, use);
5438       bound = fold_convert (var_type, bound);
5439       op = force_gimple_operand (unshare_expr (bound), &stmts,
5440                                  true, NULL_TREE);
5441
5442       if (stmts)
5443         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5444
5445       *use->op_p = build2 (compare, boolean_type_node, var, op);
5446       update_stmt (use->stmt);
5447       return;
5448     }
5449
5450   /* The induction variable elimination failed; just express the original
5451      giv.  */
5452   comp = get_computation (data->current_loop, use, cand);
5453
5454   cond = *use->op_p;
5455   op_p = &TREE_OPERAND (cond, 0);
5456   if (TREE_CODE (*op_p) != SSA_NAME
5457       || zero_p (get_iv (data, *op_p)->step))
5458     op_p = &TREE_OPERAND (cond, 1);
5459
5460   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5461   if (stmts)
5462     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5463
5464   *op_p = op;
5465 }
5466
5467 /* Ensure that operand *OP_P may be used at the end of EXIT without
5468    violating loop closed ssa form.  */
5469
5470 static void
5471 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5472 {
5473   basic_block def_bb;
5474   struct loop *def_loop;
5475   tree phi, use;
5476
5477   use = USE_FROM_PTR (op_p);
5478   if (TREE_CODE (use) != SSA_NAME)
5479     return;
5480
5481   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5482   if (!def_bb)
5483     return;
5484
5485   def_loop = def_bb->loop_father;
5486   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5487     return;
5488
5489   /* Try finding a phi node that copies the value out of the loop.  */
5490   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5491     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5492       break;
5493
5494   if (!phi)
5495     {
5496       /* Create such a phi node.  */
5497       tree new_name = duplicate_ssa_name (use, NULL);
5498
5499       phi = create_phi_node (new_name, exit->dest);
5500       SSA_NAME_DEF_STMT (new_name) = phi;
5501       add_phi_arg (phi, use, exit);
5502     }
5503
5504   SET_USE (op_p, PHI_RESULT (phi));
5505 }
5506
5507 /* Ensure that operands of STMT may be used at the end of EXIT without
5508    violating loop closed ssa form.  */
5509
5510 static void
5511 protect_loop_closed_ssa_form (edge exit, tree stmt)
5512 {
5513   ssa_op_iter iter;
5514   use_operand_p use_p;
5515
5516   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5517     protect_loop_closed_ssa_form_use (exit, use_p);
5518 }
5519
5520 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5521    so that they are emitted on the correct place, and so that the loop closed
5522    ssa form is preserved.  */
5523
5524 void
5525 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5526 {
5527   tree_stmt_iterator tsi;
5528   block_stmt_iterator bsi;
5529   tree phi, stmt, def, next;
5530
5531   if (!single_pred_p (exit->dest))
5532     split_loop_exit_edge (exit);
5533
5534   /* Ensure there is label in exit->dest, so that we can
5535      insert after it.  */
5536   tree_block_label (exit->dest);
5537   bsi = bsi_after_labels (exit->dest);
5538
5539   if (TREE_CODE (stmts) == STATEMENT_LIST)
5540     {
5541       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5542         {
5543           tree stmt = tsi_stmt (tsi);
5544           bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5545           protect_loop_closed_ssa_form (exit, stmt);
5546         }
5547     }
5548   else
5549     {
5550       bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5551       protect_loop_closed_ssa_form (exit, stmts);
5552     }
5553
5554   if (!op)
5555     return;
5556
5557   for (phi = phi_nodes (exit->dest); phi; phi = next)
5558     {
5559       next = PHI_CHAIN (phi);
5560
5561       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5562         {
5563           def = PHI_RESULT (phi);
5564           remove_statement (phi, false);
5565           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5566                         def, op);
5567           SSA_NAME_DEF_STMT (def) = stmt;
5568           bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5569         }
5570     }
5571 }
5572
5573 /* Rewrites USE using candidate CAND.  */
5574
5575 static void
5576 rewrite_use (struct ivopts_data *data,
5577              struct iv_use *use, struct iv_cand *cand)
5578 {
5579   switch (use->type)
5580     {
5581       case USE_NONLINEAR_EXPR:
5582         rewrite_use_nonlinear_expr (data, use, cand);
5583         break;
5584
5585       case USE_ADDRESS:
5586         rewrite_use_address (data, use, cand);
5587         break;
5588
5589       case USE_COMPARE:
5590         rewrite_use_compare (data, use, cand);
5591         break;
5592
5593       default:
5594         gcc_unreachable ();
5595     }
5596   mark_new_vars_to_rename (use->stmt);
5597 }
5598
5599 /* Rewrite the uses using the selected induction variables.  */
5600
5601 static void
5602 rewrite_uses (struct ivopts_data *data)
5603 {
5604   unsigned i;
5605   struct iv_cand *cand;
5606   struct iv_use *use;
5607
5608   for (i = 0; i < n_iv_uses (data); i++)
5609     {
5610       use = iv_use (data, i);
5611       cand = use->selected;
5612       gcc_assert (cand);
5613
5614       rewrite_use (data, use, cand);
5615     }
5616 }
5617
5618 /* Removes the ivs that are not used after rewriting.  */
5619
5620 static void
5621 remove_unused_ivs (struct ivopts_data *data)
5622 {
5623   unsigned j;
5624   bitmap_iterator bi;
5625
5626   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5627     {
5628       struct version_info *info;
5629
5630       info = ver_info (data, j);
5631       if (info->iv
5632           && !zero_p (info->iv->step)
5633           && !info->inv_id
5634           && !info->iv->have_use_for
5635           && !info->preserve_biv)
5636         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5637     }
5638 }
5639
5640 /* Frees data allocated by the optimization of a single loop.  */
5641
5642 static void
5643 free_loop_data (struct ivopts_data *data)
5644 {
5645   unsigned i, j;
5646   bitmap_iterator bi;
5647   tree obj;
5648
5649   htab_empty (data->niters);
5650
5651   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5652     {
5653       struct version_info *info;
5654
5655       info = ver_info (data, i);
5656       if (info->iv)
5657         free (info->iv);
5658       info->iv = NULL;
5659       info->has_nonlin_use = false;
5660       info->preserve_biv = false;
5661       info->inv_id = 0;
5662     }
5663   bitmap_clear (data->relevant);
5664   bitmap_clear (data->important_candidates);
5665
5666   for (i = 0; i < n_iv_uses (data); i++)
5667     {
5668       struct iv_use *use = iv_use (data, i);
5669
5670       free (use->iv);
5671       BITMAP_FREE (use->related_cands);
5672       for (j = 0; j < use->n_map_members; j++)
5673         if (use->cost_map[j].depends_on)
5674           BITMAP_FREE (use->cost_map[j].depends_on);
5675       free (use->cost_map);
5676       free (use);
5677     }
5678   VEC_truncate (iv_use_p, data->iv_uses, 0);
5679
5680   for (i = 0; i < n_iv_cands (data); i++)
5681     {
5682       struct iv_cand *cand = iv_cand (data, i);
5683
5684       if (cand->iv)
5685         free (cand->iv);
5686       if (cand->depends_on)
5687         BITMAP_FREE (cand->depends_on);
5688       free (cand);
5689     }
5690   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5691
5692   if (data->version_info_size < num_ssa_names)
5693     {
5694       data->version_info_size = 2 * num_ssa_names;
5695       free (data->version_info);
5696       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5697     }
5698
5699   data->max_inv_id = 0;
5700
5701   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5702     SET_DECL_RTL (obj, NULL_RTX);
5703
5704   VEC_truncate (tree, decl_rtl_to_reset, 0);
5705 }
5706
5707 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5708    loop tree.  */
5709
5710 static void
5711 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5712 {
5713   free_loop_data (data);
5714   free (data->version_info);
5715   BITMAP_FREE (data->relevant);
5716   BITMAP_FREE (data->important_candidates);
5717   htab_delete (data->niters);
5718
5719   VEC_free (tree, heap, decl_rtl_to_reset);
5720   VEC_free (iv_use_p, heap, data->iv_uses);
5721   VEC_free (iv_cand_p, heap, data->iv_candidates);
5722 }
5723
5724 /* Optimizes the LOOP.  Returns true if anything changed.  */
5725
5726 static bool
5727 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5728 {
5729   bool changed = false;
5730   struct iv_ca *iv_ca;
5731   edge exit;
5732
5733   data->current_loop = loop;
5734
5735   if (dump_file && (dump_flags & TDF_DETAILS))
5736     {
5737       fprintf (dump_file, "Processing loop %d\n", loop->num);
5738
5739       exit = single_dom_exit (loop);
5740       if (exit)
5741         {
5742           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5743                    exit->src->index, exit->dest->index);
5744           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5745           fprintf (dump_file, "\n");
5746         }
5747
5748       fprintf (dump_file, "\n");
5749     }
5750
5751   /* For each ssa name determines whether it behaves as an induction variable
5752      in some loop.  */
5753   if (!find_induction_variables (data))
5754     goto finish;
5755
5756   /* Finds interesting uses (item 1).  */
5757   find_interesting_uses (data);
5758   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5759     goto finish;
5760
5761   /* Finds candidates for the induction variables (item 2).  */
5762   find_iv_candidates (data);
5763
5764   /* Calculates the costs (item 3, part 1).  */
5765   determine_use_iv_costs (data);
5766   determine_iv_costs (data);
5767   determine_set_costs (data);
5768
5769   /* Find the optimal set of induction variables (item 3, part 2).  */
5770   iv_ca = find_optimal_iv_set (data);
5771   if (!iv_ca)
5772     goto finish;
5773   changed = true;
5774
5775   /* Create the new induction variables (item 4, part 1).  */
5776   create_new_ivs (data, iv_ca);
5777   iv_ca_free (&iv_ca);
5778
5779   /* Rewrite the uses (item 4, part 2).  */
5780   rewrite_uses (data);
5781
5782   /* Remove the ivs that are unused after rewriting.  */
5783   remove_unused_ivs (data);
5784
5785   /* We have changed the structure of induction variables; it might happen
5786      that definitions in the scev database refer to some of them that were
5787      eliminated.  */
5788   scev_reset ();
5789
5790 finish:
5791   free_loop_data (data);
5792
5793   return changed;
5794 }
5795
5796 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5797
5798 void
5799 tree_ssa_iv_optimize (struct loops *loops)
5800 {
5801   struct loop *loop;
5802   struct ivopts_data data;
5803
5804   tree_ssa_iv_optimize_init (&data);
5805
5806   /* Optimize the loops starting with the innermost ones.  */
5807   loop = loops->tree_root;
5808   while (loop->inner)
5809     loop = loop->inner;
5810
5811   /* Scan the loops, inner ones first.  */
5812   while (loop != loops->tree_root)
5813     {
5814       if (dump_file && (dump_flags & TDF_DETAILS))
5815         flow_loop_dump (loop, dump_file, NULL, 1);
5816
5817       tree_ssa_iv_optimize_loop (&data, loop);
5818
5819       if (loop->next)
5820         {
5821           loop = loop->next;
5822           while (loop->inner)
5823             loop = loop->inner;
5824         }
5825       else
5826         loop = loop->outer;
5827     }
5828
5829   tree_ssa_iv_optimize_finalize (&data);
5830 }