gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = loop->single_exit;
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   /* If this is a pointer casted to any type, we need to determine
 839      the base object for the pointer; so handle conversions before
 840      throwing away non-pointer expressions.  */
 841   if (TREE_CODE (expr) == NOP_EXPR
 842       || TREE_CODE (expr) == CONVERT_EXPR)
 843     return determine_base_object (TREE_OPERAND (expr, 0));
 844
 845   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 846     return NULL_TREE;
 847
 848   switch (code)
 849     {
 850     case INTEGER_CST:
 851       return NULL_TREE;
 852
 853     case ADDR_EXPR:
 854       obj = TREE_OPERAND (expr, 0);
 855       base = get_base_address (obj);
 856
 857       if (!base)
 858         return expr;
 859
 860       if (TREE_CODE (base) == INDIRECT_REF)
 861         return determine_base_object (TREE_OPERAND (base, 0));
 862
 863       return fold_convert (ptr_type_node,
 864                            build_fold_addr_expr (base));
 865
 866     case PLUS_EXPR:
 867     case MINUS_EXPR:
 868       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 869       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 870
 871       if (!op1)
 872         return op0;
 873
 874       if (!op0)
 875         return (code == PLUS_EXPR
 876                 ? op1
 877                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 878
 879       return fold_build2 (code, ptr_type_node, op0, op1);
 880
 881     default:
 882       return fold_convert (ptr_type_node, expr);
 883     }
 884 }
 885
 886 /* Allocates an induction variable with given initial value BASE and step STEP
 887    for loop LOOP.  */
 888
 889 static struct iv *
 890 alloc_iv (tree base, tree step)
 891 {
 892   struct iv *iv = XCNEW (struct iv);
 893
 894   if (step && integer_zerop (step))
 895     step = NULL_TREE;
 896
 897   iv->base = base;
 898   iv->base_object = determine_base_object (base);
 899   iv->step = step;
 900   iv->biv_p = false;
 901   iv->have_use_for = false;
 902   iv->use_id = 0;
 903   iv->ssa_name = NULL_TREE;
 904
 905   return iv;
 906 }
 907
 908 /* Sets STEP and BASE for induction variable IV.  */
 909
 910 static void
 911 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 912 {
 913   struct version_info *info = name_info (data, iv);
 914
 915   gcc_assert (!info->iv);
 916
 917   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 918   info->iv = alloc_iv (base, step);
 919   info->iv->ssa_name = iv;
 920 }
 921
 922 /* Finds induction variable declaration for VAR.  */
 923
 924 static struct iv *
 925 get_iv (struct ivopts_data *data, tree var)
 926 {
 927   basic_block bb;
 928
 929   if (!name_info (data, var)->iv)
 930     {
 931       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 932
 933       if (!bb
 934           || !flow_bb_inside_loop_p (data->current_loop, bb))
 935         set_iv (data, var, var, NULL_TREE);
 936     }
 937
 938   return name_info (data, var)->iv;
 939 }
 940
 941 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 942    not define a simple affine biv with nonzero step.  */
 943
 944 static tree
 945 determine_biv_step (tree phi)
 946 {
 947   struct loop *loop = bb_for_stmt (phi)->loop_father;
 948   tree name = PHI_RESULT (phi);
 949   affine_iv iv;
 950
 951   if (!is_gimple_reg (name))
 952     return NULL_TREE;
 953
 954   if (!simple_iv (loop, phi, name, &iv, true))
 955     return NULL_TREE;
 956
 957   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 958 }
 959
 960 /* Finds basic ivs.  */
 961
 962 static bool
 963 find_bivs (struct ivopts_data *data)
 964 {
 965   tree phi, step, type, base;
 966   bool found = false;
 967   struct loop *loop = data->current_loop;
 968
 969   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 970     {
 971       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 972         continue;
 973
 974       step = determine_biv_step (phi);
 975       if (!step)
 976         continue;
 977
 978       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 979       base = expand_simple_operations (base);
 980       if (contains_abnormal_ssa_name_p (base)
 981           || contains_abnormal_ssa_name_p (step))
 982         continue;
 983
 984       type = TREE_TYPE (PHI_RESULT (phi));
 985       base = fold_convert (type, base);
 986       if (step)
 987         step = fold_convert (type, step);
 988
 989       set_iv (data, PHI_RESULT (phi), base, step);
 990       found = true;
 991     }
 992
 993   return found;
 994 }
 995
 996 /* Marks basic ivs.  */
 997
 998 static void
 999 mark_bivs (struct ivopts_data *data)
1000 {
1001   tree phi, var;
1002   struct iv *iv, *incr_iv;
1003   struct loop *loop = data->current_loop;
1004   basic_block incr_bb;
1005
1006   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1007     {
1008       iv = get_iv (data, PHI_RESULT (phi));
1009       if (!iv)
1010         continue;
1011
1012       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1013       incr_iv = get_iv (data, var);
1014       if (!incr_iv)
1015         continue;
1016
1017       /* If the increment is in the subloop, ignore it.  */
1018       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1019       if (incr_bb->loop_father != data->current_loop
1020           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1021         continue;
1022
1023       iv->biv_p = true;
1024       incr_iv->biv_p = true;
1025     }
1026 }
1027
1028 /* Checks whether STMT defines a linear induction variable and stores its
1029    parameters to IV.  */
1030
1031 static bool
1032 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1033 {
1034   tree lhs;
1035   struct loop *loop = data->current_loop;
1036
1037   iv->base = NULL_TREE;
1038   iv->step = NULL_TREE;
1039
1040   if (TREE_CODE (stmt) != MODIFY_EXPR)
1041     return false;
1042
1043   lhs = TREE_OPERAND (stmt, 0);
1044   if (TREE_CODE (lhs) != SSA_NAME)
1045     return false;
1046
1047   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1048     return false;
1049   iv->base = expand_simple_operations (iv->base);
1050
1051   if (contains_abnormal_ssa_name_p (iv->base)
1052       || contains_abnormal_ssa_name_p (iv->step))
1053     return false;
1054
1055   return true;
1056 }
1057
1058 /* Finds general ivs in statement STMT.  */
1059
1060 static void
1061 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1062 {
1063   affine_iv iv;
1064
1065   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1066     return;
1067
1068   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1069 }
1070
1071 /* Finds general ivs in basic block BB.  */
1072
1073 static void
1074 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1075 {
1076   block_stmt_iterator bsi;
1077
1078   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1079     find_givs_in_stmt (data, bsi_stmt (bsi));
1080 }
1081
1082 /* Finds general ivs.  */
1083
1084 static void
1085 find_givs (struct ivopts_data *data)
1086 {
1087   struct loop *loop = data->current_loop;
1088   basic_block *body = get_loop_body_in_dom_order (loop);
1089   unsigned i;
1090
1091   for (i = 0; i < loop->num_nodes; i++)
1092     find_givs_in_bb (data, body[i]);
1093   free (body);
1094 }
1095
1096 /* For each ssa name defined in LOOP determines whether it is an induction
1097    variable and if so, its initial value and step.  */
1098
1099 static bool
1100 find_induction_variables (struct ivopts_data *data)
1101 {
1102   unsigned i;
1103   bitmap_iterator bi;
1104
1105   if (!find_bivs (data))
1106     return false;
1107
1108   find_givs (data);
1109   mark_bivs (data);
1110
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       tree niter = niter_for_single_dom_exit (data);
1114
1115       if (niter)
1116         {
1117           fprintf (dump_file, "  number of iterations ");
1118           print_generic_expr (dump_file, niter, TDF_SLIM);
1119           fprintf (dump_file, "\n\n");
1120         };
1121
1122       fprintf (dump_file, "Induction variables:\n\n");
1123
1124       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1125         {
1126           if (ver_info (data, i)->iv)
1127             dump_iv (dump_file, ver_info (data, i)->iv);
1128         }
1129     }
1130
1131   return true;
1132 }
1133
1134 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1135
1136 static struct iv_use *
1137 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1138             tree stmt, enum use_type use_type)
1139 {
1140   struct iv_use *use = XCNEW (struct iv_use);
1141
1142   use->id = n_iv_uses (data);
1143   use->type = use_type;
1144   use->iv = iv;
1145   use->stmt = stmt;
1146   use->op_p = use_p;
1147   use->related_cands = BITMAP_ALLOC (NULL);
1148
1149   /* To avoid showing ssa name in the dumps, if it was not reset by the
1150      caller.  */
1151   iv->ssa_name = NULL_TREE;
1152
1153   if (dump_file && (dump_flags & TDF_DETAILS))
1154     dump_use (dump_file, use);
1155
1156   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1157
1158   return use;
1159 }
1160
1161 /* Checks whether OP is a loop-level invariant and if so, records it.
1162    NONLINEAR_USE is true if the invariant is used in a way we do not
1163    handle specially.  */
1164
1165 static void
1166 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1167 {
1168   basic_block bb;
1169   struct version_info *info;
1170
1171   if (TREE_CODE (op) != SSA_NAME
1172       || !is_gimple_reg (op))
1173     return;
1174
1175   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1176   if (bb
1177       && flow_bb_inside_loop_p (data->current_loop, bb))
1178     return;
1179
1180   info = name_info (data, op);
1181   info->name = op;
1182   info->has_nonlin_use |= nonlinear_use;
1183   if (!info->inv_id)
1184     info->inv_id = ++data->max_inv_id;
1185   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1186 }
1187
1188 /* Checks whether the use OP is interesting and if so, records it.  */
1189
1190 static struct iv_use *
1191 find_interesting_uses_op (struct ivopts_data *data, tree op)
1192 {
1193   struct iv *iv;
1194   struct iv *civ;
1195   tree stmt;
1196   struct iv_use *use;
1197
1198   if (TREE_CODE (op) != SSA_NAME)
1199     return NULL;
1200
1201   iv = get_iv (data, op);
1202   if (!iv)
1203     return NULL;
1204
1205   if (iv->have_use_for)
1206     {
1207       use = iv_use (data, iv->use_id);
1208
1209       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1210       return use;
1211     }
1212
1213   if (zero_p (iv->step))
1214     {
1215       record_invariant (data, op, true);
1216       return NULL;
1217     }
1218   iv->have_use_for = true;
1219
1220   civ = XNEW (struct iv);
1221   *civ = *iv;
1222
1223   stmt = SSA_NAME_DEF_STMT (op);
1224   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1225               || TREE_CODE (stmt) == MODIFY_EXPR);
1226
1227   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1228   iv->use_id = use->id;
1229
1230   return use;
1231 }
1232
1233 /* Checks whether the condition *COND_P in STMT is interesting
1234    and if so, records it.  */
1235
1236 static void
1237 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1238 {
1239   tree *op0_p;
1240   tree *op1_p;
1241   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1242   struct iv const_iv;
1243   tree zero = integer_zero_node;
1244
1245   const_iv.step = NULL_TREE;
1246
1247   if (TREE_CODE (*cond_p) != SSA_NAME
1248       && !COMPARISON_CLASS_P (*cond_p))
1249     return;
1250
1251   if (TREE_CODE (*cond_p) == SSA_NAME)
1252     {
1253       op0_p = cond_p;
1254       op1_p = &zero;
1255     }
1256   else
1257     {
1258       op0_p = &TREE_OPERAND (*cond_p, 0);
1259       op1_p = &TREE_OPERAND (*cond_p, 1);
1260     }
1261
1262   if (TREE_CODE (*op0_p) == SSA_NAME)
1263     iv0 = get_iv (data, *op0_p);
1264   else
1265     iv0 = &const_iv;
1266
1267   if (TREE_CODE (*op1_p) == SSA_NAME)
1268     iv1 = get_iv (data, *op1_p);
1269   else
1270     iv1 = &const_iv;
1271
1272   if (/* When comparing with non-invariant value, we may not do any senseful
1273          induction variable elimination.  */
1274       (!iv0 || !iv1)
1275       /* Eliminating condition based on two ivs would be nontrivial.
1276          ??? TODO -- it is not really important to handle this case.  */
1277       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1278     {
1279       find_interesting_uses_op (data, *op0_p);
1280       find_interesting_uses_op (data, *op1_p);
1281       return;
1282     }
1283
1284   if (zero_p (iv0->step) && zero_p (iv1->step))
1285     {
1286       /* If both are invariants, this is a work for unswitching.  */
1287       return;
1288     }
1289
1290   civ = XNEW (struct iv);
1291   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1292   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1293 }
1294
1295 /* Returns true if expression EXPR is obviously invariant in LOOP,
1296    i.e. if all its operands are defined outside of the LOOP.  */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301   basic_block def_bb;
1302   unsigned i, len;
1303
1304   if (is_gimple_min_invariant (expr))
1305     return true;
1306
1307   if (TREE_CODE (expr) == SSA_NAME)
1308     {
1309       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1310       if (def_bb
1311           && flow_bb_inside_loop_p (loop, def_bb))
1312         return false;
1313
1314       return true;
1315     }
1316
1317   if (!EXPR_P (expr))
1318     return false;
1319
1320   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1321   for (i = 0; i < len; i++)
1322     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1323       return false;
1324
1325   return true;
1326 }
1327
1328 /* Cumulates the steps of indices into DATA and replaces their values with the
1329    initial ones.  Returns false when the value of the index cannot be determined.
1330    Callback for for_each_index.  */
1331
1332 struct ifs_ivopts_data
1333 {
1334   struct ivopts_data *ivopts_data;
1335   tree stmt;
1336   tree *step_p;
1337 };
1338
1339 static bool
1340 idx_find_step (tree base, tree *idx, void *data)
1341 {
1342   struct ifs_ivopts_data *dta = data;
1343   struct iv *iv;
1344   tree step, iv_base, iv_step, lbound, off;
1345   struct loop *loop = dta->ivopts_data->current_loop;
1346
1347   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1348       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1349     return false;
1350
1351   /* If base is a component ref, require that the offset of the reference
1352      be invariant.  */
1353   if (TREE_CODE (base) == COMPONENT_REF)
1354     {
1355       off = component_ref_field_offset (base);
1356       return expr_invariant_in_loop_p (loop, off);
1357     }
1358
1359   /* If base is array, first check whether we will be able to move the
1360      reference out of the loop (in order to take its address in strength
1361      reduction).  In order for this to work we need both lower bound
1362      and step to be loop invariants.  */
1363   if (TREE_CODE (base) == ARRAY_REF)
1364     {
1365       step = array_ref_element_size (base);
1366       lbound = array_ref_low_bound (base);
1367
1368       if (!expr_invariant_in_loop_p (loop, step)
1369           || !expr_invariant_in_loop_p (loop, lbound))
1370         return false;
1371     }
1372
1373   if (TREE_CODE (*idx) != SSA_NAME)
1374     return true;
1375
1376   iv = get_iv (dta->ivopts_data, *idx);
1377   if (!iv)
1378     return false;
1379
1380   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1381           *&x[0], which is not folded and does not trigger the
1382           ARRAY_REF path below.  */
1383   *idx = iv->base;
1384
1385   if (!iv->step)
1386     return true;
1387
1388   if (TREE_CODE (base) == ARRAY_REF)
1389     {
1390       step = array_ref_element_size (base);
1391
1392       /* We only handle addresses whose step is an integer constant.  */
1393       if (TREE_CODE (step) != INTEGER_CST)
1394         return false;
1395     }
1396   else
1397     /* The step for pointer arithmetics already is 1 byte.  */
1398     step = build_int_cst (sizetype, 1);
1399
1400   iv_base = iv->base;
1401   iv_step = iv->step;
1402   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1403                             sizetype, &iv_base, &iv_step, dta->stmt,
1404                             false))
1405     {
1406       /* The index might wrap.  */
1407       return false;
1408     }
1409
1410   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1411
1412   if (!*dta->step_p)
1413     *dta->step_p = step;
1414   else
1415     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1416
1417   return true;
1418 }
1419
1420 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1421    object is passed to it in DATA.  */
1422
1423 static bool
1424 idx_record_use (tree base, tree *idx,
1425                 void *data)
1426 {
1427   find_interesting_uses_op (data, *idx);
1428   if (TREE_CODE (base) == ARRAY_REF)
1429     {
1430       find_interesting_uses_op (data, array_ref_element_size (base));
1431       find_interesting_uses_op (data, array_ref_low_bound (base));
1432     }
1433   return true;
1434 }
1435
1436 /* Returns true if memory reference REF may be unaligned.  */
1437
1438 static bool
1439 may_be_unaligned_p (tree ref)
1440 {
1441   tree base;
1442   tree base_type;
1443   HOST_WIDE_INT bitsize;
1444   HOST_WIDE_INT bitpos;
1445   tree toffset;
1446   enum machine_mode mode;
1447   int unsignedp, volatilep;
1448   unsigned base_align;
1449
1450   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451      thus they are not misaligned.  */
1452   if (TREE_CODE (ref) == TARGET_MEM_REF)
1453     return false;
1454
1455   /* The test below is basically copy of what expr.c:normal_inner_ref
1456      does to check whether the object must be loaded by parts when
1457      STRICT_ALIGNMENT is true.  */
1458   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1459                               &unsignedp, &volatilep, true);
1460   base_type = TREE_TYPE (base);
1461   base_align = TYPE_ALIGN (base_type);
1462
1463   if (mode != BLKmode
1464       && (base_align < GET_MODE_ALIGNMENT (mode)
1465           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1466           || bitpos % BITS_PER_UNIT != 0))
1467     return true;
1468
1469   return false;
1470 }
1471
1472 /* Return true if EXPR may be non-addressable.   */
1473
1474 static bool
1475 may_be_nonaddressable_p (tree expr)
1476 {
1477   switch (TREE_CODE (expr))
1478     {
1479     case COMPONENT_REF:
1480       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1481              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1482
1483     case ARRAY_REF:
1484     case ARRAY_RANGE_REF:
1485       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1486
1487     case VIEW_CONVERT_EXPR:
1488       /* This kind of view-conversions may wrap non-addressable objects
1489          and make them look addressable.  After some processing the
1490          non-addressability may be uncovered again, causing ADDR_EXPRs
1491          of inappropriate objects to be built.  */
1492       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1493              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1494
1495     default:
1496       break;
1497     }
1498
1499   return false;
1500 }
1501
1502 /* Finds addresses in *OP_P inside STMT.  */
1503
1504 static void
1505 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 {
1507   tree base = *op_p, step = NULL;
1508   struct iv *civ;
1509   struct ifs_ivopts_data ifs_ivopts_data;
1510
1511   /* Do not play with volatile memory references.  A bit too conservative,
1512      perhaps, but safe.  */
1513   if (stmt_ann (stmt)->has_volatile_ops)
1514     goto fail;
1515
1516   /* Ignore bitfields for now.  Not really something terribly complicated
1517      to handle.  TODO.  */
1518   if (TREE_CODE (base) == BIT_FIELD_REF)
1519     goto fail;
1520
1521   if (may_be_nonaddressable_p (base))
1522     goto fail;
1523
1524   if (STRICT_ALIGNMENT
1525       && may_be_unaligned_p (base))
1526     goto fail;
1527
1528   base = unshare_expr (base);
1529
1530   if (TREE_CODE (base) == TARGET_MEM_REF)
1531     {
1532       tree type = build_pointer_type (TREE_TYPE (base));
1533       tree astep;
1534
1535       if (TMR_BASE (base)
1536           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1537         {
1538           civ = get_iv (data, TMR_BASE (base));
1539           if (!civ)
1540             goto fail;
1541
1542           TMR_BASE (base) = civ->base;
1543           step = civ->step;
1544         }
1545       if (TMR_INDEX (base)
1546           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1547         {
1548           civ = get_iv (data, TMR_INDEX (base));
1549           if (!civ)
1550             goto fail;
1551
1552           TMR_INDEX (base) = civ->base;
1553           astep = civ->step;
1554
1555           if (astep)
1556             {
1557               if (TMR_STEP (base))
1558                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1559
1560               if (step)
1561                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1562               else
1563                 step = astep;
1564             }
1565         }
1566
1567       if (zero_p (step))
1568         goto fail;
1569       base = tree_mem_ref_addr (type, base);
1570     }
1571   else
1572     {
1573       ifs_ivopts_data.ivopts_data = data;
1574       ifs_ivopts_data.stmt = stmt;
1575       ifs_ivopts_data.step_p = &step;
1576       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1577           || zero_p (step))
1578         goto fail;
1579
1580       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1581       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1582
1583       base = build_fold_addr_expr (base);
1584
1585       /* Substituting bases of IVs into the base expression might
1586          have caused folding opportunities.  */
1587       if (TREE_CODE (base) == ADDR_EXPR)
1588         {
1589           tree *ref = &TREE_OPERAND (base, 0);
1590           while (handled_component_p (*ref))
1591             ref = &TREE_OPERAND (*ref, 0);
1592           if (TREE_CODE (*ref) == INDIRECT_REF)
1593             *ref = fold_indirect_ref (*ref);
1594         }
1595     }
1596
1597   civ = alloc_iv (base, step);
1598   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599   return;
1600
1601 fail:
1602   for_each_index (op_p, idx_record_use, data);
1603 }
1604
1605 /* Finds and records invariants used in STMT.  */
1606
1607 static void
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 {
1610   ssa_op_iter iter;
1611   use_operand_p use_p;
1612   tree op;
1613
1614   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615     {
1616       op = USE_FROM_PTR (use_p);
1617       record_invariant (data, op, false);
1618     }
1619 }
1620
1621 /* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623 static void
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 {
1626   struct iv *iv;
1627   tree op, lhs, rhs;
1628   ssa_op_iter iter;
1629   use_operand_p use_p;
1630
1631   find_invariants_stmt (data, stmt);
1632
1633   if (TREE_CODE (stmt) == COND_EXPR)
1634     {
1635       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636       return;
1637     }
1638
1639   if (TREE_CODE (stmt) == MODIFY_EXPR)
1640     {
1641       lhs = TREE_OPERAND (stmt, 0);
1642       rhs = TREE_OPERAND (stmt, 1);
1643
1644       if (TREE_CODE (lhs) == SSA_NAME)
1645         {
1646           /* If the statement defines an induction variable, the uses are not
1647              interesting by themselves.  */
1648
1649           iv = get_iv (data, lhs);
1650
1651           if (iv && !zero_p (iv->step))
1652             return;
1653         }
1654
1655       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656         {
1657         case tcc_comparison:
1658           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659           return;
1660
1661         case tcc_reference:
1662           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663           if (REFERENCE_CLASS_P (lhs))
1664             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665           return;
1666
1667         default: ;
1668         }
1669
1670       if (REFERENCE_CLASS_P (lhs)
1671           && is_gimple_val (rhs))
1672         {
1673           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674           find_interesting_uses_op (data, rhs);
1675           return;
1676         }
1677
1678       /* TODO -- we should also handle address uses of type
1679
1680          memory = call (whatever);
1681
1682          and
1683
1684          call (memory).  */
1685     }
1686
1687   if (TREE_CODE (stmt) == PHI_NODE
1688       && bb_for_stmt (stmt) == data->current_loop->header)
1689     {
1690       lhs = PHI_RESULT (stmt);
1691       iv = get_iv (data, lhs);
1692
1693       if (iv && !zero_p (iv->step))
1694         return;
1695     }
1696
1697   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698     {
1699       op = USE_FROM_PTR (use_p);
1700
1701       if (TREE_CODE (op) != SSA_NAME)
1702         continue;
1703
1704       iv = get_iv (data, op);
1705       if (!iv)
1706         continue;
1707
1708       find_interesting_uses_op (data, op);
1709     }
1710 }
1711
1712 /* Finds interesting uses of induction variables outside of loops
1713    on loop exit edge EXIT.  */
1714
1715 static void
1716 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717 {
1718   tree phi, def;
1719
1720   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721     {
1722       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723       find_interesting_uses_op (data, def);
1724     }
1725 }
1726
1727 /* Finds uses of the induction variables that are interesting.  */
1728
1729 static void
1730 find_interesting_uses (struct ivopts_data *data)
1731 {
1732   basic_block bb;
1733   block_stmt_iterator bsi;
1734   tree phi;
1735   basic_block *body = get_loop_body (data->current_loop);
1736   unsigned i;
1737   struct version_info *info;
1738   edge e;
1739
1740   if (dump_file && (dump_flags & TDF_DETAILS))
1741     fprintf (dump_file, "Uses:\n\n");
1742
1743   for (i = 0; i < data->current_loop->num_nodes; i++)
1744     {
1745       edge_iterator ei;
1746       bb = body[i];
1747
1748       FOR_EACH_EDGE (e, ei, bb->succs)
1749         if (e->dest != EXIT_BLOCK_PTR
1750             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751           find_interesting_uses_outside (data, e);
1752
1753       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754         find_interesting_uses_stmt (data, phi);
1755       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757     }
1758
1759   if (dump_file && (dump_flags & TDF_DETAILS))
1760     {
1761       bitmap_iterator bi;
1762
1763       fprintf (dump_file, "\n");
1764
1765       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766         {
1767           info = ver_info (data, i);
1768           if (info->inv_id)
1769             {
1770               fprintf (dump_file, "  ");
1771               print_generic_expr (dump_file, info->name, TDF_SLIM);
1772               fprintf (dump_file, " is invariant (%d)%s\n",
1773                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774             }
1775         }
1776
1777       fprintf (dump_file, "\n");
1778     }
1779
1780   free (body);
1781 }
1782
1783 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785    we are at the top-level of the processed address.  */
1786
1787 static tree
1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789                 unsigned HOST_WIDE_INT *offset)
1790 {
1791   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792   enum tree_code code;
1793   tree type, orig_type = TREE_TYPE (expr);
1794   unsigned HOST_WIDE_INT off0, off1, st;
1795   tree orig_expr = expr;
1796
1797   STRIP_NOPS (expr);
1798
1799   type = TREE_TYPE (expr);
1800   code = TREE_CODE (expr);
1801   *offset = 0;
1802
1803   switch (code)
1804     {
1805     case INTEGER_CST:
1806       if (!cst_and_fits_in_hwi (expr)
1807           || zero_p (expr))
1808         return orig_expr;
1809
1810       *offset = int_cst_value (expr);
1811       return build_int_cst (orig_type, 0);
1812
1813     case PLUS_EXPR:
1814     case MINUS_EXPR:
1815       op0 = TREE_OPERAND (expr, 0);
1816       op1 = TREE_OPERAND (expr, 1);
1817
1818       op0 = strip_offset_1 (op0, false, false, &off0);
1819       op1 = strip_offset_1 (op1, false, false, &off1);
1820
1821       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822       if (op0 == TREE_OPERAND (expr, 0)
1823           && op1 == TREE_OPERAND (expr, 1))
1824         return orig_expr;
1825
1826       if (zero_p (op1))
1827         expr = op0;
1828       else if (zero_p (op0))
1829         {
1830           if (code == PLUS_EXPR)
1831             expr = op1;
1832           else
1833             expr = fold_build1 (NEGATE_EXPR, type, op1);
1834         }
1835       else
1836         expr = fold_build2 (code, type, op0, op1);
1837
1838       return fold_convert (orig_type, expr);
1839
1840     case ARRAY_REF:
1841       if (!inside_addr)
1842         return orig_expr;
1843
1844       step = array_ref_element_size (expr);
1845       if (!cst_and_fits_in_hwi (step))
1846         break;
1847
1848       st = int_cst_value (step);
1849       op1 = TREE_OPERAND (expr, 1);
1850       op1 = strip_offset_1 (op1, false, false, &off1);
1851       *offset = off1 * st;
1852
1853       if (top_compref
1854           && zero_p (op1))
1855         {
1856           /* Strip the component reference completely.  */
1857           op0 = TREE_OPERAND (expr, 0);
1858           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859           *offset += off0;
1860           return op0;
1861         }
1862       break;
1863
1864     case COMPONENT_REF:
1865       if (!inside_addr)
1866         return orig_expr;
1867
1868       tmp = component_ref_field_offset (expr);
1869       if (top_compref
1870           && cst_and_fits_in_hwi (tmp))
1871         {
1872           /* Strip the component reference completely.  */
1873           op0 = TREE_OPERAND (expr, 0);
1874           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875           *offset = off0 + int_cst_value (tmp);
1876           return op0;
1877         }
1878       break;
1879
1880     case ADDR_EXPR:
1881       op0 = TREE_OPERAND (expr, 0);
1882       op0 = strip_offset_1 (op0, true, true, &off0);
1883       *offset += off0;
1884
1885       if (op0 == TREE_OPERAND (expr, 0))
1886         return orig_expr;
1887
1888       expr = build_fold_addr_expr (op0);
1889       return fold_convert (orig_type, expr);
1890
1891     case INDIRECT_REF:
1892       inside_addr = false;
1893       break;
1894
1895     default:
1896       return orig_expr;
1897     }
1898
1899   /* Default handling of expressions for that we want to recurse into
1900      the first operand.  */
1901   op0 = TREE_OPERAND (expr, 0);
1902   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903   *offset += off0;
1904
1905   if (op0 == TREE_OPERAND (expr, 0)
1906       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907     return orig_expr;
1908
1909   expr = copy_node (expr);
1910   TREE_OPERAND (expr, 0) = op0;
1911   if (op1)
1912     TREE_OPERAND (expr, 1) = op1;
1913
1914   /* Inside address, we might strip the top level component references,
1915      thus changing type of the expression.  Handling of ADDR_EXPR
1916      will fix that.  */
1917   expr = fold_convert (orig_type, expr);
1918
1919   return expr;
1920 }
1921
1922 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
1924 static tree
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 {
1927   return strip_offset_1 (expr, false, false, offset);
1928 }
1929
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931    We return unsigned type with the same precision, which avoids problems
1932    with overflows.  */
1933
1934 static tree
1935 generic_type_for (tree type)
1936 {
1937   if (POINTER_TYPE_P (type))
1938     return unsigned_type_for (type);
1939
1940   if (TYPE_UNSIGNED (type))
1941     return type;
1942
1943   return unsigned_type_for (type);
1944 }
1945
1946 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947    the bitmap to that we should store it.  */
1948
1949 static struct ivopts_data *fd_ivopts_data;
1950 static tree
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 {
1953   bitmap *depends_on = data;
1954   struct version_info *info;
1955
1956   if (TREE_CODE (*expr_p) != SSA_NAME)
1957     return NULL_TREE;
1958   info = name_info (fd_ivopts_data, *expr_p);
1959
1960   if (!info->inv_id || info->has_nonlin_use)
1961     return NULL_TREE;
1962
1963   if (!*depends_on)
1964     *depends_on = BITMAP_ALLOC (NULL);
1965   bitmap_set_bit (*depends_on, info->inv_id);
1966
1967   return NULL_TREE;
1968 }
1969
1970 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971    position to POS.  If USE is not NULL, the candidate is set as related to
1972    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973    replacement of the final value of the iv by a direct computation.  */
1974
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977                  tree base, tree step, bool important, enum iv_position pos,
1978                  struct iv_use *use, tree incremented_at)
1979 {
1980   unsigned i;
1981   struct iv_cand *cand = NULL;
1982   tree type, orig_type;
1983
1984   if (base)
1985     {
1986       orig_type = TREE_TYPE (base);
1987       type = generic_type_for (orig_type);
1988       if (type != orig_type)
1989         {
1990           base = fold_convert (type, base);
1991           if (step)
1992             step = fold_convert (type, step);
1993         }
1994     }
1995
1996   for (i = 0; i < n_iv_cands (data); i++)
1997     {
1998       cand = iv_cand (data, i);
1999
2000       if (cand->pos != pos)
2001         continue;
2002
2003       if (cand->incremented_at != incremented_at)
2004         continue;
2005
2006       if (!cand->iv)
2007         {
2008           if (!base && !step)
2009             break;
2010
2011           continue;
2012         }
2013
2014       if (!base && !step)
2015         continue;
2016
2017       if (!operand_equal_p (base, cand->iv->base, 0))
2018         continue;
2019
2020       if (zero_p (cand->iv->step))
2021         {
2022           if (zero_p (step))
2023             break;
2024         }
2025       else
2026         {
2027           if (step && operand_equal_p (step, cand->iv->step, 0))
2028             break;
2029         }
2030     }
2031
2032   if (i == n_iv_cands (data))
2033     {
2034       cand = XCNEW (struct iv_cand);
2035       cand->id = i;
2036
2037       if (!base && !step)
2038         cand->iv = NULL;
2039       else
2040         cand->iv = alloc_iv (base, step);
2041
2042       cand->pos = pos;
2043       if (pos != IP_ORIGINAL && cand->iv)
2044         {
2045           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046           cand->var_after = cand->var_before;
2047         }
2048       cand->important = important;
2049       cand->incremented_at = incremented_at;
2050       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
2052       if (step
2053           && TREE_CODE (step) != INTEGER_CST)
2054         {
2055           fd_ivopts_data = data;
2056           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057         }
2058
2059       if (dump_file && (dump_flags & TDF_DETAILS))
2060         dump_cand (dump_file, cand);
2061     }
2062
2063   if (important && !cand->important)
2064     {
2065       cand->important = true;
2066       if (dump_file && (dump_flags & TDF_DETAILS))
2067         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068     }
2069
2070   if (use)
2071     {
2072       bitmap_set_bit (use->related_cands, i);
2073       if (dump_file && (dump_flags & TDF_DETAILS))
2074         fprintf (dump_file, "Candidate %d is related to use %d\n",
2075                  cand->id, use->id);
2076     }
2077
2078   return cand;
2079 }
2080
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082    is allowed.
2083
2084    The purpose is to avoid splitting latch edge with a biv increment, thus
2085    creating a jump, possibly confusing other optimization passes and leaving
2086    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087    is not available (so we do not have a better alternative), or if the latch
2088    edge is already nonempty.  */
2089
2090 static bool
2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093   if (!ip_normal_pos (loop))
2094     return true;
2095
2096   if (!empty_block_p (ip_end_pos (loop)))
2097     return true;
2098
2099   return false;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103    position to POS.  If USE is not NULL, the candidate is set as related to
2104    it.  The candidate computation is scheduled on all available positions.  */
2105
2106 static void
2107 add_candidate (struct ivopts_data *data,
2108                tree base, tree step, bool important, struct iv_use *use)
2109 {
2110   if (ip_normal_pos (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112   if (ip_end_pos (data->current_loop)
2113       && allow_ip_end_pos_p (data->current_loop))
2114     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115 }
2116
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118    type with SIZE bits.  */
2119
2120 static void
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122                                      unsigned int size)
2123 {
2124   tree type = lang_hooks.types.type_for_size (size, true);
2125   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126                  true, NULL);
2127 }
2128
2129 /* Adds standard iv candidates.  */
2130
2131 static void
2132 add_standard_iv_candidates (struct ivopts_data *data)
2133 {
2134   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
2136   /* The same for a double-integer type if it is still fast enough.  */
2137   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 }
2140
2141
2142 /* Adds candidates bases on the old induction variable IV.  */
2143
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147   tree phi, def;
2148   struct iv_cand *cand;
2149
2150   add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152   /* The same, but with initial value zero.  */
2153   add_candidate (data,
2154                  build_int_cst (TREE_TYPE (iv->base), 0),
2155                  iv->step, true, NULL);
2156
2157   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158   if (TREE_CODE (phi) == PHI_NODE)
2159     {
2160       /* Additionally record the possibility of leaving the original iv
2161          untouched.  */
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163       cand = add_candidate_1 (data,
2164                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165                               SSA_NAME_DEF_STMT (def));
2166       cand->var_before = iv->ssa_name;
2167       cand->var_after = def;
2168     }
2169 }
2170
2171 /* Adds candidates based on the old induction variables.  */
2172
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176   unsigned i;
2177   struct iv *iv;
2178   bitmap_iterator bi;
2179
2180   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181     {
2182       iv = ver_info (data, i)->iv;
2183       if (iv && iv->biv_p && !zero_p (iv->step))
2184         add_old_iv_candidates (data, iv);
2185     }
2186 }
2187
2188 /* Adds candidates based on the value of the induction variable IV and USE.  */
2189
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192                          struct iv *iv, struct iv_use *use)
2193 {
2194   unsigned HOST_WIDE_INT offset;
2195   tree base;
2196
2197   add_candidate (data, iv->base, iv->step, false, use);
2198
2199   /* The same, but with initial value zero.  Make such variable important,
2200      since it is generic enough so that possibly many uses may be based
2201      on it.  */
2202   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203                  iv->step, true, use);
2204
2205   /* Third, try removing the constant offset.  */
2206   base = strip_offset (iv->base, &offset);
2207   if (offset)
2208     add_candidate (data, base, iv->step, false, use);
2209 }
2210
2211 /* Adds candidates based on the uses.  */
2212
2213 static void
2214 add_derived_ivs_candidates (struct ivopts_data *data)
2215 {
2216   unsigned i;
2217
2218   for (i = 0; i < n_iv_uses (data); i++)
2219     {
2220       struct iv_use *use = iv_use (data, i);
2221
2222       if (!use)
2223         continue;
2224
2225       switch (use->type)
2226         {
2227         case USE_NONLINEAR_EXPR:
2228         case USE_COMPARE:
2229         case USE_ADDRESS:
2230           /* Just add the ivs based on the value of the iv used here.  */
2231           add_iv_value_candidates (data, use->iv, use);
2232           break;
2233
2234         default:
2235           gcc_unreachable ();
2236         }
2237     }
2238 }
2239
2240 /* Record important candidates and add them to related_cands bitmaps
2241    if needed.  */
2242
2243 static void
2244 record_important_candidates (struct ivopts_data *data)
2245 {
2246   unsigned i;
2247   struct iv_use *use;
2248
2249   for (i = 0; i < n_iv_cands (data); i++)
2250     {
2251       struct iv_cand *cand = iv_cand (data, i);
2252
2253       if (cand->important)
2254         bitmap_set_bit (data->important_candidates, i);
2255     }
2256
2257   data->consider_all_candidates = (n_iv_cands (data)
2258                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2259
2260   if (data->consider_all_candidates)
2261     {
2262       /* We will not need "related_cands" bitmaps in this case,
2263          so release them to decrease peak memory consumption.  */
2264       for (i = 0; i < n_iv_uses (data); i++)
2265         {
2266           use = iv_use (data, i);
2267           BITMAP_FREE (use->related_cands);
2268         }
2269     }
2270   else
2271     {
2272       /* Add important candidates to the related_cands bitmaps.  */
2273       for (i = 0; i < n_iv_uses (data); i++)
2274         bitmap_ior_into (iv_use (data, i)->related_cands,
2275                          data->important_candidates);
2276     }
2277 }
2278
2279 /* Finds the candidates for the induction variables.  */
2280
2281 static void
2282 find_iv_candidates (struct ivopts_data *data)
2283 {
2284   /* Add commonly used ivs.  */
2285   add_standard_iv_candidates (data);
2286
2287   /* Add old induction variables.  */
2288   add_old_ivs_candidates (data);
2289
2290   /* Add induction variables derived from uses.  */
2291   add_derived_ivs_candidates (data);
2292
2293   /* Record the important candidates.  */
2294   record_important_candidates (data);
2295 }
2296
2297 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299    we allocate a simple list to every use.  */
2300
2301 static void
2302 alloc_use_cost_map (struct ivopts_data *data)
2303 {
2304   unsigned i, size, s, j;
2305
2306   for (i = 0; i < n_iv_uses (data); i++)
2307     {
2308       struct iv_use *use = iv_use (data, i);
2309       bitmap_iterator bi;
2310
2311       if (data->consider_all_candidates)
2312         size = n_iv_cands (data);
2313       else
2314         {
2315           s = 0;
2316           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317             {
2318               s++;
2319             }
2320
2321           /* Round up to the power of two, so that moduling by it is fast.  */
2322           for (size = 1; size < s; size <<= 1)
2323             continue;
2324         }
2325
2326       use->n_map_members = size;
2327       use->cost_map = XCNEWVEC (struct cost_pair, size);
2328     }
2329 }
2330
2331 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2332    on invariants DEPENDS_ON and that the value used in expressing it
2333    is VALUE.*/
2334
2335 static void
2336 set_use_iv_cost (struct ivopts_data *data,
2337                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2338                  bitmap depends_on, tree value)
2339 {
2340   unsigned i, s;
2341
2342   if (cost == INFTY)
2343     {
2344       BITMAP_FREE (depends_on);
2345       return;
2346     }
2347
2348   if (data->consider_all_candidates)
2349     {
2350       use->cost_map[cand->id].cand = cand;
2351       use->cost_map[cand->id].cost = cost;
2352       use->cost_map[cand->id].depends_on = depends_on;
2353       use->cost_map[cand->id].value = value;
2354       return;
2355     }
2356
2357   /* n_map_members is a power of two, so this computes modulo.  */
2358   s = cand->id & (use->n_map_members - 1);
2359   for (i = s; i < use->n_map_members; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362   for (i = 0; i < s; i++)
2363     if (!use->cost_map[i].cand)
2364       goto found;
2365
2366   gcc_unreachable ();
2367
2368 found:
2369   use->cost_map[i].cand = cand;
2370   use->cost_map[i].cost = cost;
2371   use->cost_map[i].depends_on = depends_on;
2372   use->cost_map[i].value = value;
2373 }
2374
2375 /* Gets cost of (USE, CANDIDATE) pair.  */
2376
2377 static struct cost_pair *
2378 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2379                  struct iv_cand *cand)
2380 {
2381   unsigned i, s;
2382   struct cost_pair *ret;
2383
2384   if (!cand)
2385     return NULL;
2386
2387   if (data->consider_all_candidates)
2388     {
2389       ret = use->cost_map + cand->id;
2390       if (!ret->cand)
2391         return NULL;
2392
2393       return ret;
2394     }
2395
2396   /* n_map_members is a power of two, so this computes modulo.  */
2397   s = cand->id & (use->n_map_members - 1);
2398   for (i = s; i < use->n_map_members; i++)
2399     if (use->cost_map[i].cand == cand)
2400       return use->cost_map + i;
2401
2402   for (i = 0; i < s; i++)
2403     if (use->cost_map[i].cand == cand)
2404       return use->cost_map + i;
2405
2406   return NULL;
2407 }
2408
2409 /* Returns estimate on cost of computing SEQ.  */
2410
2411 static unsigned
2412 seq_cost (rtx seq)
2413 {
2414   unsigned cost = 0;
2415   rtx set;
2416
2417   for (; seq; seq = NEXT_INSN (seq))
2418     {
2419       set = single_set (seq);
2420       if (set)
2421         cost += rtx_cost (set, SET);
2422       else
2423         cost++;
2424     }
2425
2426   return cost;
2427 }
2428
2429 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2430 static rtx
2431 produce_memory_decl_rtl (tree obj, int *regno)
2432 {
2433   rtx x;
2434
2435   gcc_assert (obj);
2436   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437     {
2438       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2439       x = gen_rtx_SYMBOL_REF (Pmode, name);
2440     }
2441   else
2442     x = gen_raw_REG (Pmode, (*regno)++);
2443
2444   return gen_rtx_MEM (DECL_MODE (obj), x);
2445 }
2446
2447 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2448    walk_tree.  DATA contains the actual fake register number.  */
2449
2450 static tree
2451 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452 {
2453   tree obj = NULL_TREE;
2454   rtx x = NULL_RTX;
2455   int *regno = data;
2456
2457   switch (TREE_CODE (*expr_p))
2458     {
2459     case ADDR_EXPR:
2460       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2461            handled_component_p (*expr_p);
2462            expr_p = &TREE_OPERAND (*expr_p, 0))
2463         continue;
2464       obj = *expr_p;
2465       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2466         x = produce_memory_decl_rtl (obj, regno);
2467       break;
2468
2469     case SSA_NAME:
2470       *ws = 0;
2471       obj = SSA_NAME_VAR (*expr_p);
2472       if (!DECL_RTL_SET_P (obj))
2473         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2474       break;
2475
2476     case VAR_DECL:
2477     case PARM_DECL:
2478     case RESULT_DECL:
2479       *ws = 0;
2480       obj = *expr_p;
2481
2482       if (DECL_RTL_SET_P (obj))
2483         break;
2484
2485       if (DECL_MODE (obj) == BLKmode)
2486         x = produce_memory_decl_rtl (obj, regno);
2487       else
2488         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489
2490       break;
2491
2492     default:
2493       break;
2494     }
2495
2496   if (x)
2497     {
2498       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2499       SET_DECL_RTL (obj, x);
2500     }
2501
2502   return NULL_TREE;
2503 }
2504
2505 /* Determines cost of the computation of EXPR.  */
2506
2507 static unsigned
2508 computation_cost (tree expr)
2509 {
2510   rtx seq, rslt;
2511   tree type = TREE_TYPE (expr);
2512   unsigned cost;
2513   /* Avoid using hard regs in ways which may be unsupported.  */
2514   int regno = LAST_VIRTUAL_REGISTER + 1;
2515
2516   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2517   start_sequence ();
2518   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2519   seq = get_insns ();
2520   end_sequence ();
2521
2522   cost = seq_cost (seq);
2523   if (MEM_P (rslt))
2524     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525
2526   return cost;
2527 }
2528
2529 /* Returns variable containing the value of candidate CAND at statement AT.  */
2530
2531 static tree
2532 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533 {
2534   if (stmt_after_increment (loop, cand, stmt))
2535     return cand->var_after;
2536   else
2537     return cand->var_before;
2538 }
2539
2540 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2541    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2542
2543 int
2544 tree_int_cst_sign_bit (tree t)
2545 {
2546   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2547   unsigned HOST_WIDE_INT w;
2548
2549   if (bitno < HOST_BITS_PER_WIDE_INT)
2550     w = TREE_INT_CST_LOW (t);
2551   else
2552     {
2553       w = TREE_INT_CST_HIGH (t);
2554       bitno -= HOST_BITS_PER_WIDE_INT;
2555     }
2556
2557   return (w >> bitno) & 1;
2558 }
2559
2560 /* If we can prove that TOP = cst * BOT for some constant cst,
2561    store cst to MUL and return true.  Otherwise return false.
2562    The returned value is always sign-extended, regardless of the
2563    signedness of TOP and BOT.  */
2564
2565 static bool
2566 constant_multiple_of (tree top, tree bot, double_int *mul)
2567 {
2568   tree mby;
2569   enum tree_code code;
2570   double_int res, p0, p1;
2571   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572
2573   STRIP_NOPS (top);
2574   STRIP_NOPS (bot);
2575
2576   if (operand_equal_p (top, bot, 0))
2577     {
2578       *mul = double_int_one;
2579       return true;
2580     }
2581
2582   code = TREE_CODE (top);
2583   switch (code)
2584     {
2585     case MULT_EXPR:
2586       mby = TREE_OPERAND (top, 1);
2587       if (TREE_CODE (mby) != INTEGER_CST)
2588         return false;
2589
2590       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2591         return false;
2592
2593       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2594                               precision);
2595       return true;
2596
2597     case PLUS_EXPR:
2598     case MINUS_EXPR:
2599       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2600           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2601         return false;
2602
2603       if (code == MINUS_EXPR)
2604         p1 = double_int_neg (p1);
2605       *mul = double_int_sext (double_int_add (p0, p1), precision);
2606       return true;
2607
2608     case INTEGER_CST:
2609       if (TREE_CODE (bot) != INTEGER_CST)
2610         return false;
2611
2612       p0 = double_int_sext (tree_to_double_int (bot), precision);
2613       p1 = double_int_sext (tree_to_double_int (top), precision);
2614       if (double_int_zero_p (p1))
2615         return false;
2616       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2617                               precision);
2618       return double_int_zero_p (res);
2619
2620     default:
2621       return false;
2622     }
2623 }
2624
2625 /* Sets COMB to CST.  */
2626
2627 static void
2628 aff_combination_const (struct affine_tree_combination *comb, tree type,
2629                        unsigned HOST_WIDE_INT cst)
2630 {
2631   unsigned prec = TYPE_PRECISION (type);
2632
2633   comb->type = type;
2634   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635
2636   comb->n = 0;
2637   comb->rest = NULL_TREE;
2638   comb->offset = cst & comb->mask;
2639 }
2640
2641 /* Sets COMB to single element ELT.  */
2642
2643 static void
2644 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645 {
2646   unsigned prec = TYPE_PRECISION (type);
2647
2648   comb->type = type;
2649   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650
2651   comb->n = 1;
2652   comb->elts[0] = elt;
2653   comb->coefs[0] = 1;
2654   comb->rest = NULL_TREE;
2655   comb->offset = 0;
2656 }
2657
2658 /* Scales COMB by SCALE.  */
2659
2660 static void
2661 aff_combination_scale (struct affine_tree_combination *comb,
2662                        unsigned HOST_WIDE_INT scale)
2663 {
2664   unsigned i, j;
2665
2666   if (scale == 1)
2667     return;
2668
2669   if (scale == 0)
2670     {
2671       aff_combination_const (comb, comb->type, 0);
2672       return;
2673     }
2674
2675   comb->offset = (scale * comb->offset) & comb->mask;
2676   for (i = 0, j = 0; i < comb->n; i++)
2677     {
2678       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2679       comb->elts[j] = comb->elts[i];
2680       if (comb->coefs[j] != 0)
2681         j++;
2682     }
2683   comb->n = j;
2684
2685   if (comb->rest)
2686     {
2687       if (comb->n < MAX_AFF_ELTS)
2688         {
2689           comb->coefs[comb->n] = scale;
2690           comb->elts[comb->n] = comb->rest;
2691           comb->rest = NULL_TREE;
2692           comb->n++;
2693         }
2694       else
2695         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2696                                   build_int_cst_type (comb->type, scale));
2697     }
2698 }
2699
2700 /* Adds ELT * SCALE to COMB.  */
2701
2702 static void
2703 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2704                          unsigned HOST_WIDE_INT scale)
2705 {
2706   unsigned i;
2707
2708   if (scale == 0)
2709     return;
2710
2711   for (i = 0; i < comb->n; i++)
2712     if (operand_equal_p (comb->elts[i], elt, 0))
2713       {
2714         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2715         if (comb->coefs[i])
2716           return;
2717
2718         comb->n--;
2719         comb->coefs[i] = comb->coefs[comb->n];
2720         comb->elts[i] = comb->elts[comb->n];
2721
2722         if (comb->rest)
2723           {
2724             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2725             comb->coefs[comb->n] = 1;
2726             comb->elts[comb->n] = comb->rest;
2727             comb->rest = NULL_TREE;
2728             comb->n++;
2729           }
2730         return;
2731       }
2732   if (comb->n < MAX_AFF_ELTS)
2733     {
2734       comb->coefs[comb->n] = scale;
2735       comb->elts[comb->n] = elt;
2736       comb->n++;
2737       return;
2738     }
2739
2740   if (scale == 1)
2741     elt = fold_convert (comb->type, elt);
2742   else
2743     elt = fold_build2 (MULT_EXPR, comb->type,
2744                        fold_convert (comb->type, elt),
2745                        build_int_cst_type (comb->type, scale));
2746
2747   if (comb->rest)
2748     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2749   else
2750     comb->rest = elt;
2751 }
2752
2753 /* Adds COMB2 to COMB1.  */
2754
2755 static void
2756 aff_combination_add (struct affine_tree_combination *comb1,
2757                      struct affine_tree_combination *comb2)
2758 {
2759   unsigned i;
2760
2761   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2762   for (i = 0; i < comb2->n; i++)
2763     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2764   if (comb2->rest)
2765     aff_combination_add_elt (comb1, comb2->rest, 1);
2766 }
2767
2768 /* Convert COMB to TYPE.  */
2769
2770 static void
2771 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772 {
2773   unsigned prec = TYPE_PRECISION (type);
2774   unsigned i;
2775
2776   /* If the precision of both types is the same, it suffices to change the type
2777      of the whole combination -- the elements are allowed to have another type
2778      equivalent wrto STRIP_NOPS.  */
2779   if (prec == TYPE_PRECISION (comb->type))
2780     {
2781       comb->type = type;
2782       return;
2783     }
2784
2785   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2786   comb->offset = comb->offset & comb->mask;
2787
2788   /* The type of the elements can be different from comb->type only as
2789      much as what STRIP_NOPS would remove.  We can just directly cast
2790      to TYPE.  */
2791   for (i = 0; i < comb->n; i++)
2792     comb->elts[i] = fold_convert (type, comb->elts[i]);
2793   if (comb->rest)
2794     comb->rest = fold_convert (type, comb->rest);
2795
2796   comb->type = type;
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929
2930 /* Makes tree from the affine combination COMB.  */
2931
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939
2940   if (comb->n == 0 && comb->offset == 0)
2941     {
2942       if (expr)
2943         {
2944           /* Handle the special case produced by get_computation_aff when
2945              the type does not fit in HOST_WIDE_INT.  */
2946           return fold_convert (type, expr);
2947         }
2948       else
2949         return build_int_cst (type, 0);
2950     }
2951
2952   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953
2954   for (i = 0; i < comb->n; i++)
2955     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2956                             comb->mask);
2957
2958   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959     {
2960       /* Offset is negative.  */
2961       off = (-comb->offset) & comb->mask;
2962       sgn = comb->mask;
2963     }
2964   else
2965     {
2966       off = comb->offset;
2967       sgn = 1;
2968     }
2969   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970                           comb->mask);
2971 }
2972
2973 /* Folds EXPR using the affine expressions framework.  */
2974
2975 static tree
2976 fold_affine_expr (tree expr)
2977 {
2978   tree type = TREE_TYPE (expr);
2979   struct affine_tree_combination comb;
2980
2981   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2982     return expr;
2983
2984   tree_to_aff_combination (expr, type, &comb);
2985   return aff_combination_to_tree (&comb);
2986 }
2987
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989    same precision that is at least as wide as the precision of TYPE, stores
2990    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991    type of A and B.  */
2992
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996   tree wider_type = NULL;
2997   tree suba, subb;
2998   tree atype = TREE_TYPE (*a);
2999
3000   if ((TREE_CODE (*a) == NOP_EXPR
3001        || TREE_CODE (*a) == CONVERT_EXPR))
3002     {
3003       suba = TREE_OPERAND (*a, 0);
3004       wider_type = TREE_TYPE (suba);
3005       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3006         return atype;
3007     }
3008   else
3009     return atype;
3010
3011   if ((TREE_CODE (*b) == NOP_EXPR
3012        || TREE_CODE (*b) == CONVERT_EXPR))
3013     {
3014       subb = TREE_OPERAND (*b, 0);
3015       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3016         return atype;
3017     }
3018   else
3019     return atype;
3020
3021   *a = suba;
3022   *b = subb;
3023   return wider_type;
3024 }
3025
3026 /* Determines the expression by that USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3028    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3029
3030 static bool
3031 get_computation_aff (struct loop *loop,
3032                      struct iv_use *use, struct iv_cand *cand, tree at,
3033                      struct affine_tree_combination *aff)
3034 {
3035   tree ubase = use->iv->base;
3036   tree ustep = use->iv->step;
3037   tree cbase = cand->iv->base;
3038   tree cstep = cand->iv->step;
3039   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3040   tree common_type;
3041   tree uutype;
3042   tree expr, delta;
3043   tree ratio;
3044   unsigned HOST_WIDE_INT ustepi, cstepi;
3045   HOST_WIDE_INT ratioi;
3046   struct affine_tree_combination cbase_aff, expr_aff;
3047   tree cstep_orig = cstep, ustep_orig = ustep;
3048   double_int rat;
3049
3050   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051     {
3052       /* We do not have a precision to express the values of use.  */
3053       return false;
3054     }
3055
3056   expr = var_at_stmt (loop, cand, at);
3057
3058   if (TREE_TYPE (expr) != ctype)
3059     {
3060       /* This may happen with the original ivs.  */
3061       expr = fold_convert (ctype, expr);
3062     }
3063
3064   if (TYPE_UNSIGNED (utype))
3065     uutype = utype;
3066   else
3067     {
3068       uutype = unsigned_type_for (utype);
3069       ubase = fold_convert (uutype, ubase);
3070       ustep = fold_convert (uutype, ustep);
3071     }
3072
3073   if (uutype != ctype)
3074     {
3075       expr = fold_convert (uutype, expr);
3076       cbase = fold_convert (uutype, cbase);
3077       cstep = fold_convert (uutype, cstep);
3078
3079       /* If the conversion is not noop, we must take it into account when
3080          considering the value of the step.  */
3081       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3082         cstep_orig = cstep;
3083     }
3084
3085   if (cst_and_fits_in_hwi (cstep_orig)
3086       && cst_and_fits_in_hwi (ustep_orig))
3087     {
3088       ustepi = int_cst_value (ustep_orig);
3089       cstepi = int_cst_value (cstep_orig);
3090
3091       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092         {
3093           /* TODO maybe consider case when ustep divides cstep and the ratio is
3094              a power of 2 (so that the division is fast to execute)?  We would
3095              need to be much more careful with overflows etc. then.  */
3096           return false;
3097         }
3098
3099       ratio = build_int_cst_type (uutype, ratioi);
3100     }
3101   else
3102     {
3103       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3104         return false;
3105       ratio = double_int_to_tree (uutype, rat);
3106
3107       /* Ratioi is only used to detect special cases when the multiplicative
3108          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3109          set it to 0.  */
3110       if (double_int_fits_in_shwi_p (rat))
3111         ratioi = double_int_to_shwi (rat);
3112       else
3113         ratioi = 0;
3114     }
3115
3116   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117      type, we achieve better folding by computing their difference in this
3118      wider type, and cast the result to UUTYPE.  We do not need to worry about
3119      overflows, as all the arithmetics will in the end be performed in UUTYPE
3120      anyway.  */
3121   common_type = determine_common_wider_type (&ubase, &cbase);
3122
3123   /* We may need to shift the value if we are after the increment.  */
3124   if (stmt_after_increment (loop, cand, at))
3125     {
3126       if (uutype != common_type)
3127         cstep = fold_convert (common_type, cstep);
3128       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3129     }
3130
3131   /* use = ubase - ratio * cbase + ratio * var.
3132
3133      In general case ubase + ratio * (var - cbase) could be better (one less
3134      multiplication), but often it is possible to eliminate redundant parts
3135      of computations from (ubase - ratio * cbase) term, and if it does not
3136      happen, fold is able to apply the distributive law to obtain this form
3137      anyway.  */
3138
3139   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140     {
3141       /* Let's compute in trees and just return the result in AFF.  This case
3142          should not be very common, and fold itself is not that bad either,
3143          so making the aff. functions more complicated to handle this case
3144          is not that urgent.  */
3145       if (ratioi == 1)
3146         {
3147           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3148           if (uutype != common_type)
3149             delta = fold_convert (uutype, delta);
3150           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151         }
3152       else if (ratioi == -1)
3153         {
3154           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3155           if (uutype != common_type)
3156             delta = fold_convert (uutype, delta);
3157           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158         }
3159       else
3160         {
3161           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3162           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3163           if (uutype != common_type)
3164             delta = fold_convert (uutype, delta);
3165           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3166           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3167         }
3168
3169       aff->type = uutype;
3170       aff->n = 0;
3171       aff->offset = 0;
3172       aff->mask = 0;
3173       aff->rest = expr;
3174       return true;
3175     }
3176
3177   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3178      possible to compute ratioi.  */
3179   gcc_assert (ratioi);
3180
3181   tree_to_aff_combination (ubase, common_type, aff);
3182   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3183   tree_to_aff_combination (expr, uutype, &expr_aff);
3184   aff_combination_scale (&cbase_aff, -ratioi);
3185   aff_combination_scale (&expr_aff, ratioi);
3186   aff_combination_add (aff, &cbase_aff);
3187   if (common_type != uutype)
3188     aff_combination_convert (uutype, aff);
3189   aff_combination_add (aff, &expr_aff);
3190
3191   return true;
3192 }
3193
3194 /* Determines the expression by that USE is expressed from induction variable
3195    CAND at statement AT in LOOP.  The computation is unshared.  */
3196
3197 static tree
3198 get_computation_at (struct loop *loop,
3199                     struct iv_use *use, struct iv_cand *cand, tree at)
3200 {
3201   struct affine_tree_combination aff;
3202   tree type = TREE_TYPE (use->iv->base);
3203
3204   if (!get_computation_aff (loop, use, cand, at, &aff))
3205     return NULL_TREE;
3206   unshare_aff_combination (&aff);
3207   return fold_convert (type, aff_combination_to_tree (&aff));
3208 }
3209
3210 /* Determines the expression by that USE is expressed from induction variable
3211    CAND in LOOP.  The computation is unshared.  */
3212
3213 static tree
3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 {
3216   return get_computation_at (loop, use, cand, use->stmt);
3217 }
3218
3219 /* Returns cost of addition in MODE.  */
3220
3221 static unsigned
3222 add_cost (enum machine_mode mode)
3223 {
3224   static unsigned costs[NUM_MACHINE_MODES];
3225   rtx seq;
3226   unsigned cost;
3227
3228   if (costs[mode])
3229     return costs[mode];
3230
3231   start_sequence ();
3232   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3233                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3235                  NULL_RTX);
3236   seq = get_insns ();
3237   end_sequence ();
3238
3239   cost = seq_cost (seq);
3240   if (!cost)
3241     cost = 1;
3242
3243   costs[mode] = cost;
3244
3245   if (dump_file && (dump_flags & TDF_DETAILS))
3246     fprintf (dump_file, "Addition in %s costs %d\n",
3247              GET_MODE_NAME (mode), cost);
3248   return cost;
3249 }
3250
3251 /* Entry in a hashtable of already known costs for multiplication.  */
3252 struct mbc_entry
3253 {
3254   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3255   enum machine_mode mode;       /* In mode.  */
3256   unsigned cost;                /* The cost.  */
3257 };
3258
3259 /* Counts hash value for the ENTRY.  */
3260
3261 static hashval_t
3262 mbc_entry_hash (const void *entry)
3263 {
3264   const struct mbc_entry *e = entry;
3265
3266   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3267 }
3268
3269 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3270
3271 static int
3272 mbc_entry_eq (const void *entry1, const void *entry2)
3273 {
3274   const struct mbc_entry *e1 = entry1;
3275   const struct mbc_entry *e2 = entry2;
3276
3277   return (e1->mode == e2->mode
3278           && e1->cst == e2->cst);
3279 }
3280
3281 /* Returns cost of multiplication by constant CST in MODE.  */
3282
3283 unsigned
3284 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285 {
3286   static htab_t costs;
3287   struct mbc_entry **cached, act;
3288   rtx seq;
3289   unsigned cost;
3290
3291   if (!costs)
3292     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293
3294   act.mode = mode;
3295   act.cst = cst;
3296   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3297   if (*cached)
3298     return (*cached)->cost;
3299
3300   *cached = XNEW (struct mbc_entry);
3301   (*cached)->mode = mode;
3302   (*cached)->cst = cst;
3303
3304   start_sequence ();
3305   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3306                gen_int_mode (cst, mode), NULL_RTX, 0);
3307   seq = get_insns ();
3308   end_sequence ();
3309
3310   cost = seq_cost (seq);
3311
3312   if (dump_file && (dump_flags & TDF_DETAILS))
3313     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3314              (int) cst, GET_MODE_NAME (mode), cost);
3315
3316   (*cached)->cost = cost;
3317
3318   return cost;
3319 }
3320
3321 /* Returns true if multiplying by RATIO is allowed in address.  */
3322
3323 bool
3324 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3325 {
3326 #define MAX_RATIO 128
3327   static sbitmap valid_mult;
3328
3329   if (!valid_mult)
3330     {
3331       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3332       rtx addr;
3333       HOST_WIDE_INT i;
3334
3335       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3336       sbitmap_zero (valid_mult);
3337       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3338       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3339         {
3340           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3341           if (memory_address_p (Pmode, addr))
3342             SET_BIT (valid_mult, i + MAX_RATIO);
3343         }
3344
3345       if (dump_file && (dump_flags & TDF_DETAILS))
3346         {
3347           fprintf (dump_file, "  allowed multipliers:");
3348           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3349             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3350               fprintf (dump_file, " %d", (int) i);
3351           fprintf (dump_file, "\n");
3352           fprintf (dump_file, "\n");
3353         }
3354     }
3355
3356   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3357     return false;
3358
3359   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3360 }
3361
3362 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3363    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3364    variable is omitted.  The created memory accesses MODE.
3365
3366    TODO -- there must be some better way.  This all is quite crude.  */
3367
3368 static unsigned
3369 get_address_cost (bool symbol_present, bool var_present,
3370                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3371 {
3372   static bool initialized = false;
3373   static HOST_WIDE_INT rat, off;
3374   static HOST_WIDE_INT min_offset, max_offset;
3375   static unsigned costs[2][2][2][2];
3376   unsigned cost, acost;
3377   bool offset_p, ratio_p;
3378   HOST_WIDE_INT s_offset;
3379   unsigned HOST_WIDE_INT mask;
3380   unsigned bits;
3381
3382   if (!initialized)
3383     {
3384       HOST_WIDE_INT i;
3385       int old_cse_not_expected;
3386       unsigned sym_p, var_p, off_p, rat_p, add_c;
3387       rtx seq, addr, base;
3388       rtx reg0, reg1;
3389
3390       initialized = true;
3391
3392       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3393
3394       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3395       for (i = 1; i <= 1 << 20; i <<= 1)
3396         {
3397           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3398           if (!memory_address_p (Pmode, addr))
3399             break;
3400         }
3401       max_offset = i >> 1;
3402       off = max_offset;
3403
3404       for (i = 1; i <= 1 << 20; i <<= 1)
3405         {
3406           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3407           if (!memory_address_p (Pmode, addr))
3408             break;
3409         }
3410       min_offset = -(i >> 1);
3411
3412       if (dump_file && (dump_flags & TDF_DETAILS))
3413         {
3414           fprintf (dump_file, "get_address_cost:\n");
3415           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3416           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3417         }
3418
3419       rat = 1;
3420       for (i = 2; i <= MAX_RATIO; i++)
3421         if (multiplier_allowed_in_address_p (i))
3422           {
3423             rat = i;
3424             break;
3425           }
3426
3427       /* Compute the cost of various addressing modes.  */
3428       acost = 0;
3429       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3430       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3431
3432       for (i = 0; i < 16; i++)
3433         {
3434           sym_p = i & 1;
3435           var_p = (i >> 1) & 1;
3436           off_p = (i >> 2) & 1;
3437           rat_p = (i >> 3) & 1;
3438
3439           addr = reg0;
3440           if (rat_p)
3441             addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3442
3443           if (var_p)
3444             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3445
3446           if (sym_p)
3447             {
3448               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3449               if (off_p)
3450                 base = gen_rtx_fmt_e (CONST, Pmode,
3451                                       gen_rtx_fmt_ee (PLUS, Pmode,
3452                                                       base,
3453                                                       gen_int_mode (off, Pmode)));
3454             }
3455           else if (off_p)
3456             base = gen_int_mode (off, Pmode);
3457           else
3458             base = NULL_RTX;
3459
3460           if (base)
3461             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3462
3463           start_sequence ();
3464           /* To avoid splitting addressing modes, pretend that no cse will
3465              follow.  */
3466           old_cse_not_expected = cse_not_expected;
3467           cse_not_expected = true;
3468           addr = memory_address (Pmode, addr);
3469           cse_not_expected = old_cse_not_expected;
3470           seq = get_insns ();
3471           end_sequence ();
3472
3473           acost = seq_cost (seq);
3474           acost += address_cost (addr, Pmode);
3475
3476           if (!acost)
3477             acost = 1;
3478           costs[sym_p][var_p][off_p][rat_p] = acost;
3479         }
3480
3481       /* On some targets, it is quite expensive to load symbol to a register,
3482          which makes addresses that contain symbols look much more expensive.
3483          However, the symbol will have to be loaded in any case before the
3484          loop (and quite likely we have it in register already), so it does not
3485          make much sense to penalize them too heavily.  So make some final
3486          tweaks for the SYMBOL_PRESENT modes:
3487
3488          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3489          var is cheaper, use this mode with small penalty.
3490          If VAR_PRESENT is true, try whether the mode with
3491          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3492          if this is the case, use it.  */
3493       add_c = add_cost (Pmode);
3494       for (i = 0; i < 8; i++)
3495         {
3496           var_p = i & 1;
3497           off_p = (i >> 1) & 1;
3498           rat_p = (i >> 2) & 1;
3499
3500           acost = costs[0][1][off_p][rat_p] + 1;
3501           if (var_p)
3502             acost += add_c;
3503
3504           if (acost < costs[1][var_p][off_p][rat_p])
3505             costs[1][var_p][off_p][rat_p] = acost;
3506         }
3507
3508       if (dump_file && (dump_flags & TDF_DETAILS))
3509         {
3510           fprintf (dump_file, "Address costs:\n");
3511
3512           for (i = 0; i < 16; i++)
3513             {
3514               sym_p = i & 1;
3515               var_p = (i >> 1) & 1;
3516               off_p = (i >> 2) & 1;
3517               rat_p = (i >> 3) & 1;
3518
3519               fprintf (dump_file, "  ");
3520               if (sym_p)
3521                 fprintf (dump_file, "sym + ");
3522               if (var_p)
3523                 fprintf (dump_file, "var + ");
3524               if (off_p)
3525                 fprintf (dump_file, "cst + ");
3526               if (rat_p)
3527                 fprintf (dump_file, "rat * ");
3528
3529               acost = costs[sym_p][var_p][off_p][rat_p];
3530               fprintf (dump_file, "index costs %d\n", acost);
3531             }
3532           fprintf (dump_file, "\n");
3533         }
3534     }
3535
3536   bits = GET_MODE_BITSIZE (Pmode);
3537   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3538   offset &= mask;
3539   if ((offset >> (bits - 1) & 1))
3540     offset |= ~mask;
3541   s_offset = offset;
3542
3543   cost = 0;
3544   offset_p = (s_offset != 0
3545               && min_offset <= s_offset && s_offset <= max_offset);
3546   ratio_p = (ratio != 1
3547              && multiplier_allowed_in_address_p (ratio));
3548
3549   if (ratio != 1 && !ratio_p)
3550     cost += multiply_by_cost (ratio, Pmode);
3551
3552   if (s_offset && !offset_p && !symbol_present)
3553     {
3554       cost += add_cost (Pmode);
3555       var_present = true;
3556     }
3557
3558   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3559   return cost + acost;
3560 }
3561
3562 /* Estimates cost of forcing expression EXPR into a variable.  */
3563
3564 unsigned
3565 force_expr_to_var_cost (tree expr)
3566 {
3567   static bool costs_initialized = false;
3568   static unsigned integer_cost;
3569   static unsigned symbol_cost;
3570   static unsigned address_cost;
3571   tree op0, op1;
3572   unsigned cost0, cost1, cost;
3573   enum machine_mode mode;
3574
3575   if (!costs_initialized)
3576     {
3577       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3578       rtx x = gen_rtx_MEM (DECL_MODE (var),
3579                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3580       tree addr;
3581       tree type = build_pointer_type (integer_type_node);
3582
3583       integer_cost = computation_cost (build_int_cst (integer_type_node,
3584                                                       2000));
3585
3586       SET_DECL_RTL (var, x);
3587       TREE_STATIC (var) = 1;
3588       addr = build1 (ADDR_EXPR, type, var);
3589       symbol_cost = computation_cost (addr) + 1;
3590
3591       address_cost
3592         = computation_cost (build2 (PLUS_EXPR, type,
3593                                     addr,
3594                                     build_int_cst (type, 2000))) + 1;
3595       if (dump_file && (dump_flags & TDF_DETAILS))
3596         {
3597           fprintf (dump_file, "force_expr_to_var_cost:\n");
3598           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3599           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3600           fprintf (dump_file, "  address %d\n", (int) address_cost);
3601           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3602           fprintf (dump_file, "\n");
3603         }
3604
3605       costs_initialized = true;
3606     }
3607
3608   STRIP_NOPS (expr);
3609
3610   if (SSA_VAR_P (expr))
3611     return 0;
3612
3613   if (TREE_INVARIANT (expr))
3614     {
3615       if (TREE_CODE (expr) == INTEGER_CST)
3616         return integer_cost;
3617
3618       if (TREE_CODE (expr) == ADDR_EXPR)
3619         {
3620           tree obj = TREE_OPERAND (expr, 0);
3621
3622           if (TREE_CODE (obj) == VAR_DECL
3623               || TREE_CODE (obj) == PARM_DECL
3624               || TREE_CODE (obj) == RESULT_DECL)
3625             return symbol_cost;
3626         }
3627
3628       return address_cost;
3629     }
3630
3631   switch (TREE_CODE (expr))
3632     {
3633     case PLUS_EXPR:
3634     case MINUS_EXPR:
3635     case MULT_EXPR:
3636       op0 = TREE_OPERAND (expr, 0);
3637       op1 = TREE_OPERAND (expr, 1);
3638       STRIP_NOPS (op0);
3639       STRIP_NOPS (op1);
3640
3641       if (is_gimple_val (op0))
3642         cost0 = 0;
3643       else
3644         cost0 = force_expr_to_var_cost (op0);
3645
3646       if (is_gimple_val (op1))
3647         cost1 = 0;
3648       else
3649         cost1 = force_expr_to_var_cost (op1);
3650
3651       break;
3652
3653     default:
3654       /* Just an arbitrary value, FIXME.  */
3655       return target_spill_cost;
3656     }
3657
3658   mode = TYPE_MODE (TREE_TYPE (expr));
3659   switch (TREE_CODE (expr))
3660     {
3661     case PLUS_EXPR:
3662     case MINUS_EXPR:
3663       cost = add_cost (mode);
3664       break;
3665
3666     case MULT_EXPR:
3667       if (cst_and_fits_in_hwi (op0))
3668         cost = multiply_by_cost (int_cst_value (op0), mode);
3669       else if (cst_and_fits_in_hwi (op1))
3670         cost = multiply_by_cost (int_cst_value (op1), mode);
3671       else
3672         return target_spill_cost;
3673       break;
3674
3675     default:
3676       gcc_unreachable ();
3677     }
3678
3679   cost += cost0;
3680   cost += cost1;
3681
3682   /* Bound the cost by target_spill_cost.  The parts of complicated
3683      computations often are either loop invariant or at least can
3684      be shared between several iv uses, so letting this grow without
3685      limits would not give reasonable results.  */
3686   return cost < target_spill_cost ? cost : target_spill_cost;
3687 }
3688
3689 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3690    invariants the computation depends on.  */
3691
3692 static unsigned
3693 force_var_cost (struct ivopts_data *data,
3694                 tree expr, bitmap *depends_on)
3695 {
3696   if (depends_on)
3697     {
3698       fd_ivopts_data = data;
3699       walk_tree (&expr, find_depends, depends_on, NULL);
3700     }
3701
3702   return force_expr_to_var_cost (expr);
3703 }
3704
3705 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3706    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3707    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3708    invariants the computation depends on.  */
3709
3710 static unsigned
3711 split_address_cost (struct ivopts_data *data,
3712                     tree addr, bool *symbol_present, bool *var_present,
3713                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3714 {
3715   tree core;
3716   HOST_WIDE_INT bitsize;
3717   HOST_WIDE_INT bitpos;
3718   tree toffset;
3719   enum machine_mode mode;
3720   int unsignedp, volatilep;
3721
3722   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3723                               &unsignedp, &volatilep, false);
3724
3725   if (toffset != 0
3726       || bitpos % BITS_PER_UNIT != 0
3727       || TREE_CODE (core) != VAR_DECL)
3728     {
3729       *symbol_present = false;
3730       *var_present = true;
3731       fd_ivopts_data = data;
3732       walk_tree (&addr, find_depends, depends_on, NULL);
3733       return target_spill_cost;
3734     }
3735
3736   *offset += bitpos / BITS_PER_UNIT;
3737   if (TREE_STATIC (core)
3738       || DECL_EXTERNAL (core))
3739     {
3740       *symbol_present = true;
3741       *var_present = false;
3742       return 0;
3743     }
3744
3745   *symbol_present = false;
3746   *var_present = true;
3747   return 0;
3748 }
3749
3750 /* Estimates cost of expressing difference of addresses E1 - E2 as
3751    var + symbol + offset.  The value of offset is added to OFFSET,
3752    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3753    part is missing.  DEPENDS_ON is a set of the invariants the computation
3754    depends on.  */
3755
3756 static unsigned
3757 ptr_difference_cost (struct ivopts_data *data,
3758                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3759                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760 {
3761   HOST_WIDE_INT diff = 0;
3762   unsigned cost;
3763
3764   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3765
3766   if (ptr_difference_const (e1, e2, &diff))
3767     {
3768       *offset += diff;
3769       *symbol_present = false;
3770       *var_present = false;
3771       return 0;
3772     }
3773
3774   if (e2 == integer_zero_node)
3775     return split_address_cost (data, TREE_OPERAND (e1, 0),
3776                                symbol_present, var_present, offset, depends_on);
3777
3778   *symbol_present = false;
3779   *var_present = true;
3780
3781   cost = force_var_cost (data, e1, depends_on);
3782   cost += force_var_cost (data, e2, depends_on);
3783   cost += add_cost (Pmode);
3784
3785   return cost;
3786 }
3787
3788 /* Estimates cost of expressing difference E1 - E2 as
3789    var + symbol + offset.  The value of offset is added to OFFSET,
3790    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3791    part is missing.  DEPENDS_ON is a set of the invariants the computation
3792    depends on.  */
3793
3794 static unsigned
3795 difference_cost (struct ivopts_data *data,
3796                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3797                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3798 {
3799   unsigned cost;
3800   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3801   unsigned HOST_WIDE_INT off1, off2;
3802
3803   e1 = strip_offset (e1, &off1);
3804   e2 = strip_offset (e2, &off2);
3805   *offset += off1 - off2;
3806
3807   STRIP_NOPS (e1);
3808   STRIP_NOPS (e2);
3809
3810   if (TREE_CODE (e1) == ADDR_EXPR)
3811     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3812                                 depends_on);
3813   *symbol_present = false;
3814
3815   if (operand_equal_p (e1, e2, 0))
3816     {
3817       *var_present = false;
3818       return 0;
3819     }
3820   *var_present = true;
3821   if (zero_p (e2))
3822     return force_var_cost (data, e1, depends_on);
3823
3824   if (zero_p (e1))
3825     {
3826       cost = force_var_cost (data, e2, depends_on);
3827       cost += multiply_by_cost (-1, mode);
3828
3829       return cost;
3830     }
3831
3832   cost = force_var_cost (data, e1, depends_on);
3833   cost += force_var_cost (data, e2, depends_on);
3834   cost += add_cost (mode);
3835
3836   return cost;
3837 }
3838
3839 /* Determines the cost of the computation by that USE is expressed
3840    from induction variable CAND.  If ADDRESS_P is true, we just need
3841    to create an address from it, otherwise we want to get it into
3842    register.  A set of invariants we depend on is stored in
3843    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3844
3845 static unsigned
3846 get_computation_cost_at (struct ivopts_data *data,
3847                          struct iv_use *use, struct iv_cand *cand,
3848                          bool address_p, bitmap *depends_on, tree at)
3849 {
3850   tree ubase = use->iv->base, ustep = use->iv->step;
3851   tree cbase, cstep;
3852   tree utype = TREE_TYPE (ubase), ctype;
3853   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3854   HOST_WIDE_INT ratio, aratio;
3855   bool var_present, symbol_present;
3856   unsigned cost = 0, n_sums;
3857
3858   *depends_on = NULL;
3859
3860   /* Only consider real candidates.  */
3861   if (!cand->iv)
3862     return INFTY;
3863
3864   cbase = cand->iv->base;
3865   cstep = cand->iv->step;
3866   ctype = TREE_TYPE (cbase);
3867
3868   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3869     {
3870       /* We do not have a precision to express the values of use.  */
3871       return INFTY;
3872     }
3873
3874   if (address_p)
3875     {
3876       /* Do not try to express address of an object with computation based
3877          on address of a different object.  This may cause problems in rtl
3878          level alias analysis (that does not expect this to be happening,
3879          as this is illegal in C), and would be unlikely to be useful
3880          anyway.  */
3881       if (use->iv->base_object
3882           && cand->iv->base_object
3883           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3884         return INFTY;
3885     }
3886
3887   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3888     {
3889       /* TODO -- add direct handling of this case.  */
3890       goto fallback;
3891     }
3892
3893   /* CSTEPI is removed from the offset in case statement is after the
3894      increment.  If the step is not constant, we use zero instead.
3895      This is a bit imprecise (there is the extra addition), but
3896      redundancy elimination is likely to transform the code so that
3897      it uses value of the variable before increment anyway,
3898      so it is not that much unrealistic.  */
3899   if (cst_and_fits_in_hwi (cstep))
3900     cstepi = int_cst_value (cstep);
3901   else
3902     cstepi = 0;
3903
3904   if (cst_and_fits_in_hwi (ustep)
3905       && cst_and_fits_in_hwi (cstep))
3906     {
3907       ustepi = int_cst_value (ustep);
3908
3909       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3910         return INFTY;
3911     }
3912   else
3913     {
3914       double_int rat;
3915
3916       if (!constant_multiple_of (ustep, cstep, &rat))
3917         return INFTY;
3918
3919       if (double_int_fits_in_shwi_p (rat))
3920         ratio = double_int_to_shwi (rat);
3921       else
3922         return INFTY;
3923     }
3924
3925   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3926      or ratio == 1, it is better to handle this like
3927
3928      ubase - ratio * cbase + ratio * var
3929
3930      (also holds in the case ratio == -1, TODO.  */
3931
3932   if (cst_and_fits_in_hwi (cbase))
3933     {
3934       offset = - ratio * int_cst_value (cbase);
3935       cost += difference_cost (data,
3936                                ubase, integer_zero_node,
3937                                &symbol_present, &var_present, &offset,
3938                                depends_on);
3939     }
3940   else if (ratio == 1)
3941     {
3942       cost += difference_cost (data,
3943                                ubase, cbase,
3944                                &symbol_present, &var_present, &offset,
3945                                depends_on);
3946     }
3947   else
3948     {
3949       cost += force_var_cost (data, cbase, depends_on);
3950       cost += add_cost (TYPE_MODE (ctype));
3951       cost += difference_cost (data,
3952                                ubase, integer_zero_node,
3953                                &symbol_present, &var_present, &offset,
3954                                depends_on);
3955     }
3956
3957   /* If we are after the increment, the value of the candidate is higher by
3958      one iteration.  */
3959   if (stmt_after_increment (data->current_loop, cand, at))
3960     offset -= ratio * cstepi;
3961
3962   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3963      (symbol/var/const parts may be omitted).  If we are looking for an address,
3964      find the cost of addressing this.  */
3965   if (address_p)
3966     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3967
3968   /* Otherwise estimate the costs for computing the expression.  */
3969   aratio = ratio > 0 ? ratio : -ratio;
3970   if (!symbol_present && !var_present && !offset)
3971     {
3972       if (ratio != 1)
3973         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3974
3975       return cost;
3976     }
3977
3978   if (aratio != 1)
3979     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3980
3981   n_sums = 1;
3982   if (var_present
3983       /* Symbol + offset should be compile-time computable.  */
3984       && (symbol_present || offset))
3985     n_sums++;
3986
3987   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3988
3989 fallback:
3990   {
3991     /* Just get the expression, expand it and measure the cost.  */
3992     tree comp = get_computation_at (data->current_loop, use, cand, at);
3993
3994     if (!comp)
3995       return INFTY;
3996
3997     if (address_p)
3998       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3999
4000     return computation_cost (comp);
4001   }
4002 }
4003
4004 /* Determines the cost of the computation by that USE is expressed
4005    from induction variable CAND.  If ADDRESS_P is true, we just need
4006    to create an address from it, otherwise we want to get it into
4007    register.  A set of invariants we depend on is stored in
4008    DEPENDS_ON.  */
4009
4010 static unsigned
4011 get_computation_cost (struct ivopts_data *data,
4012                       struct iv_use *use, struct iv_cand *cand,
4013                       bool address_p, bitmap *depends_on)
4014 {
4015   return get_computation_cost_at (data,
4016                                   use, cand, address_p, depends_on, use->stmt);
4017 }
4018
4019 /* Determines cost of basing replacement of USE on CAND in a generic
4020    expression.  */
4021
4022 static bool
4023 determine_use_iv_cost_generic (struct ivopts_data *data,
4024                                struct iv_use *use, struct iv_cand *cand)
4025 {
4026   bitmap depends_on;
4027   unsigned cost;
4028
4029   /* The simple case first -- if we need to express value of the preserved
4030      original biv, the cost is 0.  This also prevents us from counting the
4031      cost of increment twice -- once at this use and once in the cost of
4032      the candidate.  */
4033   if (cand->pos == IP_ORIGINAL
4034       && cand->incremented_at == use->stmt)
4035     {
4036       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4037       return true;
4038     }
4039
4040   cost = get_computation_cost (data, use, cand, false, &depends_on);
4041   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4042
4043   return cost != INFTY;
4044 }
4045
4046 /* Determines cost of basing replacement of USE on CAND in an address.  */
4047
4048 static bool
4049 determine_use_iv_cost_address (struct ivopts_data *data,
4050                                struct iv_use *use, struct iv_cand *cand)
4051 {
4052   bitmap depends_on;
4053   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4054
4055   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4056
4057   return cost != INFTY;
4058 }
4059
4060 /* Computes value of induction variable IV in iteration NITER.  */
4061
4062 static tree
4063 iv_value (struct iv *iv, tree niter)
4064 {
4065   tree val;
4066   tree type = TREE_TYPE (iv->base);
4067
4068   niter = fold_convert (type, niter);
4069   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4070
4071   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4072 }
4073
4074 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4075
4076 static tree
4077 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4078 {
4079   tree val = iv_value (cand->iv, niter);
4080   tree type = TREE_TYPE (cand->iv->base);
4081
4082   if (stmt_after_increment (loop, cand, at))
4083     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4084
4085   return val;
4086 }
4087
4088 /* Returns period of induction variable iv.  */
4089
4090 static tree
4091 iv_period (struct iv *iv)
4092 {
4093   tree step = iv->step, period, type;
4094   tree pow2div;
4095
4096   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4097
4098   /* Period of the iv is gcd (step, type range).  Since type range is power
4099      of two, it suffices to determine the maximum power of two that divides
4100      step.  */
4101   pow2div = num_ending_zeros (step);
4102   type = unsigned_type_for (TREE_TYPE (step));
4103
4104   period = build_low_bits_mask (type,
4105                                 (TYPE_PRECISION (type)
4106                                  - tree_low_cst (pow2div, 1)));
4107
4108   return period;
4109 }
4110
4111 /* Returns the comparison operator used when eliminating the iv USE.  */
4112
4113 static enum tree_code
4114 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4115 {
4116   struct loop *loop = data->current_loop;
4117   basic_block ex_bb;
4118   edge exit;
4119
4120   ex_bb = bb_for_stmt (use->stmt);
4121   exit = EDGE_SUCC (ex_bb, 0);
4122   if (flow_bb_inside_loop_p (loop, exit->dest))
4123     exit = EDGE_SUCC (ex_bb, 1);
4124
4125   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4126 }
4127
4128 /* Check whether it is possible to express the condition in USE by comparison
4129    of candidate CAND.  If so, store the value compared with to BOUND.  */
4130
4131 static bool
4132 may_eliminate_iv (struct ivopts_data *data,
4133                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4134 {
4135   basic_block ex_bb;
4136   edge exit;
4137   tree nit, nit_type;
4138   tree wider_type, period, per_type;
4139   struct loop *loop = data->current_loop;
4140
4141   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4142     return false;
4143
4144   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4145      for other conditions inside loop body.  */
4146   ex_bb = bb_for_stmt (use->stmt);
4147   if (use->stmt != last_stmt (ex_bb)
4148       || TREE_CODE (use->stmt) != COND_EXPR)
4149     return false;
4150   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4151     return false;
4152
4153   exit = EDGE_SUCC (ex_bb, 0);
4154   if (flow_bb_inside_loop_p (loop, exit->dest))
4155     exit = EDGE_SUCC (ex_bb, 1);
4156   if (flow_bb_inside_loop_p (loop, exit->dest))
4157     return false;
4158
4159   nit = niter_for_exit (data, exit);
4160   if (!nit)
4161     return false;
4162
4163   nit_type = TREE_TYPE (nit);
4164
4165   /* Determine whether we may use the variable to test whether niter iterations
4166      elapsed.  This is the case iff the period of the induction variable is
4167      greater than the number of iterations.  */
4168   period = iv_period (cand->iv);
4169   if (!period)
4170     return false;
4171   per_type = TREE_TYPE (period);
4172
4173   wider_type = TREE_TYPE (period);
4174   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4175     wider_type = per_type;
4176   else
4177     wider_type = nit_type;
4178
4179   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4180                                       fold_convert (wider_type, period),
4181                                       fold_convert (wider_type, nit))))
4182     return false;
4183
4184   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4185   return true;
4186 }
4187
4188 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4189
4190 static bool
4191 determine_use_iv_cost_condition (struct ivopts_data *data,
4192                                  struct iv_use *use, struct iv_cand *cand)
4193 {
4194   tree bound = NULL_TREE, op, cond;
4195   bitmap depends_on = NULL;
4196   unsigned cost;
4197
4198   /* Only consider real candidates.  */
4199   if (!cand->iv)
4200     {
4201       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4202       return false;
4203     }
4204
4205   if (may_eliminate_iv (data, use, cand, &bound))
4206     {
4207       cost = force_var_cost (data, bound, &depends_on);
4208
4209       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4210       return cost != INFTY;
4211     }
4212
4213   /* The induction variable elimination failed; just express the original
4214      giv.  If it is compared with an invariant, note that we cannot get
4215      rid of it.  */
4216   cost = get_computation_cost (data, use, cand, false, &depends_on);
4217
4218   cond = *use->op_p;
4219   if (TREE_CODE (cond) != SSA_NAME)
4220     {
4221       op = TREE_OPERAND (cond, 0);
4222       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4223         op = TREE_OPERAND (cond, 1);
4224       if (TREE_CODE (op) == SSA_NAME)
4225         {
4226           op = get_iv (data, op)->base;
4227           fd_ivopts_data = data;
4228           walk_tree (&op, find_depends, &depends_on, NULL);
4229         }
4230     }
4231
4232   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4233   return cost != INFTY;
4234 }
4235
4236 /* Determines cost of basing replacement of USE on CAND.  Returns false
4237    if USE cannot be based on CAND.  */
4238
4239 static bool
4240 determine_use_iv_cost (struct ivopts_data *data,
4241                        struct iv_use *use, struct iv_cand *cand)
4242 {
4243   switch (use->type)
4244     {
4245     case USE_NONLINEAR_EXPR:
4246       return determine_use_iv_cost_generic (data, use, cand);
4247
4248     case USE_ADDRESS:
4249       return determine_use_iv_cost_address (data, use, cand);
4250
4251     case USE_COMPARE:
4252       return determine_use_iv_cost_condition (data, use, cand);
4253
4254     default:
4255       gcc_unreachable ();
4256     }
4257 }
4258
4259 /* Determines costs of basing the use of the iv on an iv candidate.  */
4260
4261 static void
4262 determine_use_iv_costs (struct ivopts_data *data)
4263 {
4264   unsigned i, j;
4265   struct iv_use *use;
4266   struct iv_cand *cand;
4267   bitmap to_clear = BITMAP_ALLOC (NULL);
4268
4269   alloc_use_cost_map (data);
4270
4271   for (i = 0; i < n_iv_uses (data); i++)
4272     {
4273       use = iv_use (data, i);
4274
4275       if (data->consider_all_candidates)
4276         {
4277           for (j = 0; j < n_iv_cands (data); j++)
4278             {
4279               cand = iv_cand (data, j);
4280               determine_use_iv_cost (data, use, cand);
4281             }
4282         }
4283       else
4284         {
4285           bitmap_iterator bi;
4286
4287           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4288             {
4289               cand = iv_cand (data, j);
4290               if (!determine_use_iv_cost (data, use, cand))
4291                 bitmap_set_bit (to_clear, j);
4292             }
4293
4294           /* Remove the candidates for that the cost is infinite from
4295              the list of related candidates.  */
4296           bitmap_and_compl_into (use->related_cands, to_clear);
4297           bitmap_clear (to_clear);
4298         }
4299     }
4300
4301   BITMAP_FREE (to_clear);
4302
4303   if (dump_file && (dump_flags & TDF_DETAILS))
4304     {
4305       fprintf (dump_file, "Use-candidate costs:\n");
4306
4307       for (i = 0; i < n_iv_uses (data); i++)
4308         {
4309           use = iv_use (data, i);
4310
4311           fprintf (dump_file, "Use %d:\n", i);
4312           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4313           for (j = 0; j < use->n_map_members; j++)
4314             {
4315               if (!use->cost_map[j].cand
4316                   || use->cost_map[j].cost == INFTY)
4317                 continue;
4318
4319               fprintf (dump_file, "  %d\t%d\t",
4320                        use->cost_map[j].cand->id,
4321                        use->cost_map[j].cost);
4322               if (use->cost_map[j].depends_on)
4323                 bitmap_print (dump_file,
4324                               use->cost_map[j].depends_on, "","");
4325               fprintf (dump_file, "\n");
4326             }
4327
4328           fprintf (dump_file, "\n");
4329         }
4330       fprintf (dump_file, "\n");
4331     }
4332 }
4333
4334 /* Determines cost of the candidate CAND.  */
4335
4336 static void
4337 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338 {
4339   unsigned cost_base, cost_step;
4340   tree base;
4341
4342   if (!cand->iv)
4343     {
4344       cand->cost = 0;
4345       return;
4346     }
4347
4348   /* There are two costs associated with the candidate -- its increment
4349      and its initialization.  The second is almost negligible for any loop
4350      that rolls enough, so we take it just very little into account.  */
4351
4352   base = cand->iv->base;
4353   cost_base = force_var_cost (data, base, NULL);
4354   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4355
4356   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4357
4358   /* Prefer the original iv unless we may gain something by replacing it;
4359      this is not really relevant for artificial ivs created by other
4360      passes.  */
4361   if (cand->pos == IP_ORIGINAL
4362       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4363     cand->cost--;
4364
4365   /* Prefer not to insert statements into latch unless there are some
4366      already (so that we do not create unnecessary jumps).  */
4367   if (cand->pos == IP_END
4368       && empty_block_p (ip_end_pos (data->current_loop)))
4369     cand->cost++;
4370 }
4371
4372 /* Determines costs of computation of the candidates.  */
4373
4374 static void
4375 determine_iv_costs (struct ivopts_data *data)
4376 {
4377   unsigned i;
4378
4379   if (dump_file && (dump_flags & TDF_DETAILS))
4380     {
4381       fprintf (dump_file, "Candidate costs:\n");
4382       fprintf (dump_file, "  cand\tcost\n");
4383     }
4384
4385   for (i = 0; i < n_iv_cands (data); i++)
4386     {
4387       struct iv_cand *cand = iv_cand (data, i);
4388
4389       determine_iv_cost (data, cand);
4390
4391       if (dump_file && (dump_flags & TDF_DETAILS))
4392         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4393     }
4394
4395 if (dump_file && (dump_flags & TDF_DETAILS))
4396       fprintf (dump_file, "\n");
4397 }
4398
4399 /* Calculates cost for having SIZE induction variables.  */
4400
4401 static unsigned
4402 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4403 {
4404   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4405 }
4406
4407 /* For each size of the induction variable set determine the penalty.  */
4408
4409 static void
4410 determine_set_costs (struct ivopts_data *data)
4411 {
4412   unsigned j, n;
4413   tree phi, op;
4414   struct loop *loop = data->current_loop;
4415   bitmap_iterator bi;
4416
4417   /* We use the following model (definitely improvable, especially the
4418      cost function -- TODO):
4419
4420      We estimate the number of registers available (using MD data), name it A.
4421
4422      We estimate the number of registers used by the loop, name it U.  This
4423      number is obtained as the number of loop phi nodes (not counting virtual
4424      registers and bivs) + the number of variables from outside of the loop.
4425
4426      We set a reserve R (free regs that are used for temporary computations,
4427      etc.).  For now the reserve is a constant 3.
4428
4429      Let I be the number of induction variables.
4430
4431      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4432         make a lot of ivs without a reason).
4433      -- if A - R < U + I <= A, the cost is I * PRES_COST
4434      -- if U + I > A, the cost is I * PRES_COST and
4435         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4436
4437   if (dump_file && (dump_flags & TDF_DETAILS))
4438     {
4439       fprintf (dump_file, "Global costs:\n");
4440       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4441       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4442       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4443       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4444     }
4445
4446   n = 0;
4447   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4448     {
4449       op = PHI_RESULT (phi);
4450
4451       if (!is_gimple_reg (op))
4452         continue;
4453
4454       if (get_iv (data, op))
4455         continue;
4456
4457       n++;
4458     }
4459
4460   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4461     {
4462       struct version_info *info = ver_info (data, j);
4463
4464       if (info->inv_id && info->has_nonlin_use)
4465         n++;
4466     }
4467
4468   data->regs_used = n;
4469   if (dump_file && (dump_flags & TDF_DETAILS))
4470     fprintf (dump_file, "  regs_used %d\n", n);
4471
4472   if (dump_file && (dump_flags & TDF_DETAILS))
4473     {
4474       fprintf (dump_file, "  cost for size:\n");
4475       fprintf (dump_file, "  ivs\tcost\n");
4476       for (j = 0; j <= 2 * target_avail_regs; j++)
4477         fprintf (dump_file, "  %d\t%d\n", j,
4478                  ivopts_global_cost_for_size (data, j));
4479       fprintf (dump_file, "\n");
4480     }
4481 }
4482
4483 /* Returns true if A is a cheaper cost pair than B.  */
4484
4485 static bool
4486 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4487 {
4488   if (!a)
4489     return false;
4490
4491   if (!b)
4492     return true;
4493
4494   if (a->cost < b->cost)
4495     return true;
4496
4497   if (a->cost > b->cost)
4498     return false;
4499
4500   /* In case the costs are the same, prefer the cheaper candidate.  */
4501   if (a->cand->cost < b->cand->cost)
4502     return true;
4503
4504   return false;
4505 }
4506
4507 /* Computes the cost field of IVS structure.  */
4508
4509 static void
4510 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4511 {
4512   unsigned cost = 0;
4513
4514   cost += ivs->cand_use_cost;
4515   cost += ivs->cand_cost;
4516   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4517
4518   ivs->cost = cost;
4519 }
4520
4521 /* Remove invariants in set INVS to set IVS.  */
4522
4523 static void
4524 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4525 {
4526   bitmap_iterator bi;
4527   unsigned iid;
4528
4529   if (!invs)
4530     return;
4531
4532   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533     {
4534       ivs->n_invariant_uses[iid]--;
4535       if (ivs->n_invariant_uses[iid] == 0)
4536         ivs->n_regs--;
4537     }
4538 }
4539
4540 /* Set USE not to be expressed by any candidate in IVS.  */
4541
4542 static void
4543 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4544                  struct iv_use *use)
4545 {
4546   unsigned uid = use->id, cid;
4547   struct cost_pair *cp;
4548
4549   cp = ivs->cand_for_use[uid];
4550   if (!cp)
4551     return;
4552   cid = cp->cand->id;
4553
4554   ivs->bad_uses++;
4555   ivs->cand_for_use[uid] = NULL;
4556   ivs->n_cand_uses[cid]--;
4557
4558   if (ivs->n_cand_uses[cid] == 0)
4559     {
4560       bitmap_clear_bit (ivs->cands, cid);
4561       /* Do not count the pseudocandidates.  */
4562       if (cp->cand->iv)
4563         ivs->n_regs--;
4564       ivs->n_cands--;
4565       ivs->cand_cost -= cp->cand->cost;
4566
4567       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4568     }
4569
4570   ivs->cand_use_cost -= cp->cost;
4571
4572   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4573   iv_ca_recount_cost (data, ivs);
4574 }
4575
4576 /* Add invariants in set INVS to set IVS.  */
4577
4578 static void
4579 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4580 {
4581   bitmap_iterator bi;
4582   unsigned iid;
4583
4584   if (!invs)
4585     return;
4586
4587   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4588     {
4589       ivs->n_invariant_uses[iid]++;
4590       if (ivs->n_invariant_uses[iid] == 1)
4591         ivs->n_regs++;
4592     }
4593 }
4594
4595 /* Set cost pair for USE in set IVS to CP.  */
4596
4597 static void
4598 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4599               struct iv_use *use, struct cost_pair *cp)
4600 {
4601   unsigned uid = use->id, cid;
4602
4603   if (ivs->cand_for_use[uid] == cp)
4604     return;
4605
4606   if (ivs->cand_for_use[uid])
4607     iv_ca_set_no_cp (data, ivs, use);
4608
4609   if (cp)
4610     {
4611       cid = cp->cand->id;
4612
4613       ivs->bad_uses--;
4614       ivs->cand_for_use[uid] = cp;
4615       ivs->n_cand_uses[cid]++;
4616       if (ivs->n_cand_uses[cid] == 1)
4617         {
4618           bitmap_set_bit (ivs->cands, cid);
4619           /* Do not count the pseudocandidates.  */
4620           if (cp->cand->iv)
4621             ivs->n_regs++;
4622           ivs->n_cands++;
4623           ivs->cand_cost += cp->cand->cost;
4624
4625           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4626         }
4627
4628       ivs->cand_use_cost += cp->cost;
4629       iv_ca_set_add_invariants (ivs, cp->depends_on);
4630       iv_ca_recount_cost (data, ivs);
4631     }
4632 }
4633
4634 /* Extend set IVS by expressing USE by some of the candidates in it
4635    if possible.  */
4636
4637 static void
4638 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4639                struct iv_use *use)
4640 {
4641   struct cost_pair *best_cp = NULL, *cp;
4642   bitmap_iterator bi;
4643   unsigned i;
4644
4645   gcc_assert (ivs->upto >= use->id);
4646
4647   if (ivs->upto == use->id)
4648     {
4649       ivs->upto++;
4650       ivs->bad_uses++;
4651     }
4652
4653   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4654     {
4655       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4656
4657       if (cheaper_cost_pair (cp, best_cp))
4658         best_cp = cp;
4659     }
4660
4661   iv_ca_set_cp (data, ivs, use, best_cp);
4662 }
4663
4664 /* Get cost for assignment IVS.  */
4665
4666 static unsigned
4667 iv_ca_cost (struct iv_ca *ivs)
4668 {
4669   return (ivs->bad_uses ? INFTY : ivs->cost);
4670 }
4671
4672 /* Returns true if all dependences of CP are among invariants in IVS.  */
4673
4674 static bool
4675 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4676 {
4677   unsigned i;
4678   bitmap_iterator bi;
4679
4680   if (!cp->depends_on)
4681     return true;
4682
4683   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4684     {
4685       if (ivs->n_invariant_uses[i] == 0)
4686         return false;
4687     }
4688
4689   return true;
4690 }
4691
4692 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4693    it before NEXT_CHANGE.  */
4694
4695 static struct iv_ca_delta *
4696 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4697                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4698 {
4699   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4700
4701   change->use = use;
4702   change->old_cp = old_cp;
4703   change->new_cp = new_cp;
4704   change->next_change = next_change;
4705
4706   return change;
4707 }
4708
4709 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4710    are rewritten.  */
4711
4712 static struct iv_ca_delta *
4713 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4714 {
4715   struct iv_ca_delta *last;
4716
4717   if (!l2)
4718     return l1;
4719
4720   if (!l1)
4721     return l2;
4722
4723   for (last = l1; last->next_change; last = last->next_change)
4724     continue;
4725   last->next_change = l2;
4726
4727   return l1;
4728 }
4729
4730 /* Returns candidate by that USE is expressed in IVS.  */
4731
4732 static struct cost_pair *
4733 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4734 {
4735   return ivs->cand_for_use[use->id];
4736 }
4737
4738 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4739
4740 static struct iv_ca_delta *
4741 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4742 {
4743   struct iv_ca_delta *act, *next, *prev = NULL;
4744   struct cost_pair *tmp;
4745
4746   for (act = delta; act; act = next)
4747     {
4748       next = act->next_change;
4749       act->next_change = prev;
4750       prev = act;
4751
4752       tmp = act->old_cp;
4753       act->old_cp = act->new_cp;
4754       act->new_cp = tmp;
4755     }
4756
4757   return prev;
4758 }
4759
4760 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4761    reverted instead.  */
4762
4763 static void
4764 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4765                     struct iv_ca_delta *delta, bool forward)
4766 {
4767   struct cost_pair *from, *to;
4768   struct iv_ca_delta *act;
4769
4770   if (!forward)
4771     delta = iv_ca_delta_reverse (delta);
4772
4773   for (act = delta; act; act = act->next_change)
4774     {
4775       from = act->old_cp;
4776       to = act->new_cp;
4777       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4778       iv_ca_set_cp (data, ivs, act->use, to);
4779     }
4780
4781   if (!forward)
4782     iv_ca_delta_reverse (delta);
4783 }
4784
4785 /* Returns true if CAND is used in IVS.  */
4786
4787 static bool
4788 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4789 {
4790   return ivs->n_cand_uses[cand->id] > 0;
4791 }
4792
4793 /* Returns number of induction variable candidates in the set IVS.  */
4794
4795 static unsigned
4796 iv_ca_n_cands (struct iv_ca *ivs)
4797 {
4798   return ivs->n_cands;
4799 }
4800
4801 /* Free the list of changes DELTA.  */
4802
4803 static void
4804 iv_ca_delta_free (struct iv_ca_delta **delta)
4805 {
4806   struct iv_ca_delta *act, *next;
4807
4808   for (act = *delta; act; act = next)
4809     {
4810       next = act->next_change;
4811       free (act);
4812     }
4813
4814   *delta = NULL;
4815 }
4816
4817 /* Allocates new iv candidates assignment.  */
4818
4819 static struct iv_ca *
4820 iv_ca_new (struct ivopts_data *data)
4821 {
4822   struct iv_ca *nw = XNEW (struct iv_ca);
4823
4824   nw->upto = 0;
4825   nw->bad_uses = 0;
4826   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4827   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4828   nw->cands = BITMAP_ALLOC (NULL);
4829   nw->n_cands = 0;
4830   nw->n_regs = 0;
4831   nw->cand_use_cost = 0;
4832   nw->cand_cost = 0;
4833   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4834   nw->cost = 0;
4835
4836   return nw;
4837 }
4838
4839 /* Free memory occupied by the set IVS.  */
4840
4841 static void
4842 iv_ca_free (struct iv_ca **ivs)
4843 {
4844   free ((*ivs)->cand_for_use);
4845   free ((*ivs)->n_cand_uses);
4846   BITMAP_FREE ((*ivs)->cands);
4847   free ((*ivs)->n_invariant_uses);
4848   free (*ivs);
4849   *ivs = NULL;
4850 }
4851
4852 /* Dumps IVS to FILE.  */
4853
4854 static void
4855 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4856 {
4857   const char *pref = "  invariants ";
4858   unsigned i;
4859
4860   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4861   bitmap_print (file, ivs->cands, "  candidates ","\n");
4862
4863   for (i = 1; i <= data->max_inv_id; i++)
4864     if (ivs->n_invariant_uses[i])
4865       {
4866         fprintf (file, "%s%d", pref, i);
4867         pref = ", ";
4868       }
4869   fprintf (file, "\n");
4870 }
4871
4872 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4873    new set, and store differences in DELTA.  Number of induction variables
4874    in the new set is stored to N_IVS.  */
4875
4876 static unsigned
4877 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4878               struct iv_cand *cand, struct iv_ca_delta **delta,
4879               unsigned *n_ivs)
4880 {
4881   unsigned i, cost;
4882   struct iv_use *use;
4883   struct cost_pair *old_cp, *new_cp;
4884
4885   *delta = NULL;
4886   for (i = 0; i < ivs->upto; i++)
4887     {
4888       use = iv_use (data, i);
4889       old_cp = iv_ca_cand_for_use (ivs, use);
4890
4891       if (old_cp
4892           && old_cp->cand == cand)
4893         continue;
4894
4895       new_cp = get_use_iv_cost (data, use, cand);
4896       if (!new_cp)
4897         continue;
4898
4899       if (!iv_ca_has_deps (ivs, new_cp))
4900         continue;
4901
4902       if (!cheaper_cost_pair (new_cp, old_cp))
4903         continue;
4904
4905       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4906     }
4907
4908   iv_ca_delta_commit (data, ivs, *delta, true);
4909   cost = iv_ca_cost (ivs);
4910   if (n_ivs)
4911     *n_ivs = iv_ca_n_cands (ivs);
4912   iv_ca_delta_commit (data, ivs, *delta, false);
4913
4914   return cost;
4915 }
4916
4917 /* Try narrowing set IVS by removing CAND.  Return the cost of
4918    the new set and store the differences in DELTA.  */
4919
4920 static unsigned
4921 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4922               struct iv_cand *cand, struct iv_ca_delta **delta)
4923 {
4924   unsigned i, ci;
4925   struct iv_use *use;
4926   struct cost_pair *old_cp, *new_cp, *cp;
4927   bitmap_iterator bi;
4928   struct iv_cand *cnd;
4929   unsigned cost;
4930
4931   *delta = NULL;
4932   for (i = 0; i < n_iv_uses (data); i++)
4933     {
4934       use = iv_use (data, i);
4935
4936       old_cp = iv_ca_cand_for_use (ivs, use);
4937       if (old_cp->cand != cand)
4938         continue;
4939
4940       new_cp = NULL;
4941
4942       if (data->consider_all_candidates)
4943         {
4944           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4945             {
4946               if (ci == cand->id)
4947                 continue;
4948
4949               cnd = iv_cand (data, ci);
4950
4951               cp = get_use_iv_cost (data, use, cnd);
4952               if (!cp)
4953                 continue;
4954               if (!iv_ca_has_deps (ivs, cp))
4955                 continue;
4956
4957               if (!cheaper_cost_pair (cp, new_cp))
4958                 continue;
4959
4960               new_cp = cp;
4961             }
4962         }
4963       else
4964         {
4965           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4966             {
4967               if (ci == cand->id)
4968                 continue;
4969
4970               cnd = iv_cand (data, ci);
4971
4972               cp = get_use_iv_cost (data, use, cnd);
4973               if (!cp)
4974                 continue;
4975               if (!iv_ca_has_deps (ivs, cp))
4976                 continue;
4977
4978               if (!cheaper_cost_pair (cp, new_cp))
4979                 continue;
4980
4981               new_cp = cp;
4982             }
4983         }
4984
4985       if (!new_cp)
4986         {
4987           iv_ca_delta_free (delta);
4988           return INFTY;
4989         }
4990
4991       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4992     }
4993
4994   iv_ca_delta_commit (data, ivs, *delta, true);
4995   cost = iv_ca_cost (ivs);
4996   iv_ca_delta_commit (data, ivs, *delta, false);
4997
4998   return cost;
4999 }
5000
5001 /* Try optimizing the set of candidates IVS by removing candidates different
5002    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5003    differences in DELTA.  */
5004
5005 static unsigned
5006 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5007              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5008 {
5009   bitmap_iterator bi;
5010   struct iv_ca_delta *act_delta, *best_delta;
5011   unsigned i, best_cost, acost;
5012   struct iv_cand *cand;
5013
5014   best_delta = NULL;
5015   best_cost = iv_ca_cost (ivs);
5016
5017   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5018     {
5019       cand = iv_cand (data, i);
5020
5021       if (cand == except_cand)
5022         continue;
5023
5024       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5025
5026       if (acost < best_cost)
5027         {
5028           best_cost = acost;
5029           iv_ca_delta_free (&best_delta);
5030           best_delta = act_delta;
5031         }
5032       else
5033         iv_ca_delta_free (&act_delta);
5034     }
5035
5036   if (!best_delta)
5037     {
5038       *delta = NULL;
5039       return best_cost;
5040     }
5041
5042   /* Recurse to possibly remove other unnecessary ivs.  */
5043   iv_ca_delta_commit (data, ivs, best_delta, true);
5044   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5045   iv_ca_delta_commit (data, ivs, best_delta, false);
5046   *delta = iv_ca_delta_join (best_delta, *delta);
5047   return best_cost;
5048 }
5049
5050 /* Tries to extend the sets IVS in the best possible way in order
5051    to express the USE.  */
5052
5053 static bool
5054 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5055                   struct iv_use *use)
5056 {
5057   unsigned best_cost, act_cost;
5058   unsigned i;
5059   bitmap_iterator bi;
5060   struct iv_cand *cand;
5061   struct iv_ca_delta *best_delta = NULL, *act_delta;
5062   struct cost_pair *cp;
5063
5064   iv_ca_add_use (data, ivs, use);
5065   best_cost = iv_ca_cost (ivs);
5066
5067   cp = iv_ca_cand_for_use (ivs, use);
5068   if (cp)
5069     {
5070       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5071       iv_ca_set_no_cp (data, ivs, use);
5072     }
5073
5074   /* First try important candidates.  Only if it fails, try the specific ones.
5075      Rationale -- in loops with many variables the best choice often is to use
5076      just one generic biv.  If we added here many ivs specific to the uses,
5077      the optimization algorithm later would be likely to get stuck in a local
5078      minimum, thus causing us to create too many ivs.  The approach from
5079      few ivs to more seems more likely to be successful -- starting from few
5080      ivs, replacing an expensive use by a specific iv should always be a
5081      win.  */
5082   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5083     {
5084       cand = iv_cand (data, i);
5085
5086       if (iv_ca_cand_used_p (ivs, cand))
5087         continue;
5088
5089       cp = get_use_iv_cost (data, use, cand);
5090       if (!cp)
5091         continue;
5092
5093       iv_ca_set_cp (data, ivs, use, cp);
5094       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5095       iv_ca_set_no_cp (data, ivs, use);
5096       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5097
5098       if (act_cost < best_cost)
5099         {
5100           best_cost = act_cost;
5101
5102           iv_ca_delta_free (&best_delta);
5103           best_delta = act_delta;
5104         }
5105       else
5106         iv_ca_delta_free (&act_delta);
5107     }
5108
5109   if (best_cost == INFTY)
5110     {
5111       for (i = 0; i < use->n_map_members; i++)
5112         {
5113           cp = use->cost_map + i;
5114           cand = cp->cand;
5115           if (!cand)
5116             continue;
5117
5118           /* Already tried this.  */
5119           if (cand->important)
5120             continue;
5121
5122           if (iv_ca_cand_used_p (ivs, cand))
5123             continue;
5124
5125           act_delta = NULL;
5126           iv_ca_set_cp (data, ivs, use, cp);
5127           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5128           iv_ca_set_no_cp (data, ivs, use);
5129           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5130                                        cp, act_delta);
5131
5132           if (act_cost < best_cost)
5133             {
5134               best_cost = act_cost;
5135
5136               if (best_delta)
5137                 iv_ca_delta_free (&best_delta);
5138               best_delta = act_delta;
5139             }
5140           else
5141             iv_ca_delta_free (&act_delta);
5142         }
5143     }
5144
5145   iv_ca_delta_commit (data, ivs, best_delta, true);
5146   iv_ca_delta_free (&best_delta);
5147
5148   return (best_cost != INFTY);
5149 }
5150
5151 /* Finds an initial assignment of candidates to uses.  */
5152
5153 static struct iv_ca *
5154 get_initial_solution (struct ivopts_data *data)
5155 {
5156   struct iv_ca *ivs = iv_ca_new (data);
5157   unsigned i;
5158
5159   for (i = 0; i < n_iv_uses (data); i++)
5160     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5161       {
5162         iv_ca_free (&ivs);
5163         return NULL;
5164       }
5165
5166   return ivs;
5167 }
5168
5169 /* Tries to improve set of induction variables IVS.  */
5170
5171 static bool
5172 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5173 {
5174   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5175   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5176   struct iv_cand *cand;
5177
5178   /* Try extending the set of induction variables by one.  */
5179   for (i = 0; i < n_iv_cands (data); i++)
5180     {
5181       cand = iv_cand (data, i);
5182
5183       if (iv_ca_cand_used_p (ivs, cand))
5184         continue;
5185
5186       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5187       if (!act_delta)
5188         continue;
5189
5190       /* If we successfully added the candidate and the set is small enough,
5191          try optimizing it by removing other candidates.  */
5192       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5193         {
5194           iv_ca_delta_commit (data, ivs, act_delta, true);
5195           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5196           iv_ca_delta_commit (data, ivs, act_delta, false);
5197           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5198         }
5199
5200       if (acost < best_cost)
5201         {
5202           best_cost = acost;
5203           iv_ca_delta_free (&best_delta);
5204           best_delta = act_delta;
5205         }
5206       else
5207         iv_ca_delta_free (&act_delta);
5208     }
5209
5210   if (!best_delta)
5211     {
5212       /* Try removing the candidates from the set instead.  */
5213       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5214
5215       /* Nothing more we can do.  */
5216       if (!best_delta)
5217         return false;
5218     }
5219
5220   iv_ca_delta_commit (data, ivs, best_delta, true);
5221   gcc_assert (best_cost == iv_ca_cost (ivs));
5222   iv_ca_delta_free (&best_delta);
5223   return true;
5224 }
5225
5226 /* Attempts to find the optimal set of induction variables.  We do simple
5227    greedy heuristic -- we try to replace at most one candidate in the selected
5228    solution and remove the unused ivs while this improves the cost.  */
5229
5230 static struct iv_ca *
5231 find_optimal_iv_set (struct ivopts_data *data)
5232 {
5233   unsigned i;
5234   struct iv_ca *set;
5235   struct iv_use *use;
5236
5237   /* Get the initial solution.  */
5238   set = get_initial_solution (data);
5239   if (!set)
5240     {
5241       if (dump_file && (dump_flags & TDF_DETAILS))
5242         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5243       return NULL;
5244     }
5245
5246   if (dump_file && (dump_flags & TDF_DETAILS))
5247     {
5248       fprintf (dump_file, "Initial set of candidates:\n");
5249       iv_ca_dump (data, dump_file, set);
5250     }
5251
5252   while (try_improve_iv_set (data, set))
5253     {
5254       if (dump_file && (dump_flags & TDF_DETAILS))
5255         {
5256           fprintf (dump_file, "Improved to:\n");
5257           iv_ca_dump (data, dump_file, set);
5258         }
5259     }
5260
5261   if (dump_file && (dump_flags & TDF_DETAILS))
5262     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5263
5264   for (i = 0; i < n_iv_uses (data); i++)
5265     {
5266       use = iv_use (data, i);
5267       use->selected = iv_ca_cand_for_use (set, use)->cand;
5268     }
5269
5270   return set;
5271 }
5272
5273 /* Creates a new induction variable corresponding to CAND.  */
5274
5275 static void
5276 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5277 {
5278   block_stmt_iterator incr_pos;
5279   tree base;
5280   bool after = false;
5281
5282   if (!cand->iv)
5283     return;
5284
5285   switch (cand->pos)
5286     {
5287     case IP_NORMAL:
5288       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5289       break;
5290
5291     case IP_END:
5292       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5293       after = true;
5294       break;
5295
5296     case IP_ORIGINAL:
5297       /* Mark that the iv is preserved.  */
5298       name_info (data, cand->var_before)->preserve_biv = true;
5299       name_info (data, cand->var_after)->preserve_biv = true;
5300
5301       /* Rewrite the increment so that it uses var_before directly.  */
5302       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5303
5304       return;
5305     }
5306
5307   gimple_add_tmp_var (cand->var_before);
5308   add_referenced_var (cand->var_before);
5309
5310   base = unshare_expr (cand->iv->base);
5311
5312   create_iv (base, unshare_expr (cand->iv->step),
5313              cand->var_before, data->current_loop,
5314              &incr_pos, after, &cand->var_before, &cand->var_after);
5315 }
5316
5317 /* Creates new induction variables described in SET.  */
5318
5319 static void
5320 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5321 {
5322   unsigned i;
5323   struct iv_cand *cand;
5324   bitmap_iterator bi;
5325
5326   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5327     {
5328       cand = iv_cand (data, i);
5329       create_new_iv (data, cand);
5330     }
5331 }
5332
5333 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5334    is true, remove also the ssa name defined by the statement.  */
5335
5336 static void
5337 remove_statement (tree stmt, bool including_defined_name)
5338 {
5339   if (TREE_CODE (stmt) == PHI_NODE)
5340     {
5341       if (!including_defined_name)
5342         {
5343           /* Prevent the ssa name defined by the statement from being removed.  */
5344           SET_PHI_RESULT (stmt, NULL);
5345         }
5346       remove_phi_node (stmt, NULL_TREE);
5347     }
5348   else
5349     {
5350       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5351
5352       bsi_remove (&bsi, true);
5353     }
5354 }
5355
5356 /* Rewrites USE (definition of iv used in a nonlinear expression)
5357    using candidate CAND.  */
5358
5359 static void
5360 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5361                             struct iv_use *use, struct iv_cand *cand)
5362 {
5363   tree comp;
5364   tree op, stmts, tgt, ass;
5365   block_stmt_iterator bsi, pbsi;
5366
5367   /* An important special case -- if we are asked to express value of
5368      the original iv by itself, just exit; there is no need to
5369      introduce a new computation (that might also need casting the
5370      variable to unsigned and back).  */
5371   if (cand->pos == IP_ORIGINAL
5372       && cand->incremented_at == use->stmt)
5373     {
5374       tree step, ctype, utype;
5375       enum tree_code incr_code = PLUS_EXPR;
5376
5377       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5378       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5379
5380       step = cand->iv->step;
5381       ctype = TREE_TYPE (step);
5382       utype = TREE_TYPE (cand->var_after);
5383       if (TREE_CODE (step) == NEGATE_EXPR)
5384         {
5385           incr_code = MINUS_EXPR;
5386           step = TREE_OPERAND (step, 0);
5387         }
5388
5389       /* Check whether we may leave the computation unchanged.
5390          This is the case only if it does not rely on other
5391          computations in the loop -- otherwise, the computation
5392          we rely upon may be removed in remove_unused_ivs,
5393          thus leading to ICE.  */
5394       op = TREE_OPERAND (use->stmt, 1);
5395       if (TREE_CODE (op) == PLUS_EXPR
5396           || TREE_CODE (op) == MINUS_EXPR)
5397         {
5398           if (TREE_OPERAND (op, 0) == cand->var_before)
5399             op = TREE_OPERAND (op, 1);
5400           else if (TREE_CODE (op) == PLUS_EXPR
5401                    && TREE_OPERAND (op, 1) == cand->var_before)
5402             op = TREE_OPERAND (op, 0);
5403           else
5404             op = NULL_TREE;
5405         }
5406       else
5407         op = NULL_TREE;
5408
5409       if (op
5410           && (TREE_CODE (op) == INTEGER_CST
5411               || operand_equal_p (op, step, 0)))
5412         return;
5413
5414       /* Otherwise, add the necessary computations to express
5415          the iv.  */
5416       op = fold_convert (ctype, cand->var_before);
5417       comp = fold_convert (utype,
5418                            build2 (incr_code, ctype, op,
5419                                    unshare_expr (step)));
5420     }
5421   else
5422     comp = get_computation (data->current_loop, use, cand);
5423
5424   switch (TREE_CODE (use->stmt))
5425     {
5426     case PHI_NODE:
5427       tgt = PHI_RESULT (use->stmt);
5428
5429       /* If we should keep the biv, do not replace it.  */
5430       if (name_info (data, tgt)->preserve_biv)
5431         return;
5432
5433       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5434       while (!bsi_end_p (pbsi)
5435              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5436         {
5437           bsi = pbsi;
5438           bsi_next (&pbsi);
5439         }
5440       break;
5441
5442     case MODIFY_EXPR:
5443       tgt = TREE_OPERAND (use->stmt, 0);
5444       bsi = bsi_for_stmt (use->stmt);
5445       break;
5446
5447     default:
5448       gcc_unreachable ();
5449     }
5450
5451   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5452
5453   if (TREE_CODE (use->stmt) == PHI_NODE)
5454     {
5455       if (stmts)
5456         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5457       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5458       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5459       remove_statement (use->stmt, false);
5460       SSA_NAME_DEF_STMT (tgt) = ass;
5461     }
5462   else
5463     {
5464       if (stmts)
5465         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5466       TREE_OPERAND (use->stmt, 1) = op;
5467     }
5468 }
5469
5470 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5471    for_each_index.  */
5472
5473 static bool
5474 idx_remove_ssa_names (tree base, tree *idx,
5475                       void *data ATTRIBUTE_UNUSED)
5476 {
5477   tree *op;
5478
5479   if (TREE_CODE (*idx) == SSA_NAME)
5480     *idx = SSA_NAME_VAR (*idx);
5481
5482   if (TREE_CODE (base) == ARRAY_REF)
5483     {
5484       op = &TREE_OPERAND (base, 2);
5485       if (*op
5486           && TREE_CODE (*op) == SSA_NAME)
5487         *op = SSA_NAME_VAR (*op);
5488       op = &TREE_OPERAND (base, 3);
5489       if (*op
5490           && TREE_CODE (*op) == SSA_NAME)
5491         *op = SSA_NAME_VAR (*op);
5492     }
5493
5494   return true;
5495 }
5496
5497 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5498
5499 static tree
5500 unshare_and_remove_ssa_names (tree ref)
5501 {
5502   ref = unshare_expr (ref);
5503   for_each_index (&ref, idx_remove_ssa_names, NULL);
5504
5505   return ref;
5506 }
5507
5508 /* Extract the alias analysis info for the memory reference REF.  There are
5509    several ways how this information may be stored and what precisely is
5510    its semantics depending on the type of the reference, but there always is
5511    somewhere hidden one _DECL node that is used to determine the set of
5512    virtual operands for the reference.  The code below deciphers this jungle
5513    and extracts this single useful piece of information.  */
5514
5515 static tree
5516 get_ref_tag (tree ref, tree orig)
5517 {
5518   tree var = get_base_address (ref);
5519   tree aref = NULL_TREE, tag, sv;
5520   HOST_WIDE_INT offset, size, maxsize;
5521
5522   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5523     {
5524       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5525       if (ref)
5526         break;
5527     }
5528
5529   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5530     return unshare_expr (sv);
5531
5532   if (!var)
5533     return NULL_TREE;
5534
5535   if (TREE_CODE (var) == INDIRECT_REF)
5536     {
5537       /* If the base is a dereference of a pointer, first check its name memory
5538          tag.  If it does not have one, use its symbol memory tag.  */
5539       var = TREE_OPERAND (var, 0);
5540       if (TREE_CODE (var) != SSA_NAME)
5541         return NULL_TREE;
5542
5543       if (SSA_NAME_PTR_INFO (var))
5544         {
5545           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5546           if (tag)
5547             return tag;
5548         }
5549
5550       var = SSA_NAME_VAR (var);
5551       tag = var_ann (var)->symbol_mem_tag;
5552       gcc_assert (tag != NULL_TREE);
5553       return tag;
5554     }
5555   else
5556     {
5557       if (!DECL_P (var))
5558         return NULL_TREE;
5559
5560       tag = var_ann (var)->symbol_mem_tag;
5561       if (tag)
5562         return tag;
5563
5564       return var;
5565     }
5566 }
5567
5568 /* Copies the reference information from OLD_REF to NEW_REF.  */
5569
5570 static void
5571 copy_ref_info (tree new_ref, tree old_ref)
5572 {
5573   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5574     copy_mem_ref_info (new_ref, old_ref);
5575   else
5576     {
5577       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5578       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5579     }
5580 }
5581
5582 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5583
5584 static void
5585 rewrite_use_address (struct ivopts_data *data,
5586                      struct iv_use *use, struct iv_cand *cand)
5587 {
5588   struct affine_tree_combination aff;
5589   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5590   tree ref;
5591
5592   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5593   unshare_aff_combination (&aff);
5594
5595   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5596   copy_ref_info (ref, *use->op_p);
5597   *use->op_p = ref;
5598 }
5599
5600 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5601    candidate CAND.  */
5602
5603 static void
5604 rewrite_use_compare (struct ivopts_data *data,
5605                      struct iv_use *use, struct iv_cand *cand)
5606 {
5607   tree comp;
5608   tree *op_p, cond, op, stmts, bound;
5609   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5610   enum tree_code compare;
5611   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5612
5613   bound = cp->value;
5614   if (bound)
5615     {
5616       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5617       tree var_type = TREE_TYPE (var);
5618
5619       compare = iv_elimination_compare (data, use);
5620       bound = fold_convert (var_type, bound);
5621       op = force_gimple_operand (unshare_expr (bound), &stmts,
5622                                  true, NULL_TREE);
5623
5624       if (stmts)
5625         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5626
5627       *use->op_p = build2 (compare, boolean_type_node, var, op);
5628       update_stmt (use->stmt);
5629       return;
5630     }
5631
5632   /* The induction variable elimination failed; just express the original
5633      giv.  */
5634   comp = get_computation (data->current_loop, use, cand);
5635
5636   cond = *use->op_p;
5637   op_p = &TREE_OPERAND (cond, 0);
5638   if (TREE_CODE (*op_p) != SSA_NAME
5639       || zero_p (get_iv (data, *op_p)->step))
5640     op_p = &TREE_OPERAND (cond, 1);
5641
5642   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5643   if (stmts)
5644     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5645
5646   *op_p = op;
5647 }
5648
5649 /* Rewrites USE using candidate CAND.  */
5650
5651 static void
5652 rewrite_use (struct ivopts_data *data,
5653              struct iv_use *use, struct iv_cand *cand)
5654 {
5655   switch (use->type)
5656     {
5657       case USE_NONLINEAR_EXPR:
5658         rewrite_use_nonlinear_expr (data, use, cand);
5659         break;
5660
5661       case USE_ADDRESS:
5662         rewrite_use_address (data, use, cand);
5663         break;
5664
5665       case USE_COMPARE:
5666         rewrite_use_compare (data, use, cand);
5667         break;
5668
5669       default:
5670         gcc_unreachable ();
5671     }
5672   mark_new_vars_to_rename (use->stmt);
5673 }
5674
5675 /* Rewrite the uses using the selected induction variables.  */
5676
5677 static void
5678 rewrite_uses (struct ivopts_data *data)
5679 {
5680   unsigned i;
5681   struct iv_cand *cand;
5682   struct iv_use *use;
5683
5684   for (i = 0; i < n_iv_uses (data); i++)
5685     {
5686       use = iv_use (data, i);
5687       cand = use->selected;
5688       gcc_assert (cand);
5689
5690       rewrite_use (data, use, cand);
5691     }
5692 }
5693
5694 /* Removes the ivs that are not used after rewriting.  */
5695
5696 static void
5697 remove_unused_ivs (struct ivopts_data *data)
5698 {
5699   unsigned j;
5700   bitmap_iterator bi;
5701
5702   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703     {
5704       struct version_info *info;
5705
5706       info = ver_info (data, j);
5707       if (info->iv
5708           && !zero_p (info->iv->step)
5709           && !info->inv_id
5710           && !info->iv->have_use_for
5711           && !info->preserve_biv)
5712         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5713     }
5714 }
5715
5716 /* Frees data allocated by the optimization of a single loop.  */
5717
5718 static void
5719 free_loop_data (struct ivopts_data *data)
5720 {
5721   unsigned i, j;
5722   bitmap_iterator bi;
5723   tree obj;
5724
5725   htab_empty (data->niters);
5726
5727   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5728     {
5729       struct version_info *info;
5730
5731       info = ver_info (data, i);
5732       if (info->iv)
5733         free (info->iv);
5734       info->iv = NULL;
5735       info->has_nonlin_use = false;
5736       info->preserve_biv = false;
5737       info->inv_id = 0;
5738     }
5739   bitmap_clear (data->relevant);
5740   bitmap_clear (data->important_candidates);
5741
5742   for (i = 0; i < n_iv_uses (data); i++)
5743     {
5744       struct iv_use *use = iv_use (data, i);
5745
5746       free (use->iv);
5747       BITMAP_FREE (use->related_cands);
5748       for (j = 0; j < use->n_map_members; j++)
5749         if (use->cost_map[j].depends_on)
5750           BITMAP_FREE (use->cost_map[j].depends_on);
5751       free (use->cost_map);
5752       free (use);
5753     }
5754   VEC_truncate (iv_use_p, data->iv_uses, 0);
5755
5756   for (i = 0; i < n_iv_cands (data); i++)
5757     {
5758       struct iv_cand *cand = iv_cand (data, i);
5759
5760       if (cand->iv)
5761         free (cand->iv);
5762       if (cand->depends_on)
5763         BITMAP_FREE (cand->depends_on);
5764       free (cand);
5765     }
5766   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5767
5768   if (data->version_info_size < num_ssa_names)
5769     {
5770       data->version_info_size = 2 * num_ssa_names;
5771       free (data->version_info);
5772       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5773     }
5774
5775   data->max_inv_id = 0;
5776
5777   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5778     SET_DECL_RTL (obj, NULL_RTX);
5779
5780   VEC_truncate (tree, decl_rtl_to_reset, 0);
5781 }
5782
5783 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5784    loop tree.  */
5785
5786 static void
5787 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5788 {
5789   free_loop_data (data);
5790   free (data->version_info);
5791   BITMAP_FREE (data->relevant);
5792   BITMAP_FREE (data->important_candidates);
5793   htab_delete (data->niters);
5794
5795   VEC_free (tree, heap, decl_rtl_to_reset);
5796   VEC_free (iv_use_p, heap, data->iv_uses);
5797   VEC_free (iv_cand_p, heap, data->iv_candidates);
5798 }
5799
5800 /* Optimizes the LOOP.  Returns true if anything changed.  */
5801
5802 static bool
5803 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5804 {
5805   bool changed = false;
5806   struct iv_ca *iv_ca;
5807   edge exit;
5808
5809   data->current_loop = loop;
5810
5811   if (dump_file && (dump_flags & TDF_DETAILS))
5812     {
5813       fprintf (dump_file, "Processing loop %d\n", loop->num);
5814
5815       exit = single_dom_exit (loop);
5816       if (exit)
5817         {
5818           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5819                    exit->src->index, exit->dest->index);
5820           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5821           fprintf (dump_file, "\n");
5822         }
5823
5824       fprintf (dump_file, "\n");
5825     }
5826
5827   /* For each ssa name determines whether it behaves as an induction variable
5828      in some loop.  */
5829   if (!find_induction_variables (data))
5830     goto finish;
5831
5832   /* Finds interesting uses (item 1).  */
5833   find_interesting_uses (data);
5834   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5835     goto finish;
5836
5837   /* Finds candidates for the induction variables (item 2).  */
5838   find_iv_candidates (data);
5839
5840   /* Calculates the costs (item 3, part 1).  */
5841   determine_use_iv_costs (data);
5842   determine_iv_costs (data);
5843   determine_set_costs (data);
5844
5845   /* Find the optimal set of induction variables (item 3, part 2).  */
5846   iv_ca = find_optimal_iv_set (data);
5847   if (!iv_ca)
5848     goto finish;
5849   changed = true;
5850
5851   /* Create the new induction variables (item 4, part 1).  */
5852   create_new_ivs (data, iv_ca);
5853   iv_ca_free (&iv_ca);
5854
5855   /* Rewrite the uses (item 4, part 2).  */
5856   rewrite_uses (data);
5857
5858   /* Remove the ivs that are unused after rewriting.  */
5859   remove_unused_ivs (data);
5860
5861   /* We have changed the structure of induction variables; it might happen
5862      that definitions in the scev database refer to some of them that were
5863      eliminated.  */
5864   scev_reset ();
5865
5866 finish:
5867   free_loop_data (data);
5868
5869   return changed;
5870 }
5871
5872 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5873
5874 void
5875 tree_ssa_iv_optimize (struct loops *loops)
5876 {
5877   struct loop *loop;
5878   struct ivopts_data data;
5879
5880   tree_ssa_iv_optimize_init (&data);
5881
5882   /* Optimize the loops starting with the innermost ones.  */
5883   loop = loops->tree_root;
5884   while (loop->inner)
5885     loop = loop->inner;
5886
5887   /* Scan the loops, inner ones first.  */
5888   while (loop != loops->tree_root)
5889     {
5890       if (dump_file && (dump_flags & TDF_DETAILS))
5891         flow_loop_dump (loop, dump_file, NULL, 1);
5892
5893       tree_ssa_iv_optimize_loop (&data, loop);
5894
5895       if (loop->next)
5896         {
5897           loop = loop->next;
5898           while (loop->inner)
5899             loop = loop->inner;
5900         }
5901       else
5902         loop = loop->outer;
5903     }
5904
5905   tree_ssa_iv_optimize_finalize (&data);
5906 }