gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter,
 716                                                      true);
 717       *slot = nfe_desc;
 718     }
 719   else
 720     nfe_desc = *slot;
 721
 722   if (!nfe_desc->valid_p)
 723     return NULL;
 724
 725   return &nfe_desc->niter;
 726 }
 727
 728 /* Returns structure describing number of iterations determined from
 729    single dominating exit of DATA->current_loop, or NULL if something
 730    goes wrong.  */
 731
 732 static struct tree_niter_desc *
 733 niter_for_single_dom_exit (struct ivopts_data *data)
 734 {
 735   edge exit = single_dom_exit (data->current_loop);
 736
 737   if (!exit)
 738     return NULL;
 739
 740   return niter_for_exit (data, exit);
 741 }
 742
 743 /* Initializes data structures used by the iv optimization pass, stored
 744    in DATA.  LOOPS is the loop tree.  */
 745
 746 static void
 747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 748 {
 749   unsigned i;
 750
 751   data->version_info_size = 2 * num_ssa_names;
 752   data->version_info = xcalloc (data->version_info_size,
 753                                 sizeof (struct version_info));
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 758
 759   for (i = 1; i < loops->num; i++)
 760     if (loops->parray[i])
 761       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 762
 763   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 764   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 765   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 766 }
 767
 768 /* Returns a memory object to that EXPR points.  In case we are able to
 769    determine that it does not point to any such object, NULL is returned.  */
 770
 771 static tree
 772 determine_base_object (tree expr)
 773 {
 774   enum tree_code code = TREE_CODE (expr);
 775   tree base, obj, op0, op1;
 776
 777   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 778     return NULL_TREE;
 779
 780   switch (code)
 781     {
 782     case INTEGER_CST:
 783       return NULL_TREE;
 784
 785     case ADDR_EXPR:
 786       obj = TREE_OPERAND (expr, 0);
 787       base = get_base_address (obj);
 788
 789       if (!base)
 790         return expr;
 791
 792       if (TREE_CODE (base) == INDIRECT_REF)
 793         return determine_base_object (TREE_OPERAND (base, 0));
 794
 795       return fold_convert (ptr_type_node,
 796                            build_fold_addr_expr (base));
 797
 798     case PLUS_EXPR:
 799     case MINUS_EXPR:
 800       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 801       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 802
 803       if (!op1)
 804         return op0;
 805
 806       if (!op0)
 807         return (code == PLUS_EXPR
 808                 ? op1
 809                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 810
 811       return fold_build2 (code, ptr_type_node, op0, op1);
 812
 813     case NOP_EXPR:
 814     case CONVERT_EXPR:
 815       return determine_base_object (TREE_OPERAND (expr, 0));
 816
 817     default:
 818       return fold_convert (ptr_type_node, expr);
 819     }
 820 }
 821
 822 /* Allocates an induction variable with given initial value BASE and step STEP
 823    for loop LOOP.  */
 824
 825 static struct iv *
 826 alloc_iv (tree base, tree step)
 827 {
 828   struct iv *iv = xcalloc (1, sizeof (struct iv));
 829
 830   if (step && integer_zerop (step))
 831     step = NULL_TREE;
 832
 833   iv->base = base;
 834   iv->base_object = determine_base_object (base);
 835   iv->step = step;
 836   iv->biv_p = false;
 837   iv->have_use_for = false;
 838   iv->use_id = 0;
 839   iv->ssa_name = NULL_TREE;
 840
 841   return iv;
 842 }
 843
 844 /* Sets STEP and BASE for induction variable IV.  */
 845
 846 static void
 847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 848 {
 849   struct version_info *info = name_info (data, iv);
 850
 851   gcc_assert (!info->iv);
 852
 853   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 854   info->iv = alloc_iv (base, step);
 855   info->iv->ssa_name = iv;
 856 }
 857
 858 /* Finds induction variable declaration for VAR.  */
 859
 860 static struct iv *
 861 get_iv (struct ivopts_data *data, tree var)
 862 {
 863   basic_block bb;
 864
 865   if (!name_info (data, var)->iv)
 866     {
 867       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 868
 869       if (!bb
 870           || !flow_bb_inside_loop_p (data->current_loop, bb))
 871         set_iv (data, var, var, NULL_TREE);
 872     }
 873
 874   return name_info (data, var)->iv;
 875 }
 876
 877 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 878    not define a simple affine biv with nonzero step.  */
 879
 880 static tree
 881 determine_biv_step (tree phi)
 882 {
 883   struct loop *loop = bb_for_stmt (phi)->loop_father;
 884   tree name = PHI_RESULT (phi), base, step;
 885
 886   if (!is_gimple_reg (name))
 887     return NULL_TREE;
 888
 889   if (!simple_iv (loop, phi, name, &base, &step, true))
 890     return NULL_TREE;
 891
 892   if (zero_p (step))
 893     return NULL_TREE;
 894
 895   return step;
 896 }
 897
 898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 899
 900 static bool
 901 abnormal_ssa_name_p (tree exp)
 902 {
 903   if (!exp)
 904     return false;
 905
 906   if (TREE_CODE (exp) != SSA_NAME)
 907     return false;
 908
 909   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 910 }
 911
 912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 913    abnormal phi node.  Callback for for_each_index.  */
 914
 915 static bool
 916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 917                                   void *data ATTRIBUTE_UNUSED)
 918 {
 919   if (TREE_CODE (base) == ARRAY_REF)
 920     {
 921       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 922         return false;
 923       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 924         return false;
 925     }
 926
 927   return !abnormal_ssa_name_p (*index);
 928 }
 929
 930 /* Returns true if EXPR contains a ssa name that occurs in an
 931    abnormal phi node.  */
 932
 933 static bool
 934 contains_abnormal_ssa_name_p (tree expr)
 935 {
 936   enum tree_code code;
 937   enum tree_code_class class;
 938
 939   if (!expr)
 940     return false;
 941
 942   code = TREE_CODE (expr);
 943   class = TREE_CODE_CLASS (code);
 944
 945   if (code == SSA_NAME)
 946     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 947
 948   if (code == INTEGER_CST
 949       || is_gimple_min_invariant (expr))
 950     return false;
 951
 952   if (code == ADDR_EXPR)
 953     return !for_each_index (&TREE_OPERAND (expr, 0),
 954                             idx_contains_abnormal_ssa_name_p,
 955                             NULL);
 956
 957   switch (class)
 958     {
 959     case tcc_binary:
 960     case tcc_comparison:
 961       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 962         return true;
 963
 964       /* Fallthru.  */
 965     case tcc_unary:
 966       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 967         return true;
 968
 969       break;
 970
 971     default:
 972       gcc_unreachable ();
 973     }
 974
 975   return false;
 976 }
 977
 978 /* Finds basic ivs.  */
 979
 980 static bool
 981 find_bivs (struct ivopts_data *data)
 982 {
 983   tree phi, step, type, base;
 984   bool found = false;
 985   struct loop *loop = data->current_loop;
 986
 987   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 988     {
 989       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 990         continue;
 991
 992       step = determine_biv_step (phi);
 993       if (!step)
 994         continue;
 995
 996       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 997       base = expand_simple_operations (base);
 998       if (contains_abnormal_ssa_name_p (base)
 999           || contains_abnormal_ssa_name_p (step))
1000         continue;
1001
1002       type = TREE_TYPE (PHI_RESULT (phi));
1003       base = fold_convert (type, base);
1004       if (step)
1005         step = fold_convert (type, step);
1006
1007       set_iv (data, PHI_RESULT (phi), base, step);
1008       found = true;
1009     }
1010
1011   return found;
1012 }
1013
1014 /* Marks basic ivs.  */
1015
1016 static void
1017 mark_bivs (struct ivopts_data *data)
1018 {
1019   tree phi, var;
1020   struct iv *iv, *incr_iv;
1021   struct loop *loop = data->current_loop;
1022   basic_block incr_bb;
1023
1024   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025     {
1026       iv = get_iv (data, PHI_RESULT (phi));
1027       if (!iv)
1028         continue;
1029
1030       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031       incr_iv = get_iv (data, var);
1032       if (!incr_iv)
1033         continue;
1034
1035       /* If the increment is in the subloop, ignore it.  */
1036       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037       if (incr_bb->loop_father != data->current_loop
1038           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039         continue;
1040
1041       iv->biv_p = true;
1042       incr_iv->biv_p = true;
1043     }
1044 }
1045
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047    parameters to BASE and STEP.  */
1048
1049 static bool
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051                         tree *base, tree *step)
1052 {
1053   tree lhs;
1054   struct loop *loop = data->current_loop;
1055
1056   *base = NULL_TREE;
1057   *step = NULL_TREE;
1058
1059   if (TREE_CODE (stmt) != MODIFY_EXPR)
1060     return false;
1061
1062   lhs = TREE_OPERAND (stmt, 0);
1063   if (TREE_CODE (lhs) != SSA_NAME)
1064     return false;
1065
1066   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067     return false;
1068   *base = expand_simple_operations (*base);
1069
1070   if (contains_abnormal_ssa_name_p (*base)
1071       || contains_abnormal_ssa_name_p (*step))
1072     return false;
1073
1074   return true;
1075 }
1076
1077 /* Finds general ivs in statement STMT.  */
1078
1079 static void
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 {
1082   tree base, step;
1083
1084   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085     return;
1086
1087   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1088 }
1089
1090 /* Finds general ivs in basic block BB.  */
1091
1092 static void
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 {
1095   block_stmt_iterator bsi;
1096
1097   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098     find_givs_in_stmt (data, bsi_stmt (bsi));
1099 }
1100
1101 /* Finds general ivs.  */
1102
1103 static void
1104 find_givs (struct ivopts_data *data)
1105 {
1106   struct loop *loop = data->current_loop;
1107   basic_block *body = get_loop_body_in_dom_order (loop);
1108   unsigned i;
1109
1110   for (i = 0; i < loop->num_nodes; i++)
1111     find_givs_in_bb (data, body[i]);
1112   free (body);
1113 }
1114
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116    variable and if so, its initial value and step.  */
1117
1118 static bool
1119 find_induction_variables (struct ivopts_data *data)
1120 {
1121   unsigned i;
1122   bitmap_iterator bi;
1123
1124   if (!find_bivs (data))
1125     return false;
1126
1127   find_givs (data);
1128   mark_bivs (data);
1129
1130   if (dump_file && (dump_flags & TDF_DETAILS))
1131     {
1132       struct tree_niter_desc *niter;
1133
1134       niter = niter_for_single_dom_exit (data);
1135
1136       if (niter)
1137         {
1138           fprintf (dump_file, "  number of iterations ");
1139           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141
1142           fprintf (dump_file, "  may be zero if ");
1143           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144           fprintf (dump_file, "\n");
1145           fprintf (dump_file, "\n");
1146         };
1147
1148       fprintf (dump_file, "Induction variables:\n\n");
1149
1150       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151         {
1152           if (ver_info (data, i)->iv)
1153             dump_iv (dump_file, ver_info (data, i)->iv);
1154         }
1155     }
1156
1157   return true;
1158 }
1159
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1161
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164             tree stmt, enum use_type use_type)
1165 {
1166   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167
1168   use->id = n_iv_uses (data);
1169   use->type = use_type;
1170   use->iv = iv;
1171   use->stmt = stmt;
1172   use->op_p = use_p;
1173   use->related_cands = BITMAP_ALLOC (NULL);
1174
1175   /* To avoid showing ssa name in the dumps, if it was not reset by the
1176      caller.  */
1177   iv->ssa_name = NULL_TREE;
1178
1179   if (dump_file && (dump_flags & TDF_DETAILS))
1180     dump_use (dump_file, use);
1181
1182   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183
1184   return use;
1185 }
1186
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188    NONLINEAR_USE is true if the invariant is used in a way we do not
1189    handle specially.  */
1190
1191 static void
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 {
1194   basic_block bb;
1195   struct version_info *info;
1196
1197   if (TREE_CODE (op) != SSA_NAME
1198       || !is_gimple_reg (op))
1199     return;
1200
1201   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202   if (bb
1203       && flow_bb_inside_loop_p (data->current_loop, bb))
1204     return;
1205
1206   info = name_info (data, op);
1207   info->name = op;
1208   info->has_nonlin_use |= nonlinear_use;
1209   if (!info->inv_id)
1210     info->inv_id = ++data->max_inv_id;
1211   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1212 }
1213
1214 /* Checks whether the use OP is interesting and if so, records it
1215    as TYPE.  */
1216
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1219                                        enum use_type type)
1220 {
1221   struct iv *iv;
1222   struct iv *civ;
1223   tree stmt;
1224   struct iv_use *use;
1225
1226   if (TREE_CODE (op) != SSA_NAME)
1227     return NULL;
1228
1229   iv = get_iv (data, op);
1230   if (!iv)
1231     return NULL;
1232
1233   if (iv->have_use_for)
1234     {
1235       use = iv_use (data, iv->use_id);
1236
1237       gcc_assert (use->type == USE_NONLINEAR_EXPR
1238                   || use->type == USE_OUTER);
1239
1240       if (type == USE_NONLINEAR_EXPR)
1241         use->type = USE_NONLINEAR_EXPR;
1242       return use;
1243     }
1244
1245   if (zero_p (iv->step))
1246     {
1247       record_invariant (data, op, true);
1248       return NULL;
1249     }
1250   iv->have_use_for = true;
1251
1252   civ = xmalloc (sizeof (struct iv));
1253   *civ = *iv;
1254
1255   stmt = SSA_NAME_DEF_STMT (op);
1256   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257               || TREE_CODE (stmt) == MODIFY_EXPR);
1258
1259   use = record_use (data, NULL, civ, stmt, type);
1260   iv->use_id = use->id;
1261
1262   return use;
1263 }
1264
1265 /* Checks whether the use OP is interesting and if so, records it.  */
1266
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 {
1270   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1271 }
1272
1273 /* Records a definition of induction variable OP that is used outside of the
1274    loop.  */
1275
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 {
1279   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1280 }
1281
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283    and if so, records it.  */
1284
1285 static void
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 {
1288   tree *op0_p;
1289   tree *op1_p;
1290   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291   struct iv const_iv;
1292   tree zero = integer_zero_node;
1293
1294   const_iv.step = NULL_TREE;
1295
1296   if (TREE_CODE (*cond_p) != SSA_NAME
1297       && !COMPARISON_CLASS_P (*cond_p))
1298     return;
1299
1300   if (TREE_CODE (*cond_p) == SSA_NAME)
1301     {
1302       op0_p = cond_p;
1303       op1_p = &zero;
1304     }
1305   else
1306     {
1307       op0_p = &TREE_OPERAND (*cond_p, 0);
1308       op1_p = &TREE_OPERAND (*cond_p, 1);
1309     }
1310
1311   if (TREE_CODE (*op0_p) == SSA_NAME)
1312     iv0 = get_iv (data, *op0_p);
1313   else
1314     iv0 = &const_iv;
1315
1316   if (TREE_CODE (*op1_p) == SSA_NAME)
1317     iv1 = get_iv (data, *op1_p);
1318   else
1319     iv1 = &const_iv;
1320
1321   if (/* When comparing with non-invariant value, we may not do any senseful
1322          induction variable elimination.  */
1323       (!iv0 || !iv1)
1324       /* Eliminating condition based on two ivs would be nontrivial.
1325          ??? TODO -- it is not really important to handle this case.  */
1326       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327     {
1328       find_interesting_uses_op (data, *op0_p);
1329       find_interesting_uses_op (data, *op1_p);
1330       return;
1331     }
1332
1333   if (zero_p (iv0->step) && zero_p (iv1->step))
1334     {
1335       /* If both are invariants, this is a work for unswitching.  */
1336       return;
1337     }
1338
1339   civ = xmalloc (sizeof (struct iv));
1340   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1342 }
1343
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345    i.e. if all its operands are defined outside of the LOOP.  */
1346
1347 bool
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 {
1350   basic_block def_bb;
1351   unsigned i, len;
1352
1353   if (is_gimple_min_invariant (expr))
1354     return true;
1355
1356   if (TREE_CODE (expr) == SSA_NAME)
1357     {
1358       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359       if (def_bb
1360           && flow_bb_inside_loop_p (loop, def_bb))
1361         return false;
1362
1363       return true;
1364     }
1365
1366   if (!EXPR_P (expr))
1367     return false;
1368
1369   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370   for (i = 0; i < len; i++)
1371     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372       return false;
1373
1374   return true;
1375 }
1376
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378    initial ones.  Returns false when the value of the index cannot be determined.
1379    Callback for for_each_index.  */
1380
1381 struct ifs_ivopts_data
1382 {
1383   struct ivopts_data *ivopts_data;
1384   tree stmt;
1385   tree *step_p;
1386 };
1387
1388 static bool
1389 idx_find_step (tree base, tree *idx, void *data)
1390 {
1391   struct ifs_ivopts_data *dta = data;
1392   struct iv *iv;
1393   tree step, iv_step, lbound, off;
1394   struct loop *loop = dta->ivopts_data->current_loop;
1395
1396   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398     return false;
1399
1400   /* If base is a component ref, require that the offset of the reference
1401      be invariant.  */
1402   if (TREE_CODE (base) == COMPONENT_REF)
1403     {
1404       off = component_ref_field_offset (base);
1405       return expr_invariant_in_loop_p (loop, off);
1406     }
1407
1408   /* If base is array, first check whether we will be able to move the
1409      reference out of the loop (in order to take its address in strength
1410      reduction).  In order for this to work we need both lower bound
1411      and step to be loop invariants.  */
1412   if (TREE_CODE (base) == ARRAY_REF)
1413     {
1414       step = array_ref_element_size (base);
1415       lbound = array_ref_low_bound (base);
1416
1417       if (!expr_invariant_in_loop_p (loop, step)
1418           || !expr_invariant_in_loop_p (loop, lbound))
1419         return false;
1420     }
1421
1422   if (TREE_CODE (*idx) != SSA_NAME)
1423     return true;
1424
1425   iv = get_iv (dta->ivopts_data, *idx);
1426   if (!iv)
1427     return false;
1428
1429   *idx = iv->base;
1430
1431   if (!iv->step)
1432     return true;
1433
1434   if (TREE_CODE (base) == ARRAY_REF)
1435     {
1436       step = array_ref_element_size (base);
1437
1438       /* We only handle addresses whose step is an integer constant.  */
1439       if (TREE_CODE (step) != INTEGER_CST)
1440         return false;
1441     }
1442   else
1443     /* The step for pointer arithmetics already is 1 byte.  */
1444     step = build_int_cst (sizetype, 1);
1445
1446   iv_step = convert_step (dta->ivopts_data->current_loop,
1447                           sizetype, iv->base, iv->step, dta->stmt);
1448
1449   if (!iv_step)
1450     {
1451       /* The index might wrap.  */
1452       return false;
1453     }
1454
1455   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1456
1457   if (!*dta->step_p)
1458     *dta->step_p = step;
1459   else
1460     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1461
1462   return true;
1463 }
1464
1465 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1466    object is passed to it in DATA.  */
1467
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470                 void *data)
1471 {
1472   find_interesting_uses_op (data, *idx);
1473   if (TREE_CODE (base) == ARRAY_REF)
1474     {
1475       find_interesting_uses_op (data, array_ref_element_size (base));
1476       find_interesting_uses_op (data, array_ref_low_bound (base));
1477     }
1478   return true;
1479 }
1480
1481 /* Returns true if memory reference REF may be unaligned.  */
1482
1483 static bool
1484 may_be_unaligned_p (tree ref)
1485 {
1486   tree base;
1487   tree base_type;
1488   HOST_WIDE_INT bitsize;
1489   HOST_WIDE_INT bitpos;
1490   tree toffset;
1491   enum machine_mode mode;
1492   int unsignedp, volatilep;
1493   unsigned base_align;
1494
1495   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1496      thus they are not misaligned.  */
1497   if (TREE_CODE (ref) == TARGET_MEM_REF)
1498     return false;
1499
1500   /* The test below is basically copy of what expr.c:normal_inner_ref
1501      does to check whether the object must be loaded by parts when
1502      STRICT_ALIGNMENT is true.  */
1503   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1504                               &unsignedp, &volatilep, true);
1505   base_type = TREE_TYPE (base);
1506   base_align = TYPE_ALIGN (base_type);
1507
1508   if (mode != BLKmode
1509       && (base_align < GET_MODE_ALIGNMENT (mode)
1510           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1511           || bitpos % BITS_PER_UNIT != 0))
1512     return true;
1513
1514   return false;
1515 }
1516
1517 /* Finds addresses in *OP_P inside STMT.  */
1518
1519 static void
1520 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1521 {
1522   tree base = *op_p, step = NULL;
1523   struct iv *civ;
1524   struct ifs_ivopts_data ifs_ivopts_data;
1525
1526   /* Do not play with volatile memory references.  A bit too conservative,
1527      perhaps, but safe.  */
1528   if (stmt_ann (stmt)->has_volatile_ops)
1529     goto fail;
1530
1531   /* Ignore bitfields for now.  Not really something terribly complicated
1532      to handle.  TODO.  */
1533   if (TREE_CODE (base) == COMPONENT_REF
1534       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1535     goto fail;
1536
1537   if (STRICT_ALIGNMENT
1538       && may_be_unaligned_p (base))
1539     goto fail;
1540
1541   base = unshare_expr (base);
1542
1543   if (TREE_CODE (base) == TARGET_MEM_REF)
1544     {
1545       tree type = build_pointer_type (TREE_TYPE (base));
1546       tree astep;
1547
1548       if (TMR_BASE (base)
1549           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1550         {
1551           civ = get_iv (data, TMR_BASE (base));
1552           if (!civ)
1553             goto fail;
1554
1555           TMR_BASE (base) = civ->base;
1556           step = civ->step;
1557         }
1558       if (TMR_INDEX (base)
1559           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1560         {
1561           civ = get_iv (data, TMR_INDEX (base));
1562           if (!civ)
1563             goto fail;
1564
1565           TMR_INDEX (base) = civ->base;
1566           astep = civ->step;
1567
1568           if (astep)
1569             {
1570               if (TMR_STEP (base))
1571                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1572
1573               if (step)
1574                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1575               else
1576                 step = astep;
1577             }
1578         }
1579
1580       if (zero_p (step))
1581         goto fail;
1582       base = tree_mem_ref_addr (type, base);
1583     }
1584   else
1585     {
1586       ifs_ivopts_data.ivopts_data = data;
1587       ifs_ivopts_data.stmt = stmt;
1588       ifs_ivopts_data.step_p = &step;
1589       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1590           || zero_p (step))
1591         goto fail;
1592
1593       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1594       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1595
1596       base = build_fold_addr_expr (base);
1597     }
1598
1599   civ = alloc_iv (base, step);
1600   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1601   return;
1602
1603 fail:
1604   for_each_index (op_p, idx_record_use, data);
1605 }
1606
1607 /* Finds and records invariants used in STMT.  */
1608
1609 static void
1610 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1611 {
1612   ssa_op_iter iter;
1613   use_operand_p use_p;
1614   tree op;
1615
1616   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1617     {
1618       op = USE_FROM_PTR (use_p);
1619       record_invariant (data, op, false);
1620     }
1621 }
1622
1623 /* Finds interesting uses of induction variables in the statement STMT.  */
1624
1625 static void
1626 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1627 {
1628   struct iv *iv;
1629   tree op, lhs, rhs;
1630   ssa_op_iter iter;
1631   use_operand_p use_p;
1632
1633   find_invariants_stmt (data, stmt);
1634
1635   if (TREE_CODE (stmt) == COND_EXPR)
1636     {
1637       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1638       return;
1639     }
1640
1641   if (TREE_CODE (stmt) == MODIFY_EXPR)
1642     {
1643       lhs = TREE_OPERAND (stmt, 0);
1644       rhs = TREE_OPERAND (stmt, 1);
1645
1646       if (TREE_CODE (lhs) == SSA_NAME)
1647         {
1648           /* If the statement defines an induction variable, the uses are not
1649              interesting by themselves.  */
1650
1651           iv = get_iv (data, lhs);
1652
1653           if (iv && !zero_p (iv->step))
1654             return;
1655         }
1656
1657       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1658         {
1659         case tcc_comparison:
1660           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1661           return;
1662
1663         case tcc_reference:
1664           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1665           if (REFERENCE_CLASS_P (lhs))
1666             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1667           return;
1668
1669         default: ;
1670         }
1671
1672       if (REFERENCE_CLASS_P (lhs)
1673           && is_gimple_val (rhs))
1674         {
1675           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1676           find_interesting_uses_op (data, rhs);
1677           return;
1678         }
1679
1680       /* TODO -- we should also handle address uses of type
1681
1682          memory = call (whatever);
1683
1684          and
1685
1686          call (memory).  */
1687     }
1688
1689   if (TREE_CODE (stmt) == PHI_NODE
1690       && bb_for_stmt (stmt) == data->current_loop->header)
1691     {
1692       lhs = PHI_RESULT (stmt);
1693       iv = get_iv (data, lhs);
1694
1695       if (iv && !zero_p (iv->step))
1696         return;
1697     }
1698
1699   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1700     {
1701       op = USE_FROM_PTR (use_p);
1702
1703       if (TREE_CODE (op) != SSA_NAME)
1704         continue;
1705
1706       iv = get_iv (data, op);
1707       if (!iv)
1708         continue;
1709
1710       find_interesting_uses_op (data, op);
1711     }
1712 }
1713
1714 /* Finds interesting uses of induction variables outside of loops
1715    on loop exit edge EXIT.  */
1716
1717 static void
1718 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1719 {
1720   tree phi, def;
1721
1722   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1723     {
1724       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1725       find_interesting_uses_outer (data, def);
1726     }
1727 }
1728
1729 /* Finds uses of the induction variables that are interesting.  */
1730
1731 static void
1732 find_interesting_uses (struct ivopts_data *data)
1733 {
1734   basic_block bb;
1735   block_stmt_iterator bsi;
1736   tree phi;
1737   basic_block *body = get_loop_body (data->current_loop);
1738   unsigned i;
1739   struct version_info *info;
1740   edge e;
1741
1742   if (dump_file && (dump_flags & TDF_DETAILS))
1743     fprintf (dump_file, "Uses:\n\n");
1744
1745   for (i = 0; i < data->current_loop->num_nodes; i++)
1746     {
1747       edge_iterator ei;
1748       bb = body[i];
1749
1750       FOR_EACH_EDGE (e, ei, bb->succs)
1751         if (e->dest != EXIT_BLOCK_PTR
1752             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1753           find_interesting_uses_outside (data, e);
1754
1755       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1756         find_interesting_uses_stmt (data, phi);
1757       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1758         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1759     }
1760
1761   if (dump_file && (dump_flags & TDF_DETAILS))
1762     {
1763       bitmap_iterator bi;
1764
1765       fprintf (dump_file, "\n");
1766
1767       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1768         {
1769           info = ver_info (data, i);
1770           if (info->inv_id)
1771             {
1772               fprintf (dump_file, "  ");
1773               print_generic_expr (dump_file, info->name, TDF_SLIM);
1774               fprintf (dump_file, " is invariant (%d)%s\n",
1775                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1776             }
1777         }
1778
1779       fprintf (dump_file, "\n");
1780     }
1781
1782   free (body);
1783 }
1784
1785 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1786    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1787    we are at the top-level of the processed address.  */
1788
1789 static tree
1790 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1791                 unsigned HOST_WIDE_INT *offset)
1792 {
1793   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1794   enum tree_code code;
1795   tree type, orig_type = TREE_TYPE (expr);
1796   unsigned HOST_WIDE_INT off0, off1, st;
1797   tree orig_expr = expr;
1798
1799   STRIP_NOPS (expr);
1800
1801   type = TREE_TYPE (expr);
1802   code = TREE_CODE (expr);
1803   *offset = 0;
1804
1805   switch (code)
1806     {
1807     case INTEGER_CST:
1808       if (!cst_and_fits_in_hwi (expr)
1809           || zero_p (expr))
1810         return orig_expr;
1811
1812       *offset = int_cst_value (expr);
1813       return build_int_cst_type (orig_type, 0);
1814
1815     case PLUS_EXPR:
1816     case MINUS_EXPR:
1817       op0 = TREE_OPERAND (expr, 0);
1818       op1 = TREE_OPERAND (expr, 1);
1819
1820       op0 = strip_offset_1 (op0, false, false, &off0);
1821       op1 = strip_offset_1 (op1, false, false, &off1);
1822
1823       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1824       if (op0 == TREE_OPERAND (expr, 0)
1825           && op1 == TREE_OPERAND (expr, 1))
1826         return orig_expr;
1827
1828       if (zero_p (op1))
1829         expr = op0;
1830       else if (zero_p (op0))
1831         {
1832           if (code == PLUS_EXPR)
1833             expr = op1;
1834           else
1835             expr = fold_build1 (NEGATE_EXPR, type, op1);
1836         }
1837       else
1838         expr = fold_build2 (code, type, op0, op1);
1839
1840       return fold_convert (orig_type, expr);
1841
1842     case ARRAY_REF:
1843       if (!inside_addr)
1844         return orig_expr;
1845
1846       step = array_ref_element_size (expr);
1847       if (!cst_and_fits_in_hwi (step))
1848         break;
1849
1850       st = int_cst_value (step);
1851       op1 = TREE_OPERAND (expr, 1);
1852       op1 = strip_offset_1 (op1, false, false, &off1);
1853       *offset = off1 * st;
1854
1855       if (top_compref
1856           && zero_p (op1))
1857         {
1858           /* Strip the component reference completely.  */
1859           op0 = TREE_OPERAND (expr, 0);
1860           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1861           *offset += off0;
1862           return op0;
1863         }
1864       break;
1865
1866     case COMPONENT_REF:
1867       if (!inside_addr)
1868         return orig_expr;
1869
1870       tmp = component_ref_field_offset (expr);
1871       if (top_compref
1872           && cst_and_fits_in_hwi (tmp))
1873         {
1874           /* Strip the component reference completely.  */
1875           op0 = TREE_OPERAND (expr, 0);
1876           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1877           *offset = off0 + int_cst_value (tmp);
1878           return op0;
1879         }
1880       break;
1881
1882     case ADDR_EXPR:
1883       op0 = TREE_OPERAND (expr, 0);
1884       op0 = strip_offset_1 (op0, true, true, &off0);
1885       *offset += off0;
1886
1887       if (op0 == TREE_OPERAND (expr, 0))
1888         return orig_expr;
1889
1890       expr = build_fold_addr_expr (op0);
1891       return fold_convert (orig_type, expr);
1892
1893     case INDIRECT_REF:
1894       inside_addr = false;
1895       break;
1896
1897     default:
1898       return orig_expr;
1899     }
1900
1901   /* Default handling of expressions for that we want to recurse into
1902      the first operand.  */
1903   op0 = TREE_OPERAND (expr, 0);
1904   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1905   *offset += off0;
1906
1907   if (op0 == TREE_OPERAND (expr, 0)
1908       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1909     return orig_expr;
1910
1911   expr = copy_node (expr);
1912   TREE_OPERAND (expr, 0) = op0;
1913   if (op1)
1914     TREE_OPERAND (expr, 1) = op1;
1915
1916   /* Inside address, we might strip the top level component references,
1917      thus changing type of the expression.  Handling of ADDR_EXPR
1918      will fix that.  */
1919   expr = fold_convert (orig_type, expr);
1920
1921   return expr;
1922 }
1923
1924 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1925
1926 static tree
1927 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1928 {
1929   return strip_offset_1 (expr, false, false, offset);
1930 }
1931
1932 /* Returns variant of TYPE that can be used as base for different uses.
1933    For integer types, we return unsigned variant of the type, which
1934    avoids problems with overflows.  For pointer types, we return void *.  */
1935
1936 static tree
1937 generic_type_for (tree type)
1938 {
1939   if (POINTER_TYPE_P (type))
1940     return ptr_type_node;
1941
1942   if (TYPE_UNSIGNED (type))
1943     return type;
1944
1945   return unsigned_type_for (type);
1946 }
1947
1948 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1949    the bitmap to that we should store it.  */
1950
1951 static struct ivopts_data *fd_ivopts_data;
1952 static tree
1953 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1954 {
1955   bitmap *depends_on = data;
1956   struct version_info *info;
1957
1958   if (TREE_CODE (*expr_p) != SSA_NAME)
1959     return NULL_TREE;
1960   info = name_info (fd_ivopts_data, *expr_p);
1961
1962   if (!info->inv_id || info->has_nonlin_use)
1963     return NULL_TREE;
1964
1965   if (!*depends_on)
1966     *depends_on = BITMAP_ALLOC (NULL);
1967   bitmap_set_bit (*depends_on, info->inv_id);
1968
1969   return NULL_TREE;
1970 }
1971
1972 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1973    position to POS.  If USE is not NULL, the candidate is set as related to
1974    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1975    replacement of the final value of the iv by a direct computation.  */
1976
1977 static struct iv_cand *
1978 add_candidate_1 (struct ivopts_data *data,
1979                  tree base, tree step, bool important, enum iv_position pos,
1980                  struct iv_use *use, tree incremented_at)
1981 {
1982   unsigned i;
1983   struct iv_cand *cand = NULL;
1984   tree type, orig_type;
1985
1986   if (base)
1987     {
1988       orig_type = TREE_TYPE (base);
1989       type = generic_type_for (orig_type);
1990       if (type != orig_type)
1991         {
1992           base = fold_convert (type, base);
1993           if (step)
1994             step = fold_convert (type, step);
1995         }
1996     }
1997
1998   for (i = 0; i < n_iv_cands (data); i++)
1999     {
2000       cand = iv_cand (data, i);
2001
2002       if (cand->pos != pos)
2003         continue;
2004
2005       if (cand->incremented_at != incremented_at)
2006         continue;
2007
2008       if (!cand->iv)
2009         {
2010           if (!base && !step)
2011             break;
2012
2013           continue;
2014         }
2015
2016       if (!base && !step)
2017         continue;
2018
2019       if (!operand_equal_p (base, cand->iv->base, 0))
2020         continue;
2021
2022       if (zero_p (cand->iv->step))
2023         {
2024           if (zero_p (step))
2025             break;
2026         }
2027       else
2028         {
2029           if (step && operand_equal_p (step, cand->iv->step, 0))
2030             break;
2031         }
2032     }
2033
2034   if (i == n_iv_cands (data))
2035     {
2036       cand = xcalloc (1, sizeof (struct iv_cand));
2037       cand->id = i;
2038
2039       if (!base && !step)
2040         cand->iv = NULL;
2041       else
2042         cand->iv = alloc_iv (base, step);
2043
2044       cand->pos = pos;
2045       if (pos != IP_ORIGINAL && cand->iv)
2046         {
2047           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2048           cand->var_after = cand->var_before;
2049         }
2050       cand->important = important;
2051       cand->incremented_at = incremented_at;
2052       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2053
2054       if (step
2055           && TREE_CODE (step) != INTEGER_CST)
2056         {
2057           fd_ivopts_data = data;
2058           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2059         }
2060
2061       if (dump_file && (dump_flags & TDF_DETAILS))
2062         dump_cand (dump_file, cand);
2063     }
2064
2065   if (important && !cand->important)
2066     {
2067       cand->important = true;
2068       if (dump_file && (dump_flags & TDF_DETAILS))
2069         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2070     }
2071
2072   if (use)
2073     {
2074       bitmap_set_bit (use->related_cands, i);
2075       if (dump_file && (dump_flags & TDF_DETAILS))
2076         fprintf (dump_file, "Candidate %d is related to use %d\n",
2077                  cand->id, use->id);
2078     }
2079
2080   return cand;
2081 }
2082
2083 /* Returns true if incrementing the induction variable at the end of the LOOP
2084    is allowed.
2085
2086    The purpose is to avoid splitting latch edge with a biv increment, thus
2087    creating a jump, possibly confusing other optimization passes and leaving
2088    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2089    is not available (so we do not have a better alternative), or if the latch
2090    edge is already nonempty.  */
2091
2092 static bool
2093 allow_ip_end_pos_p (struct loop *loop)
2094 {
2095   if (!ip_normal_pos (loop))
2096     return true;
2097
2098   if (!empty_block_p (ip_end_pos (loop)))
2099     return true;
2100
2101   return false;
2102 }
2103
2104 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2105    position to POS.  If USE is not NULL, the candidate is set as related to
2106    it.  The candidate computation is scheduled on all available positions.  */
2107
2108 static void
2109 add_candidate (struct ivopts_data *data,
2110                tree base, tree step, bool important, struct iv_use *use)
2111 {
2112   if (ip_normal_pos (data->current_loop))
2113     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2114   if (ip_end_pos (data->current_loop)
2115       && allow_ip_end_pos_p (data->current_loop))
2116     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2117 }
2118
2119 /* Add a standard "0 + 1 * iteration" iv candidate for a
2120    type with SIZE bits.  */
2121
2122 static void
2123 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2124                                      unsigned int size)
2125 {
2126   tree type = lang_hooks.types.type_for_size (size, true);
2127   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2128                  true, NULL);
2129 }
2130
2131 /* Adds standard iv candidates.  */
2132
2133 static void
2134 add_standard_iv_candidates (struct ivopts_data *data)
2135 {
2136   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2137
2138   /* The same for a double-integer type if it is still fast enough.  */
2139   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2140     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2141 }
2142
2143
2144 /* Adds candidates bases on the old induction variable IV.  */
2145
2146 static void
2147 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2148 {
2149   tree phi, def;
2150   struct iv_cand *cand;
2151
2152   add_candidate (data, iv->base, iv->step, true, NULL);
2153
2154   /* The same, but with initial value zero.  */
2155   add_candidate (data,
2156                  build_int_cst (TREE_TYPE (iv->base), 0),
2157                  iv->step, true, NULL);
2158
2159   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2160   if (TREE_CODE (phi) == PHI_NODE)
2161     {
2162       /* Additionally record the possibility of leaving the original iv
2163          untouched.  */
2164       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2165       cand = add_candidate_1 (data,
2166                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2167                               SSA_NAME_DEF_STMT (def));
2168       cand->var_before = iv->ssa_name;
2169       cand->var_after = def;
2170     }
2171 }
2172
2173 /* Adds candidates based on the old induction variables.  */
2174
2175 static void
2176 add_old_ivs_candidates (struct ivopts_data *data)
2177 {
2178   unsigned i;
2179   struct iv *iv;
2180   bitmap_iterator bi;
2181
2182   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2183     {
2184       iv = ver_info (data, i)->iv;
2185       if (iv && iv->biv_p && !zero_p (iv->step))
2186         add_old_iv_candidates (data, iv);
2187     }
2188 }
2189
2190 /* Adds candidates based on the value of the induction variable IV and USE.  */
2191
2192 static void
2193 add_iv_value_candidates (struct ivopts_data *data,
2194                          struct iv *iv, struct iv_use *use)
2195 {
2196   unsigned HOST_WIDE_INT offset;
2197   tree base;
2198
2199   add_candidate (data, iv->base, iv->step, false, use);
2200
2201   /* The same, but with initial value zero.  Make such variable important,
2202      since it is generic enough so that possibly many uses may be based
2203      on it.  */
2204   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2205                  iv->step, true, use);
2206
2207   /* Third, try removing the constant offset.  */
2208   base = strip_offset (iv->base, &offset);
2209   if (offset)
2210     add_candidate (data, base, iv->step, false, use);
2211 }
2212
2213 /* Possibly adds pseudocandidate for replacing the final value of USE by
2214    a direct computation.  */
2215
2216 static void
2217 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2218 {
2219   struct tree_niter_desc *niter;
2220
2221   /* We must know where we exit the loop and how many times does it roll.  */
2222   niter = niter_for_single_dom_exit (data);
2223   if (!niter
2224       || !zero_p (niter->may_be_zero))
2225     return;
2226
2227   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2228 }
2229
2230 /* Adds candidates based on the uses.  */
2231
2232 static void
2233 add_derived_ivs_candidates (struct ivopts_data *data)
2234 {
2235   unsigned i;
2236
2237   for (i = 0; i < n_iv_uses (data); i++)
2238     {
2239       struct iv_use *use = iv_use (data, i);
2240
2241       if (!use)
2242         continue;
2243
2244       switch (use->type)
2245         {
2246         case USE_NONLINEAR_EXPR:
2247         case USE_COMPARE:
2248         case USE_ADDRESS:
2249           /* Just add the ivs based on the value of the iv used here.  */
2250           add_iv_value_candidates (data, use->iv, use);
2251           break;
2252
2253         case USE_OUTER:
2254           add_iv_value_candidates (data, use->iv, use);
2255
2256           /* Additionally, add the pseudocandidate for the possibility to
2257              replace the final value by a direct computation.  */
2258           add_iv_outer_candidates (data, use);
2259           break;
2260
2261         default:
2262           gcc_unreachable ();
2263         }
2264     }
2265 }
2266
2267 /* Record important candidates and add them to related_cands bitmaps
2268    if needed.  */
2269
2270 static void
2271 record_important_candidates (struct ivopts_data *data)
2272 {
2273   unsigned i;
2274   struct iv_use *use;
2275
2276   for (i = 0; i < n_iv_cands (data); i++)
2277     {
2278       struct iv_cand *cand = iv_cand (data, i);
2279
2280       if (cand->important)
2281         bitmap_set_bit (data->important_candidates, i);
2282     }
2283
2284   data->consider_all_candidates = (n_iv_cands (data)
2285                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2286
2287   if (data->consider_all_candidates)
2288     {
2289       /* We will not need "related_cands" bitmaps in this case,
2290          so release them to decrease peak memory consumption.  */
2291       for (i = 0; i < n_iv_uses (data); i++)
2292         {
2293           use = iv_use (data, i);
2294           BITMAP_FREE (use->related_cands);
2295         }
2296     }
2297   else
2298     {
2299       /* Add important candidates to the related_cands bitmaps.  */
2300       for (i = 0; i < n_iv_uses (data); i++)
2301         bitmap_ior_into (iv_use (data, i)->related_cands,
2302                          data->important_candidates);
2303     }
2304 }
2305
2306 /* Finds the candidates for the induction variables.  */
2307
2308 static void
2309 find_iv_candidates (struct ivopts_data *data)
2310 {
2311   /* Add commonly used ivs.  */
2312   add_standard_iv_candidates (data);
2313
2314   /* Add old induction variables.  */
2315   add_old_ivs_candidates (data);
2316
2317   /* Add induction variables derived from uses.  */
2318   add_derived_ivs_candidates (data);
2319
2320   /* Record the important candidates.  */
2321   record_important_candidates (data);
2322 }
2323
2324 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2325    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2326    we allocate a simple list to every use.  */
2327
2328 static void
2329 alloc_use_cost_map (struct ivopts_data *data)
2330 {
2331   unsigned i, size, s, j;
2332
2333   for (i = 0; i < n_iv_uses (data); i++)
2334     {
2335       struct iv_use *use = iv_use (data, i);
2336       bitmap_iterator bi;
2337
2338       if (data->consider_all_candidates)
2339         size = n_iv_cands (data);
2340       else
2341         {
2342           s = 0;
2343           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2344             {
2345               s++;
2346             }
2347
2348           /* Round up to the power of two, so that moduling by it is fast.  */
2349           for (size = 1; size < s; size <<= 1)
2350             continue;
2351         }
2352
2353       use->n_map_members = size;
2354       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2355     }
2356 }
2357
2358 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2359    on invariants DEPENDS_ON and that the value used in expressing it
2360    is VALUE.*/
2361
2362 static void
2363 set_use_iv_cost (struct ivopts_data *data,
2364                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2365                  bitmap depends_on, tree value)
2366 {
2367   unsigned i, s;
2368
2369   if (cost == INFTY)
2370     {
2371       BITMAP_FREE (depends_on);
2372       return;
2373     }
2374
2375   if (data->consider_all_candidates)
2376     {
2377       use->cost_map[cand->id].cand = cand;
2378       use->cost_map[cand->id].cost = cost;
2379       use->cost_map[cand->id].depends_on = depends_on;
2380       use->cost_map[cand->id].value = value;
2381       return;
2382     }
2383
2384   /* n_map_members is a power of two, so this computes modulo.  */
2385   s = cand->id & (use->n_map_members - 1);
2386   for (i = s; i < use->n_map_members; i++)
2387     if (!use->cost_map[i].cand)
2388       goto found;
2389   for (i = 0; i < s; i++)
2390     if (!use->cost_map[i].cand)
2391       goto found;
2392
2393   gcc_unreachable ();
2394
2395 found:
2396   use->cost_map[i].cand = cand;
2397   use->cost_map[i].cost = cost;
2398   use->cost_map[i].depends_on = depends_on;
2399   use->cost_map[i].value = value;
2400 }
2401
2402 /* Gets cost of (USE, CANDIDATE) pair.  */
2403
2404 static struct cost_pair *
2405 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2406                  struct iv_cand *cand)
2407 {
2408   unsigned i, s;
2409   struct cost_pair *ret;
2410
2411   if (!cand)
2412     return NULL;
2413
2414   if (data->consider_all_candidates)
2415     {
2416       ret = use->cost_map + cand->id;
2417       if (!ret->cand)
2418         return NULL;
2419
2420       return ret;
2421     }
2422
2423   /* n_map_members is a power of two, so this computes modulo.  */
2424   s = cand->id & (use->n_map_members - 1);
2425   for (i = s; i < use->n_map_members; i++)
2426     if (use->cost_map[i].cand == cand)
2427       return use->cost_map + i;
2428
2429   for (i = 0; i < s; i++)
2430     if (use->cost_map[i].cand == cand)
2431       return use->cost_map + i;
2432
2433   return NULL;
2434 }
2435
2436 /* Returns estimate on cost of computing SEQ.  */
2437
2438 static unsigned
2439 seq_cost (rtx seq)
2440 {
2441   unsigned cost = 0;
2442   rtx set;
2443
2444   for (; seq; seq = NEXT_INSN (seq))
2445     {
2446       set = single_set (seq);
2447       if (set)
2448         cost += rtx_cost (set, SET);
2449       else
2450         cost++;
2451     }
2452
2453   return cost;
2454 }
2455
2456 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2457 static rtx
2458 produce_memory_decl_rtl (tree obj, int *regno)
2459 {
2460   rtx x;
2461
2462   gcc_assert (obj);
2463   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2464     {
2465       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2466       x = gen_rtx_SYMBOL_REF (Pmode, name);
2467     }
2468   else
2469     x = gen_raw_REG (Pmode, (*regno)++);
2470
2471   return gen_rtx_MEM (DECL_MODE (obj), x);
2472 }
2473
2474 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2475    walk_tree.  DATA contains the actual fake register number.  */
2476
2477 static tree
2478 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2479 {
2480   tree obj = NULL_TREE;
2481   rtx x = NULL_RTX;
2482   int *regno = data;
2483
2484   switch (TREE_CODE (*expr_p))
2485     {
2486     case ADDR_EXPR:
2487       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2488            handled_component_p (*expr_p);
2489            expr_p = &TREE_OPERAND (*expr_p, 0))
2490         continue;
2491       obj = *expr_p;
2492       if (DECL_P (obj))
2493         x = produce_memory_decl_rtl (obj, regno);
2494       break;
2495
2496     case SSA_NAME:
2497       *ws = 0;
2498       obj = SSA_NAME_VAR (*expr_p);
2499       if (!DECL_RTL_SET_P (obj))
2500         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2501       break;
2502
2503     case VAR_DECL:
2504     case PARM_DECL:
2505     case RESULT_DECL:
2506       *ws = 0;
2507       obj = *expr_p;
2508
2509       if (DECL_RTL_SET_P (obj))
2510         break;
2511
2512       if (DECL_MODE (obj) == BLKmode)
2513         x = produce_memory_decl_rtl (obj, regno);
2514       else
2515         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2516
2517       break;
2518
2519     default:
2520       break;
2521     }
2522
2523   if (x)
2524     {
2525       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2526       SET_DECL_RTL (obj, x);
2527     }
2528
2529   return NULL_TREE;
2530 }
2531
2532 /* Determines cost of the computation of EXPR.  */
2533
2534 static unsigned
2535 computation_cost (tree expr)
2536 {
2537   rtx seq, rslt;
2538   tree type = TREE_TYPE (expr);
2539   unsigned cost;
2540   /* Avoid using hard regs in ways which may be unsupported.  */
2541   int regno = LAST_VIRTUAL_REGISTER + 1;
2542
2543   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2544   start_sequence ();
2545   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2546   seq = get_insns ();
2547   end_sequence ();
2548
2549   cost = seq_cost (seq);
2550   if (MEM_P (rslt))
2551     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2552
2553   return cost;
2554 }
2555
2556 /* Returns variable containing the value of candidate CAND at statement AT.  */
2557
2558 static tree
2559 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2560 {
2561   if (stmt_after_increment (loop, cand, stmt))
2562     return cand->var_after;
2563   else
2564     return cand->var_before;
2565 }
2566
2567 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2568    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2569
2570 int
2571 tree_int_cst_sign_bit (tree t)
2572 {
2573   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2574   unsigned HOST_WIDE_INT w;
2575
2576   if (bitno < HOST_BITS_PER_WIDE_INT)
2577     w = TREE_INT_CST_LOW (t);
2578   else
2579     {
2580       w = TREE_INT_CST_HIGH (t);
2581       bitno -= HOST_BITS_PER_WIDE_INT;
2582     }
2583
2584   return (w >> bitno) & 1;
2585 }
2586
2587 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2588    return cst.  Otherwise return NULL_TREE.  */
2589
2590 static tree
2591 constant_multiple_of (tree type, tree top, tree bot)
2592 {
2593   tree res, mby, p0, p1;
2594   enum tree_code code;
2595   bool negate;
2596
2597   STRIP_NOPS (top);
2598   STRIP_NOPS (bot);
2599
2600   if (operand_equal_p (top, bot, 0))
2601     return build_int_cst (type, 1);
2602
2603   code = TREE_CODE (top);
2604   switch (code)
2605     {
2606     case MULT_EXPR:
2607       mby = TREE_OPERAND (top, 1);
2608       if (TREE_CODE (mby) != INTEGER_CST)
2609         return NULL_TREE;
2610
2611       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2612       if (!res)
2613         return NULL_TREE;
2614
2615       return fold_binary_to_constant (MULT_EXPR, type, res,
2616                                       fold_convert (type, mby));
2617
2618     case PLUS_EXPR:
2619     case MINUS_EXPR:
2620       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2621       if (!p0)
2622         return NULL_TREE;
2623       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2624       if (!p1)
2625         return NULL_TREE;
2626
2627       return fold_binary_to_constant (code, type, p0, p1);
2628
2629     case INTEGER_CST:
2630       if (TREE_CODE (bot) != INTEGER_CST)
2631         return NULL_TREE;
2632
2633       bot = fold_convert (type, bot);
2634       top = fold_convert (type, top);
2635
2636       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2637          the result afterwards.  */
2638       if (tree_int_cst_sign_bit (bot))
2639         {
2640           negate = true;
2641           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2642         }
2643       else
2644         negate = false;
2645
2646       /* Ditto for TOP.  */
2647       if (tree_int_cst_sign_bit (top))
2648         {
2649           negate = !negate;
2650           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2651         }
2652
2653       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2654         return NULL_TREE;
2655
2656       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2657       if (negate)
2658         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2659       return res;
2660
2661     default:
2662       return NULL_TREE;
2663     }
2664 }
2665
2666 /* Sets COMB to CST.  */
2667
2668 static void
2669 aff_combination_const (struct affine_tree_combination *comb, tree type,
2670                        unsigned HOST_WIDE_INT cst)
2671 {
2672   unsigned prec = TYPE_PRECISION (type);
2673
2674   comb->type = type;
2675   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2676
2677   comb->n = 0;
2678   comb->rest = NULL_TREE;
2679   comb->offset = cst & comb->mask;
2680 }
2681
2682 /* Sets COMB to single element ELT.  */
2683
2684 static void
2685 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2686 {
2687   unsigned prec = TYPE_PRECISION (type);
2688
2689   comb->type = type;
2690   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2691
2692   comb->n = 1;
2693   comb->elts[0] = elt;
2694   comb->coefs[0] = 1;
2695   comb->rest = NULL_TREE;
2696   comb->offset = 0;
2697 }
2698
2699 /* Scales COMB by SCALE.  */
2700
2701 static void
2702 aff_combination_scale (struct affine_tree_combination *comb,
2703                        unsigned HOST_WIDE_INT scale)
2704 {
2705   unsigned i, j;
2706
2707   if (scale == 1)
2708     return;
2709
2710   if (scale == 0)
2711     {
2712       aff_combination_const (comb, comb->type, 0);
2713       return;
2714     }
2715
2716   comb->offset = (scale * comb->offset) & comb->mask;
2717   for (i = 0, j = 0; i < comb->n; i++)
2718     {
2719       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2720       comb->elts[j] = comb->elts[i];
2721       if (comb->coefs[j] != 0)
2722         j++;
2723     }
2724   comb->n = j;
2725
2726   if (comb->rest)
2727     {
2728       if (comb->n < MAX_AFF_ELTS)
2729         {
2730           comb->coefs[comb->n] = scale;
2731           comb->elts[comb->n] = comb->rest;
2732           comb->rest = NULL_TREE;
2733           comb->n++;
2734         }
2735       else
2736         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2737                                   build_int_cst_type (comb->type, scale));
2738     }
2739 }
2740
2741 /* Adds ELT * SCALE to COMB.  */
2742
2743 static void
2744 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2745                          unsigned HOST_WIDE_INT scale)
2746 {
2747   unsigned i;
2748
2749   if (scale == 0)
2750     return;
2751
2752   for (i = 0; i < comb->n; i++)
2753     if (operand_equal_p (comb->elts[i], elt, 0))
2754       {
2755         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2756         if (comb->coefs[i])
2757           return;
2758
2759         comb->n--;
2760         comb->coefs[i] = comb->coefs[comb->n];
2761         comb->elts[i] = comb->elts[comb->n];
2762         return;
2763       }
2764   if (comb->n < MAX_AFF_ELTS)
2765     {
2766       comb->coefs[comb->n] = scale;
2767       comb->elts[comb->n] = elt;
2768       comb->n++;
2769       return;
2770     }
2771
2772   if (scale == 1)
2773     elt = fold_convert (comb->type, elt);
2774   else
2775     elt = fold_build2 (MULT_EXPR, comb->type,
2776                        fold_convert (comb->type, elt),
2777                        build_int_cst_type (comb->type, scale));
2778
2779   if (comb->rest)
2780     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2781   else
2782     comb->rest = elt;
2783 }
2784
2785 /* Adds COMB2 to COMB1.  */
2786
2787 static void
2788 aff_combination_add (struct affine_tree_combination *comb1,
2789                      struct affine_tree_combination *comb2)
2790 {
2791   unsigned i;
2792
2793   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2794   for (i = 0; i < comb2-> n; i++)
2795     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2796   if (comb2->rest)
2797     aff_combination_add_elt (comb1, comb2->rest, 1);
2798 }
2799
2800 /* Splits EXPR into an affine combination of parts.  */
2801
2802 static void
2803 tree_to_aff_combination (tree expr, tree type,
2804                          struct affine_tree_combination *comb)
2805 {
2806   struct affine_tree_combination tmp;
2807   enum tree_code code;
2808   tree cst, core, toffset;
2809   HOST_WIDE_INT bitpos, bitsize;
2810   enum machine_mode mode;
2811   int unsignedp, volatilep;
2812
2813   STRIP_NOPS (expr);
2814
2815   code = TREE_CODE (expr);
2816   switch (code)
2817     {
2818     case INTEGER_CST:
2819       aff_combination_const (comb, type, int_cst_value (expr));
2820       return;
2821
2822     case PLUS_EXPR:
2823     case MINUS_EXPR:
2824       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2825       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2826       if (code == MINUS_EXPR)
2827         aff_combination_scale (&tmp, -1);
2828       aff_combination_add (comb, &tmp);
2829       return;
2830
2831     case MULT_EXPR:
2832       cst = TREE_OPERAND (expr, 1);
2833       if (TREE_CODE (cst) != INTEGER_CST)
2834         break;
2835       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836       aff_combination_scale (comb, int_cst_value (cst));
2837       return;
2838
2839     case NEGATE_EXPR:
2840       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2841       aff_combination_scale (comb, -1);
2842       return;
2843
2844     case ADDR_EXPR:
2845       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2846                                   &toffset, &mode, &unsignedp, &volatilep,
2847                                   false);
2848       if (bitpos % BITS_PER_UNIT != 0)
2849         break;
2850       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2851       core = build_fold_addr_expr (core);
2852       if (TREE_CODE (core) == ADDR_EXPR)
2853         aff_combination_add_elt (comb, core, 1);
2854       else
2855         {
2856           tree_to_aff_combination (core, type, &tmp);
2857           aff_combination_add (comb, &tmp);
2858         }
2859       if (toffset)
2860         {
2861           tree_to_aff_combination (toffset, type, &tmp);
2862           aff_combination_add (comb, &tmp);
2863         }
2864       return;
2865
2866     default:
2867       break;
2868     }
2869
2870   aff_combination_elt (comb, type, expr);
2871 }
2872
2873 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2874
2875 static tree
2876 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2877                  unsigned HOST_WIDE_INT mask)
2878 {
2879   enum tree_code code;
2880
2881   scale &= mask;
2882   elt = fold_convert (type, elt);
2883
2884   if (scale == 1)
2885     {
2886       if (!expr)
2887         return elt;
2888
2889       return fold_build2 (PLUS_EXPR, type, expr, elt);
2890     }
2891
2892   if (scale == mask)
2893     {
2894       if (!expr)
2895         return fold_build1 (NEGATE_EXPR, type, elt);
2896
2897       return fold_build2 (MINUS_EXPR, type, expr, elt);
2898     }
2899
2900   if (!expr)
2901     return fold_build2 (MULT_EXPR, type, elt,
2902                         build_int_cst_type (type, scale));
2903
2904   if ((scale | (mask >> 1)) == mask)
2905     {
2906       /* Scale is negative.  */
2907       code = MINUS_EXPR;
2908       scale = (-scale) & mask;
2909     }
2910   else
2911     code = PLUS_EXPR;
2912
2913   elt = fold_build2 (MULT_EXPR, type, elt,
2914                      build_int_cst_type (type, scale));
2915   return fold_build2 (code, type, expr, elt);
2916 }
2917
2918 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2919
2920 static void
2921 unshare_aff_combination (struct affine_tree_combination *comb)
2922 {
2923   unsigned i;
2924
2925   for (i = 0; i < comb->n; i++)
2926     comb->elts[i] = unshare_expr (comb->elts[i]);
2927   if (comb->rest)
2928     comb->rest = unshare_expr (comb->rest);
2929 }
2930
2931 /* Makes tree from the affine combination COMB.  */
2932
2933 static tree
2934 aff_combination_to_tree (struct affine_tree_combination *comb)
2935 {
2936   tree type = comb->type;
2937   tree expr = comb->rest;
2938   unsigned i;
2939   unsigned HOST_WIDE_INT off, sgn;
2940
2941   /* Handle the special case produced by get_computation_aff when
2942      the type does not fit in HOST_WIDE_INT.  */
2943   if (comb->n == 0 && comb->offset == 0)
2944     return fold_convert (type, expr);
2945
2946   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2947
2948   for (i = 0; i < comb->n; i++)
2949     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2950                             comb->mask);
2951
2952   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2953     {
2954       /* Offset is negative.  */
2955       off = (-comb->offset) & comb->mask;
2956       sgn = comb->mask;
2957     }
2958   else
2959     {
2960       off = comb->offset;
2961       sgn = 1;
2962     }
2963   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2964                           comb->mask);
2965 }
2966
2967 /* Determines the expression by that USE is expressed from induction variable
2968    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2969    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2970
2971 static bool
2972 get_computation_aff (struct loop *loop,
2973                      struct iv_use *use, struct iv_cand *cand, tree at,
2974                      struct affine_tree_combination *aff)
2975 {
2976   tree ubase = use->iv->base;
2977   tree ustep = use->iv->step;
2978   tree cbase = cand->iv->base;
2979   tree cstep = cand->iv->step;
2980   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2981   tree uutype;
2982   tree expr, delta;
2983   tree ratio;
2984   unsigned HOST_WIDE_INT ustepi, cstepi;
2985   HOST_WIDE_INT ratioi;
2986   struct affine_tree_combination cbase_aff, expr_aff;
2987   tree cstep_orig = cstep, ustep_orig = ustep;
2988
2989   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2990     {
2991       /* We do not have a precision to express the values of use.  */
2992       return false;
2993     }
2994
2995   expr = var_at_stmt (loop, cand, at);
2996
2997   if (TREE_TYPE (expr) != ctype)
2998     {
2999       /* This may happen with the original ivs.  */
3000       expr = fold_convert (ctype, expr);
3001     }
3002
3003   if (TYPE_UNSIGNED (utype))
3004     uutype = utype;
3005   else
3006     {
3007       uutype = unsigned_type_for (utype);
3008       ubase = fold_convert (uutype, ubase);
3009       ustep = fold_convert (uutype, ustep);
3010     }
3011
3012   if (uutype != ctype)
3013     {
3014       expr = fold_convert (uutype, expr);
3015       cbase = fold_convert (uutype, cbase);
3016       cstep = fold_convert (uutype, cstep);
3017
3018       /* If the conversion is not noop, we must take it into account when
3019          considering the value of the step.  */
3020       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3021         cstep_orig = cstep;
3022     }
3023
3024   if (cst_and_fits_in_hwi (cstep_orig)
3025       && cst_and_fits_in_hwi (ustep_orig))
3026     {
3027       ustepi = int_cst_value (ustep_orig);
3028       cstepi = int_cst_value (cstep_orig);
3029
3030       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3031         {
3032           /* TODO maybe consider case when ustep divides cstep and the ratio is
3033              a power of 2 (so that the division is fast to execute)?  We would
3034              need to be much more careful with overflows etc. then.  */
3035           return false;
3036         }
3037
3038       ratio = build_int_cst_type (uutype, ratioi);
3039     }
3040   else
3041     {
3042       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3043       if (!ratio)
3044         return false;
3045
3046       /* Ratioi is only used to detect special cases when the multiplicative
3047          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3048          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3049          to integer_onep/integer_all_onesp, since the former ignores
3050          TREE_OVERFLOW.  */
3051       if (cst_and_fits_in_hwi (ratio))
3052         ratioi = int_cst_value (ratio);
3053       else if (integer_onep (ratio))
3054         ratioi = 1;
3055       else if (integer_all_onesp (ratio))
3056         ratioi = -1;
3057       else
3058         ratioi = 0;
3059     }
3060
3061   /* We may need to shift the value if we are after the increment.  */
3062   if (stmt_after_increment (loop, cand, at))
3063     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3064
3065   /* use = ubase - ratio * cbase + ratio * var.
3066
3067      In general case ubase + ratio * (var - cbase) could be better (one less
3068      multiplication), but often it is possible to eliminate redundant parts
3069      of computations from (ubase - ratio * cbase) term, and if it does not
3070      happen, fold is able to apply the distributive law to obtain this form
3071      anyway.  */
3072
3073   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3074     {
3075       /* Let's compute in trees and just return the result in AFF.  This case
3076          should not be very common, and fold itself is not that bad either,
3077          so making the aff. functions more complicated to handle this case
3078          is not that urgent.  */
3079       if (ratioi == 1)
3080         {
3081           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3082           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3083         }
3084       else if (ratioi == -1)
3085         {
3086           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3087           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3088         }
3089       else
3090         {
3091           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3092           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3093           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3094           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3095         }
3096
3097       aff->type = uutype;
3098       aff->n = 0;
3099       aff->offset = 0;
3100       aff->mask = 0;
3101       aff->rest = expr;
3102       return true;
3103     }
3104
3105   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3106      possible to compute ratioi.  */
3107   gcc_assert (ratioi);
3108
3109   tree_to_aff_combination (ubase, uutype, aff);
3110   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3111   tree_to_aff_combination (expr, uutype, &expr_aff);
3112   aff_combination_scale (&cbase_aff, -ratioi);
3113   aff_combination_scale (&expr_aff, ratioi);
3114   aff_combination_add (aff, &cbase_aff);
3115   aff_combination_add (aff, &expr_aff);
3116
3117   return true;
3118 }
3119
3120 /* Determines the expression by that USE is expressed from induction variable
3121    CAND at statement AT in LOOP.  The computation is unshared.  */
3122
3123 static tree
3124 get_computation_at (struct loop *loop,
3125                     struct iv_use *use, struct iv_cand *cand, tree at)
3126 {
3127   struct affine_tree_combination aff;
3128   tree type = TREE_TYPE (use->iv->base);
3129
3130   if (!get_computation_aff (loop, use, cand, at, &aff))
3131     return NULL_TREE;
3132   unshare_aff_combination (&aff);
3133   return fold_convert (type, aff_combination_to_tree (&aff));
3134 }
3135
3136 /* Determines the expression by that USE is expressed from induction variable
3137    CAND in LOOP.  The computation is unshared.  */
3138
3139 static tree
3140 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3141 {
3142   return get_computation_at (loop, use, cand, use->stmt);
3143 }
3144
3145 /* Returns cost of addition in MODE.  */
3146
3147 static unsigned
3148 add_cost (enum machine_mode mode)
3149 {
3150   static unsigned costs[NUM_MACHINE_MODES];
3151   rtx seq;
3152   unsigned cost;
3153
3154   if (costs[mode])
3155     return costs[mode];
3156
3157   start_sequence ();
3158   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3159                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3160                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3161                  NULL_RTX);
3162   seq = get_insns ();
3163   end_sequence ();
3164
3165   cost = seq_cost (seq);
3166   if (!cost)
3167     cost = 1;
3168
3169   costs[mode] = cost;
3170
3171   if (dump_file && (dump_flags & TDF_DETAILS))
3172     fprintf (dump_file, "Addition in %s costs %d\n",
3173              GET_MODE_NAME (mode), cost);
3174   return cost;
3175 }
3176
3177 /* Entry in a hashtable of already known costs for multiplication.  */
3178 struct mbc_entry
3179 {
3180   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3181   enum machine_mode mode;       /* In mode.  */
3182   unsigned cost;                /* The cost.  */
3183 };
3184
3185 /* Counts hash value for the ENTRY.  */
3186
3187 static hashval_t
3188 mbc_entry_hash (const void *entry)
3189 {
3190   const struct mbc_entry *e = entry;
3191
3192   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3193 }
3194
3195 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3196
3197 static int
3198 mbc_entry_eq (const void *entry1, const void *entry2)
3199 {
3200   const struct mbc_entry *e1 = entry1;
3201   const struct mbc_entry *e2 = entry2;
3202
3203   return (e1->mode == e2->mode
3204           && e1->cst == e2->cst);
3205 }
3206
3207 /* Returns cost of multiplication by constant CST in MODE.  */
3208
3209 unsigned
3210 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3211 {
3212   static htab_t costs;
3213   struct mbc_entry **cached, act;
3214   rtx seq;
3215   unsigned cost;
3216
3217   if (!costs)
3218     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3219
3220   act.mode = mode;
3221   act.cst = cst;
3222   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3223   if (*cached)
3224     return (*cached)->cost;
3225
3226   *cached = xmalloc (sizeof (struct mbc_entry));
3227   (*cached)->mode = mode;
3228   (*cached)->cst = cst;
3229
3230   start_sequence ();
3231   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3232                gen_int_mode (cst, mode), NULL_RTX, 0);
3233   seq = get_insns ();
3234   end_sequence ();
3235
3236   cost = seq_cost (seq);
3237
3238   if (dump_file && (dump_flags & TDF_DETAILS))
3239     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3240              (int) cst, GET_MODE_NAME (mode), cost);
3241
3242   (*cached)->cost = cost;
3243
3244   return cost;
3245 }
3246
3247 /* Returns true if multiplying by RATIO is allowed in address.  */
3248
3249 bool
3250 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3251 {
3252 #define MAX_RATIO 128
3253   static sbitmap valid_mult;
3254
3255   if (!valid_mult)
3256     {
3257       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3258       rtx addr;
3259       HOST_WIDE_INT i;
3260
3261       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3262       sbitmap_zero (valid_mult);
3263       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3264       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3265         {
3266           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3267           if (memory_address_p (Pmode, addr))
3268             SET_BIT (valid_mult, i + MAX_RATIO);
3269         }
3270
3271       if (dump_file && (dump_flags & TDF_DETAILS))
3272         {
3273           fprintf (dump_file, "  allowed multipliers:");
3274           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3276               fprintf (dump_file, " %d", (int) i);
3277           fprintf (dump_file, "\n");
3278           fprintf (dump_file, "\n");
3279         }
3280     }
3281
3282   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3283     return false;
3284
3285   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3286 }
3287
3288 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3289    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3290    variable is omitted.  The created memory accesses MODE.
3291
3292    TODO -- there must be some better way.  This all is quite crude.  */
3293
3294 static unsigned
3295 get_address_cost (bool symbol_present, bool var_present,
3296                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3297 {
3298   static bool initialized = false;
3299   static HOST_WIDE_INT rat, off;
3300   static HOST_WIDE_INT min_offset, max_offset;
3301   static unsigned costs[2][2][2][2];
3302   unsigned cost, acost;
3303   rtx seq, addr, base;
3304   bool offset_p, ratio_p;
3305   rtx reg1;
3306   HOST_WIDE_INT s_offset;
3307   unsigned HOST_WIDE_INT mask;
3308   unsigned bits;
3309
3310   if (!initialized)
3311     {
3312       HOST_WIDE_INT i;
3313       initialized = true;
3314
3315       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3316
3317       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3318       for (i = 1; i <= 1 << 20; i <<= 1)
3319         {
3320           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3321           if (!memory_address_p (Pmode, addr))
3322             break;
3323         }
3324       max_offset = i >> 1;
3325       off = max_offset;
3326
3327       for (i = 1; i <= 1 << 20; i <<= 1)
3328         {
3329           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3330           if (!memory_address_p (Pmode, addr))
3331             break;
3332         }
3333       min_offset = -(i >> 1);
3334
3335       if (dump_file && (dump_flags & TDF_DETAILS))
3336         {
3337           fprintf (dump_file, "get_address_cost:\n");
3338           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3339           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3340         }
3341
3342       rat = 1;
3343       for (i = 2; i <= MAX_RATIO; i++)
3344         if (multiplier_allowed_in_address_p (i))
3345           {
3346             rat = i;
3347             break;
3348           }
3349     }
3350
3351   bits = GET_MODE_BITSIZE (Pmode);
3352   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3353   offset &= mask;
3354   if ((offset >> (bits - 1) & 1))
3355     offset |= ~mask;
3356   s_offset = offset;
3357
3358   cost = 0;
3359   offset_p = (s_offset != 0
3360               && min_offset <= s_offset && s_offset <= max_offset);
3361   ratio_p = (ratio != 1
3362              && multiplier_allowed_in_address_p (ratio));
3363
3364   if (ratio != 1 && !ratio_p)
3365     cost += multiply_by_cost (ratio, Pmode);
3366
3367   if (s_offset && !offset_p && !symbol_present)
3368     {
3369       cost += add_cost (Pmode);
3370       var_present = true;
3371     }
3372
3373   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3374   if (!acost)
3375     {
3376       acost = 0;
3377
3378       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3379       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3380       if (ratio_p)
3381         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3382
3383       if (var_present)
3384         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3385
3386       if (symbol_present)
3387         {
3388           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3389           if (offset_p)
3390             base = gen_rtx_fmt_e (CONST, Pmode,
3391                                   gen_rtx_fmt_ee (PLUS, Pmode,
3392                                                   base,
3393                                                   gen_int_mode (off, Pmode)));
3394         }
3395       else if (offset_p)
3396         base = gen_int_mode (off, Pmode);
3397       else
3398         base = NULL_RTX;
3399
3400       if (base)
3401         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3402
3403       start_sequence ();
3404       addr = memory_address (Pmode, addr);
3405       seq = get_insns ();
3406       end_sequence ();
3407
3408       acost = seq_cost (seq);
3409       acost += address_cost (addr, Pmode);
3410
3411       if (!acost)
3412         acost = 1;
3413       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3414     }
3415
3416   return cost + acost;
3417 }
3418 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3419    invariants the computation depends on.  */
3420
3421 static unsigned
3422 force_var_cost (struct ivopts_data *data,
3423                 tree expr, bitmap *depends_on)
3424 {
3425   static bool costs_initialized = false;
3426   static unsigned integer_cost;
3427   static unsigned symbol_cost;
3428   static unsigned address_cost;
3429   tree op0, op1;
3430   unsigned cost0, cost1, cost;
3431   enum machine_mode mode;
3432
3433   if (!costs_initialized)
3434     {
3435       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3436       rtx x = gen_rtx_MEM (DECL_MODE (var),
3437                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3438       tree addr;
3439       tree type = build_pointer_type (integer_type_node);
3440
3441       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3442                                                            2000));
3443
3444       SET_DECL_RTL (var, x);
3445       TREE_STATIC (var) = 1;
3446       addr = build1 (ADDR_EXPR, type, var);
3447       symbol_cost = computation_cost (addr) + 1;
3448
3449       address_cost
3450         = computation_cost (build2 (PLUS_EXPR, type,
3451                                     addr,
3452                                     build_int_cst_type (type, 2000))) + 1;
3453       if (dump_file && (dump_flags & TDF_DETAILS))
3454         {
3455           fprintf (dump_file, "force_var_cost:\n");
3456           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3457           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3458           fprintf (dump_file, "  address %d\n", (int) address_cost);
3459           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3460           fprintf (dump_file, "\n");
3461         }
3462
3463       costs_initialized = true;
3464     }
3465
3466   STRIP_NOPS (expr);
3467
3468   if (depends_on)
3469     {
3470       fd_ivopts_data = data;
3471       walk_tree (&expr, find_depends, depends_on, NULL);
3472     }
3473
3474   if (SSA_VAR_P (expr))
3475     return 0;
3476
3477   if (TREE_INVARIANT (expr))
3478     {
3479       if (TREE_CODE (expr) == INTEGER_CST)
3480         return integer_cost;
3481
3482       if (TREE_CODE (expr) == ADDR_EXPR)
3483         {
3484           tree obj = TREE_OPERAND (expr, 0);
3485
3486           if (TREE_CODE (obj) == VAR_DECL
3487               || TREE_CODE (obj) == PARM_DECL
3488               || TREE_CODE (obj) == RESULT_DECL)
3489             return symbol_cost;
3490         }
3491
3492       return address_cost;
3493     }
3494
3495   switch (TREE_CODE (expr))
3496     {
3497     case PLUS_EXPR:
3498     case MINUS_EXPR:
3499     case MULT_EXPR:
3500       op0 = TREE_OPERAND (expr, 0);
3501       op1 = TREE_OPERAND (expr, 1);
3502       STRIP_NOPS (op0);
3503       STRIP_NOPS (op1);
3504
3505       if (is_gimple_val (op0))
3506         cost0 = 0;
3507       else
3508         cost0 = force_var_cost (data, op0, NULL);
3509
3510       if (is_gimple_val (op1))
3511         cost1 = 0;
3512       else
3513         cost1 = force_var_cost (data, op1, NULL);
3514
3515       break;
3516
3517     default:
3518       /* Just an arbitrary value, FIXME.  */
3519       return target_spill_cost;
3520     }
3521
3522   mode = TYPE_MODE (TREE_TYPE (expr));
3523   switch (TREE_CODE (expr))
3524     {
3525     case PLUS_EXPR:
3526     case MINUS_EXPR:
3527       cost = add_cost (mode);
3528       break;
3529
3530     case MULT_EXPR:
3531       if (cst_and_fits_in_hwi (op0))
3532         cost = multiply_by_cost (int_cst_value (op0), mode);
3533       else if (cst_and_fits_in_hwi (op1))
3534         cost = multiply_by_cost (int_cst_value (op1), mode);
3535       else
3536         return target_spill_cost;
3537       break;
3538
3539     default:
3540       gcc_unreachable ();
3541     }
3542
3543   cost += cost0;
3544   cost += cost1;
3545
3546   /* Bound the cost by target_spill_cost.  The parts of complicated
3547      computations often are either loop invariant or at least can
3548      be shared between several iv uses, so letting this grow without
3549      limits would not give reasonable results.  */
3550   return cost < target_spill_cost ? cost : target_spill_cost;
3551 }
3552
3553 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3554    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3555    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3556    invariants the computation depends on.  */
3557
3558 static unsigned
3559 split_address_cost (struct ivopts_data *data,
3560                     tree addr, bool *symbol_present, bool *var_present,
3561                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3562 {
3563   tree core;
3564   HOST_WIDE_INT bitsize;
3565   HOST_WIDE_INT bitpos;
3566   tree toffset;
3567   enum machine_mode mode;
3568   int unsignedp, volatilep;
3569
3570   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3571                               &unsignedp, &volatilep, false);
3572
3573   if (toffset != 0
3574       || bitpos % BITS_PER_UNIT != 0
3575       || TREE_CODE (core) != VAR_DECL)
3576     {
3577       *symbol_present = false;
3578       *var_present = true;
3579       fd_ivopts_data = data;
3580       walk_tree (&addr, find_depends, depends_on, NULL);
3581       return target_spill_cost;
3582     }
3583
3584   *offset += bitpos / BITS_PER_UNIT;
3585   if (TREE_STATIC (core)
3586       || DECL_EXTERNAL (core))
3587     {
3588       *symbol_present = true;
3589       *var_present = false;
3590       return 0;
3591     }
3592
3593   *symbol_present = false;
3594   *var_present = true;
3595   return 0;
3596 }
3597
3598 /* Estimates cost of expressing difference of addresses E1 - E2 as
3599    var + symbol + offset.  The value of offset is added to OFFSET,
3600    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3601    part is missing.  DEPENDS_ON is a set of the invariants the computation
3602    depends on.  */
3603
3604 static unsigned
3605 ptr_difference_cost (struct ivopts_data *data,
3606                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3607                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3608 {
3609   HOST_WIDE_INT diff = 0;
3610   unsigned cost;
3611
3612   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3613
3614   if (ptr_difference_const (e1, e2, &diff))
3615     {
3616       *offset += diff;
3617       *symbol_present = false;
3618       *var_present = false;
3619       return 0;
3620     }
3621
3622   if (e2 == integer_zero_node)
3623     return split_address_cost (data, TREE_OPERAND (e1, 0),
3624                                symbol_present, var_present, offset, depends_on);
3625
3626   *symbol_present = false;
3627   *var_present = true;
3628
3629   cost = force_var_cost (data, e1, depends_on);
3630   cost += force_var_cost (data, e2, depends_on);
3631   cost += add_cost (Pmode);
3632
3633   return cost;
3634 }
3635
3636 /* Estimates cost of expressing difference E1 - E2 as
3637    var + symbol + offset.  The value of offset is added to OFFSET,
3638    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3639    part is missing.  DEPENDS_ON is a set of the invariants the computation
3640    depends on.  */
3641
3642 static unsigned
3643 difference_cost (struct ivopts_data *data,
3644                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3645                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3646 {
3647   unsigned cost;
3648   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3649   unsigned HOST_WIDE_INT off1, off2;
3650
3651   e1 = strip_offset (e1, &off1);
3652   e2 = strip_offset (e2, &off2);
3653   *offset += off1 - off2;
3654
3655   STRIP_NOPS (e1);
3656   STRIP_NOPS (e2);
3657
3658   if (TREE_CODE (e1) == ADDR_EXPR)
3659     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3660                                 depends_on);
3661   *symbol_present = false;
3662
3663   if (operand_equal_p (e1, e2, 0))
3664     {
3665       *var_present = false;
3666       return 0;
3667     }
3668   *var_present = true;
3669   if (zero_p (e2))
3670     return force_var_cost (data, e1, depends_on);
3671
3672   if (zero_p (e1))
3673     {
3674       cost = force_var_cost (data, e2, depends_on);
3675       cost += multiply_by_cost (-1, mode);
3676
3677       return cost;
3678     }
3679
3680   cost = force_var_cost (data, e1, depends_on);
3681   cost += force_var_cost (data, e2, depends_on);
3682   cost += add_cost (mode);
3683
3684   return cost;
3685 }
3686
3687 /* Determines the cost of the computation by that USE is expressed
3688    from induction variable CAND.  If ADDRESS_P is true, we just need
3689    to create an address from it, otherwise we want to get it into
3690    register.  A set of invariants we depend on is stored in
3691    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3692
3693 static unsigned
3694 get_computation_cost_at (struct ivopts_data *data,
3695                          struct iv_use *use, struct iv_cand *cand,
3696                          bool address_p, bitmap *depends_on, tree at)
3697 {
3698   tree ubase = use->iv->base, ustep = use->iv->step;
3699   tree cbase, cstep;
3700   tree utype = TREE_TYPE (ubase), ctype;
3701   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3702   HOST_WIDE_INT ratio, aratio;
3703   bool var_present, symbol_present;
3704   unsigned cost = 0, n_sums;
3705
3706   *depends_on = NULL;
3707
3708   /* Only consider real candidates.  */
3709   if (!cand->iv)
3710     return INFTY;
3711
3712   cbase = cand->iv->base;
3713   cstep = cand->iv->step;
3714   ctype = TREE_TYPE (cbase);
3715
3716   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3717     {
3718       /* We do not have a precision to express the values of use.  */
3719       return INFTY;
3720     }
3721
3722   if (address_p)
3723     {
3724       /* Do not try to express address of an object with computation based
3725          on address of a different object.  This may cause problems in rtl
3726          level alias analysis (that does not expect this to be happening,
3727          as this is illegal in C), and would be unlikely to be useful
3728          anyway.  */
3729       if (use->iv->base_object
3730           && cand->iv->base_object
3731           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3732         return INFTY;
3733     }
3734
3735   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3736     {
3737       /* TODO -- add direct handling of this case.  */
3738       goto fallback;
3739     }
3740
3741   /* CSTEPI is removed from the offset in case statement is after the
3742      increment.  If the step is not constant, we use zero instead.
3743      This is a bit imprecise (there is the extra addition), but
3744      redundancy elimination is likely to transform the code so that
3745      it uses value of the variable before increment anyway,
3746      so it is not that much unrealistic.  */
3747   if (cst_and_fits_in_hwi (cstep))
3748     cstepi = int_cst_value (cstep);
3749   else
3750     cstepi = 0;
3751
3752   if (cst_and_fits_in_hwi (ustep)
3753       && cst_and_fits_in_hwi (cstep))
3754     {
3755       ustepi = int_cst_value (ustep);
3756
3757       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3758         return INFTY;
3759     }
3760   else
3761     {
3762       tree rat;
3763
3764       rat = constant_multiple_of (utype, ustep, cstep);
3765
3766       if (!rat)
3767         return INFTY;
3768
3769       if (cst_and_fits_in_hwi (rat))
3770         ratio = int_cst_value (rat);
3771       else if (integer_onep (rat))
3772         ratio = 1;
3773       else if (integer_all_onesp (rat))
3774         ratio = -1;
3775       else
3776         return INFTY;
3777     }
3778
3779   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3780      or ratio == 1, it is better to handle this like
3781
3782      ubase - ratio * cbase + ratio * var
3783
3784      (also holds in the case ratio == -1, TODO.  */
3785
3786   if (cst_and_fits_in_hwi (cbase))
3787     {
3788       offset = - ratio * int_cst_value (cbase);
3789       cost += difference_cost (data,
3790                                ubase, integer_zero_node,
3791                                &symbol_present, &var_present, &offset,
3792                                depends_on);
3793     }
3794   else if (ratio == 1)
3795     {
3796       cost += difference_cost (data,
3797                                ubase, cbase,
3798                                &symbol_present, &var_present, &offset,
3799                                depends_on);
3800     }
3801   else
3802     {
3803       cost += force_var_cost (data, cbase, depends_on);
3804       cost += add_cost (TYPE_MODE (ctype));
3805       cost += difference_cost (data,
3806                                ubase, integer_zero_node,
3807                                &symbol_present, &var_present, &offset,
3808                                depends_on);
3809     }
3810
3811   /* If we are after the increment, the value of the candidate is higher by
3812      one iteration.  */
3813   if (stmt_after_increment (data->current_loop, cand, at))
3814     offset -= ratio * cstepi;
3815
3816   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3817      (symbol/var/const parts may be omitted).  If we are looking for an address,
3818      find the cost of addressing this.  */
3819   if (address_p)
3820     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3821
3822   /* Otherwise estimate the costs for computing the expression.  */
3823   aratio = ratio > 0 ? ratio : -ratio;
3824   if (!symbol_present && !var_present && !offset)
3825     {
3826       if (ratio != 1)
3827         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3828
3829       return cost;
3830     }
3831
3832   if (aratio != 1)
3833     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3834
3835   n_sums = 1;
3836   if (var_present
3837       /* Symbol + offset should be compile-time computable.  */
3838       && (symbol_present || offset))
3839     n_sums++;
3840
3841   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3842
3843 fallback:
3844   {
3845     /* Just get the expression, expand it and measure the cost.  */
3846     tree comp = get_computation_at (data->current_loop, use, cand, at);
3847
3848     if (!comp)
3849       return INFTY;
3850
3851     if (address_p)
3852       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3853
3854     return computation_cost (comp);
3855   }
3856 }
3857
3858 /* Determines the cost of the computation by that USE is expressed
3859    from induction variable CAND.  If ADDRESS_P is true, we just need
3860    to create an address from it, otherwise we want to get it into
3861    register.  A set of invariants we depend on is stored in
3862    DEPENDS_ON.  */
3863
3864 static unsigned
3865 get_computation_cost (struct ivopts_data *data,
3866                       struct iv_use *use, struct iv_cand *cand,
3867                       bool address_p, bitmap *depends_on)
3868 {
3869   return get_computation_cost_at (data,
3870                                   use, cand, address_p, depends_on, use->stmt);
3871 }
3872
3873 /* Determines cost of basing replacement of USE on CAND in a generic
3874    expression.  */
3875
3876 static bool
3877 determine_use_iv_cost_generic (struct ivopts_data *data,
3878                                struct iv_use *use, struct iv_cand *cand)
3879 {
3880   bitmap depends_on;
3881   unsigned cost;
3882
3883   /* The simple case first -- if we need to express value of the preserved
3884      original biv, the cost is 0.  This also prevents us from counting the
3885      cost of increment twice -- once at this use and once in the cost of
3886      the candidate.  */
3887   if (cand->pos == IP_ORIGINAL
3888       && cand->incremented_at == use->stmt)
3889     {
3890       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3891       return true;
3892     }
3893
3894   cost = get_computation_cost (data, use, cand, false, &depends_on);
3895   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3896
3897   return cost != INFTY;
3898 }
3899
3900 /* Determines cost of basing replacement of USE on CAND in an address.  */
3901
3902 static bool
3903 determine_use_iv_cost_address (struct ivopts_data *data,
3904                                struct iv_use *use, struct iv_cand *cand)
3905 {
3906   bitmap depends_on;
3907   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3908
3909   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3910
3911   return cost != INFTY;
3912 }
3913
3914 /* Computes value of induction variable IV in iteration NITER.  */
3915
3916 static tree
3917 iv_value (struct iv *iv, tree niter)
3918 {
3919   tree val;
3920   tree type = TREE_TYPE (iv->base);
3921
3922   niter = fold_convert (type, niter);
3923   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3924
3925   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3926 }
3927
3928 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3929
3930 static tree
3931 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3932 {
3933   tree val = iv_value (cand->iv, niter);
3934   tree type = TREE_TYPE (cand->iv->base);
3935
3936   if (stmt_after_increment (loop, cand, at))
3937     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3938
3939   return val;
3940 }
3941
3942 /* Returns period of induction variable iv.  */
3943
3944 static tree
3945 iv_period (struct iv *iv)
3946 {
3947   tree step = iv->step, period, type;
3948   tree pow2div;
3949
3950   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3951
3952   /* Period of the iv is gcd (step, type range).  Since type range is power
3953      of two, it suffices to determine the maximum power of two that divides
3954      step.  */
3955   pow2div = num_ending_zeros (step);
3956   type = unsigned_type_for (TREE_TYPE (step));
3957
3958   period = build_low_bits_mask (type,
3959                                 (TYPE_PRECISION (type)
3960                                  - tree_low_cst (pow2div, 1)));
3961
3962   return period;
3963 }
3964
3965 /* Returns the comparison operator used when eliminating the iv USE.  */
3966
3967 static enum tree_code
3968 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3969 {
3970   struct loop *loop = data->current_loop;
3971   basic_block ex_bb;
3972   edge exit;
3973
3974   ex_bb = bb_for_stmt (use->stmt);
3975   exit = EDGE_SUCC (ex_bb, 0);
3976   if (flow_bb_inside_loop_p (loop, exit->dest))
3977     exit = EDGE_SUCC (ex_bb, 1);
3978
3979   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3980 }
3981
3982 /* Check whether it is possible to express the condition in USE by comparison
3983    of candidate CAND.  If so, store the value compared with to BOUND.  */
3984
3985 static bool
3986 may_eliminate_iv (struct ivopts_data *data,
3987                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3988 {
3989   basic_block ex_bb;
3990   edge exit;
3991   struct tree_niter_desc *niter;
3992   tree nit, nit_type;
3993   tree wider_type, period, per_type;
3994   struct loop *loop = data->current_loop;
3995
3996   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3997     return false;
3998
3999   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4000      for other conditions inside loop body.  */
4001   ex_bb = bb_for_stmt (use->stmt);
4002   if (use->stmt != last_stmt (ex_bb)
4003       || TREE_CODE (use->stmt) != COND_EXPR)
4004     return false;
4005   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4006     return false;
4007
4008   exit = EDGE_SUCC (ex_bb, 0);
4009   if (flow_bb_inside_loop_p (loop, exit->dest))
4010     exit = EDGE_SUCC (ex_bb, 1);
4011   if (flow_bb_inside_loop_p (loop, exit->dest))
4012     return false;
4013
4014   niter = niter_for_exit (data, exit);
4015   if (!niter
4016       || !zero_p (niter->may_be_zero))
4017     return false;
4018
4019   nit = niter->niter;
4020   nit_type = TREE_TYPE (nit);
4021
4022   /* Determine whether we may use the variable to test whether niter iterations
4023      elapsed.  This is the case iff the period of the induction variable is
4024      greater than the number of iterations.  */
4025   period = iv_period (cand->iv);
4026   if (!period)
4027     return false;
4028   per_type = TREE_TYPE (period);
4029
4030   wider_type = TREE_TYPE (period);
4031   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4032     wider_type = per_type;
4033   else
4034     wider_type = nit_type;
4035
4036   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4037                                       fold_convert (wider_type, period),
4038                                       fold_convert (wider_type, nit))))
4039     return false;
4040
4041   *bound = cand_value_at (loop, cand, use->stmt, nit);
4042   return true;
4043 }
4044
4045 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4046
4047 static bool
4048 determine_use_iv_cost_condition (struct ivopts_data *data,
4049                                  struct iv_use *use, struct iv_cand *cand)
4050 {
4051   tree bound = NULL_TREE, op, cond;
4052   bitmap depends_on = NULL;
4053   unsigned cost;
4054
4055   /* Only consider real candidates.  */
4056   if (!cand->iv)
4057     {
4058       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4059       return false;
4060     }
4061
4062   if (may_eliminate_iv (data, use, cand, &bound))
4063     {
4064       cost = force_var_cost (data, bound, &depends_on);
4065
4066       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4067       return cost != INFTY;
4068     }
4069
4070   /* The induction variable elimination failed; just express the original
4071      giv.  If it is compared with an invariant, note that we cannot get
4072      rid of it.  */
4073   cost = get_computation_cost (data, use, cand, false, &depends_on);
4074
4075   cond = *use->op_p;
4076   if (TREE_CODE (cond) != SSA_NAME)
4077     {
4078       op = TREE_OPERAND (cond, 0);
4079       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4080         op = TREE_OPERAND (cond, 1);
4081       if (TREE_CODE (op) == SSA_NAME)
4082         {
4083           op = get_iv (data, op)->base;
4084           fd_ivopts_data = data;
4085           walk_tree (&op, find_depends, &depends_on, NULL);
4086         }
4087     }
4088
4089   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4090   return cost != INFTY;
4091 }
4092
4093 /* Checks whether it is possible to replace the final value of USE by
4094    a direct computation.  If so, the formula is stored to *VALUE.  */
4095
4096 static bool
4097 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4098                          tree *value)
4099 {
4100   struct loop *loop = data->current_loop;
4101   edge exit;
4102   struct tree_niter_desc *niter;
4103
4104   exit = single_dom_exit (loop);
4105   if (!exit)
4106     return false;
4107
4108   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4109                               bb_for_stmt (use->stmt)));
4110
4111   niter = niter_for_single_dom_exit (data);
4112   if (!niter
4113       || !zero_p (niter->may_be_zero))
4114     return false;
4115
4116   *value = iv_value (use->iv, niter->niter);
4117
4118   return true;
4119 }
4120
4121 /* Determines cost of replacing final value of USE using CAND.  */
4122
4123 static bool
4124 determine_use_iv_cost_outer (struct ivopts_data *data,
4125                              struct iv_use *use, struct iv_cand *cand)
4126 {
4127   bitmap depends_on;
4128   unsigned cost;
4129   edge exit;
4130   tree value = NULL_TREE;
4131   struct loop *loop = data->current_loop;
4132
4133   /* The simple case first -- if we need to express value of the preserved
4134      original biv, the cost is 0.  This also prevents us from counting the
4135      cost of increment twice -- once at this use and once in the cost of
4136      the candidate.  */
4137   if (cand->pos == IP_ORIGINAL
4138       && cand->incremented_at == use->stmt)
4139     {
4140       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4141       return true;
4142     }
4143
4144   if (!cand->iv)
4145     {
4146       if (!may_replace_final_value (data, use, &value))
4147         {
4148           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4149           return false;
4150         }
4151
4152       depends_on = NULL;
4153       cost = force_var_cost (data, value, &depends_on);
4154
4155       cost /= AVG_LOOP_NITER (loop);
4156
4157       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4158       return cost != INFTY;
4159     }
4160
4161   exit = single_dom_exit (loop);
4162   if (exit)
4163     {
4164       /* If there is just a single exit, we may use value of the candidate
4165          after we take it to determine the value of use.  */
4166       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4167                                       last_stmt (exit->src));
4168       if (cost != INFTY)
4169         cost /= AVG_LOOP_NITER (loop);
4170     }
4171   else
4172     {
4173       /* Otherwise we just need to compute the iv.  */
4174       cost = get_computation_cost (data, use, cand, false, &depends_on);
4175     }
4176
4177   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4178
4179   return cost != INFTY;
4180 }
4181
4182 /* Determines cost of basing replacement of USE on CAND.  Returns false
4183    if USE cannot be based on CAND.  */
4184
4185 static bool
4186 determine_use_iv_cost (struct ivopts_data *data,
4187                        struct iv_use *use, struct iv_cand *cand)
4188 {
4189   switch (use->type)
4190     {
4191     case USE_NONLINEAR_EXPR:
4192       return determine_use_iv_cost_generic (data, use, cand);
4193
4194     case USE_OUTER:
4195       return determine_use_iv_cost_outer (data, use, cand);
4196
4197     case USE_ADDRESS:
4198       return determine_use_iv_cost_address (data, use, cand);
4199
4200     case USE_COMPARE:
4201       return determine_use_iv_cost_condition (data, use, cand);
4202
4203     default:
4204       gcc_unreachable ();
4205     }
4206 }
4207
4208 /* Determines costs of basing the use of the iv on an iv candidate.  */
4209
4210 static void
4211 determine_use_iv_costs (struct ivopts_data *data)
4212 {
4213   unsigned i, j;
4214   struct iv_use *use;
4215   struct iv_cand *cand;
4216   bitmap to_clear = BITMAP_ALLOC (NULL);
4217
4218   alloc_use_cost_map (data);
4219
4220   for (i = 0; i < n_iv_uses (data); i++)
4221     {
4222       use = iv_use (data, i);
4223
4224       if (data->consider_all_candidates)
4225         {
4226           for (j = 0; j < n_iv_cands (data); j++)
4227             {
4228               cand = iv_cand (data, j);
4229               determine_use_iv_cost (data, use, cand);
4230             }
4231         }
4232       else
4233         {
4234           bitmap_iterator bi;
4235
4236           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4237             {
4238               cand = iv_cand (data, j);
4239               if (!determine_use_iv_cost (data, use, cand))
4240                 bitmap_set_bit (to_clear, j);
4241             }
4242
4243           /* Remove the candidates for that the cost is infinite from
4244              the list of related candidates.  */
4245           bitmap_and_compl_into (use->related_cands, to_clear);
4246           bitmap_clear (to_clear);
4247         }
4248     }
4249
4250   BITMAP_FREE (to_clear);
4251
4252   if (dump_file && (dump_flags & TDF_DETAILS))
4253     {
4254       fprintf (dump_file, "Use-candidate costs:\n");
4255
4256       for (i = 0; i < n_iv_uses (data); i++)
4257         {
4258           use = iv_use (data, i);
4259
4260           fprintf (dump_file, "Use %d:\n", i);
4261           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4262           for (j = 0; j < use->n_map_members; j++)
4263             {
4264               if (!use->cost_map[j].cand
4265                   || use->cost_map[j].cost == INFTY)
4266                 continue;
4267
4268               fprintf (dump_file, "  %d\t%d\t",
4269                        use->cost_map[j].cand->id,
4270                        use->cost_map[j].cost);
4271               if (use->cost_map[j].depends_on)
4272                 bitmap_print (dump_file,
4273                               use->cost_map[j].depends_on, "","");
4274               fprintf (dump_file, "\n");
4275             }
4276
4277           fprintf (dump_file, "\n");
4278         }
4279       fprintf (dump_file, "\n");
4280     }
4281 }
4282
4283 /* Determines cost of the candidate CAND.  */
4284
4285 static void
4286 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4287 {
4288   unsigned cost_base, cost_step;
4289   tree base;
4290
4291   if (!cand->iv)
4292     {
4293       cand->cost = 0;
4294       return;
4295     }
4296
4297   /* There are two costs associated with the candidate -- its increment
4298      and its initialization.  The second is almost negligible for any loop
4299      that rolls enough, so we take it just very little into account.  */
4300
4301   base = cand->iv->base;
4302   cost_base = force_var_cost (data, base, NULL);
4303   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4304
4305   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4306
4307   /* Prefer the original iv unless we may gain something by replacing it;
4308      this is not really relevant for artificial ivs created by other
4309      passes.  */
4310   if (cand->pos == IP_ORIGINAL
4311       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4312     cand->cost--;
4313
4314   /* Prefer not to insert statements into latch unless there are some
4315      already (so that we do not create unnecessary jumps).  */
4316   if (cand->pos == IP_END
4317       && empty_block_p (ip_end_pos (data->current_loop)))
4318     cand->cost++;
4319 }
4320
4321 /* Determines costs of computation of the candidates.  */
4322
4323 static void
4324 determine_iv_costs (struct ivopts_data *data)
4325 {
4326   unsigned i;
4327
4328   if (dump_file && (dump_flags & TDF_DETAILS))
4329     {
4330       fprintf (dump_file, "Candidate costs:\n");
4331       fprintf (dump_file, "  cand\tcost\n");
4332     }
4333
4334   for (i = 0; i < n_iv_cands (data); i++)
4335     {
4336       struct iv_cand *cand = iv_cand (data, i);
4337
4338       determine_iv_cost (data, cand);
4339
4340       if (dump_file && (dump_flags & TDF_DETAILS))
4341         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4342     }
4343
4344 if (dump_file && (dump_flags & TDF_DETAILS))
4345       fprintf (dump_file, "\n");
4346 }
4347
4348 /* Calculates cost for having SIZE induction variables.  */
4349
4350 static unsigned
4351 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4352 {
4353   return global_cost_for_size (size,
4354                                loop_data (data->current_loop)->regs_used,
4355                                n_iv_uses (data));
4356 }
4357
4358 /* For each size of the induction variable set determine the penalty.  */
4359
4360 static void
4361 determine_set_costs (struct ivopts_data *data)
4362 {
4363   unsigned j, n;
4364   tree phi, op;
4365   struct loop *loop = data->current_loop;
4366   bitmap_iterator bi;
4367
4368   /* We use the following model (definitely improvable, especially the
4369      cost function -- TODO):
4370
4371      We estimate the number of registers available (using MD data), name it A.
4372
4373      We estimate the number of registers used by the loop, name it U.  This
4374      number is obtained as the number of loop phi nodes (not counting virtual
4375      registers and bivs) + the number of variables from outside of the loop.
4376
4377      We set a reserve R (free regs that are used for temporary computations,
4378      etc.).  For now the reserve is a constant 3.
4379
4380      Let I be the number of induction variables.
4381
4382      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4383         make a lot of ivs without a reason).
4384      -- if A - R < U + I <= A, the cost is I * PRES_COST
4385      -- if U + I > A, the cost is I * PRES_COST and
4386         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4387
4388   if (dump_file && (dump_flags & TDF_DETAILS))
4389     {
4390       fprintf (dump_file, "Global costs:\n");
4391       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4392       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4393       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4394       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4395     }
4396
4397   n = 0;
4398   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4399     {
4400       op = PHI_RESULT (phi);
4401
4402       if (!is_gimple_reg (op))
4403         continue;
4404
4405       if (get_iv (data, op))
4406         continue;
4407
4408       n++;
4409     }
4410
4411   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4412     {
4413       struct version_info *info = ver_info (data, j);
4414
4415       if (info->inv_id && info->has_nonlin_use)
4416         n++;
4417     }
4418
4419   loop_data (loop)->regs_used = n;
4420   if (dump_file && (dump_flags & TDF_DETAILS))
4421     fprintf (dump_file, "  regs_used %d\n", n);
4422
4423   if (dump_file && (dump_flags & TDF_DETAILS))
4424     {
4425       fprintf (dump_file, "  cost for size:\n");
4426       fprintf (dump_file, "  ivs\tcost\n");
4427       for (j = 0; j <= 2 * target_avail_regs; j++)
4428         fprintf (dump_file, "  %d\t%d\n", j,
4429                  ivopts_global_cost_for_size (data, j));
4430       fprintf (dump_file, "\n");
4431     }
4432 }
4433
4434 /* Returns true if A is a cheaper cost pair than B.  */
4435
4436 static bool
4437 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4438 {
4439   if (!a)
4440     return false;
4441
4442   if (!b)
4443     return true;
4444
4445   if (a->cost < b->cost)
4446     return true;
4447
4448   if (a->cost > b->cost)
4449     return false;
4450
4451   /* In case the costs are the same, prefer the cheaper candidate.  */
4452   if (a->cand->cost < b->cand->cost)
4453     return true;
4454
4455   return false;
4456 }
4457
4458 /* Computes the cost field of IVS structure.  */
4459
4460 static void
4461 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4462 {
4463   unsigned cost = 0;
4464
4465   cost += ivs->cand_use_cost;
4466   cost += ivs->cand_cost;
4467   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4468
4469   ivs->cost = cost;
4470 }
4471
4472 /* Remove invariants in set INVS to set IVS.  */
4473
4474 static void
4475 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4476 {
4477   bitmap_iterator bi;
4478   unsigned iid;
4479
4480   if (!invs)
4481     return;
4482
4483   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4484     {
4485       ivs->n_invariant_uses[iid]--;
4486       if (ivs->n_invariant_uses[iid] == 0)
4487         ivs->n_regs--;
4488     }
4489 }
4490
4491 /* Set USE not to be expressed by any candidate in IVS.  */
4492
4493 static void
4494 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4495                  struct iv_use *use)
4496 {
4497   unsigned uid = use->id, cid;
4498   struct cost_pair *cp;
4499
4500   cp = ivs->cand_for_use[uid];
4501   if (!cp)
4502     return;
4503   cid = cp->cand->id;
4504
4505   ivs->bad_uses++;
4506   ivs->cand_for_use[uid] = NULL;
4507   ivs->n_cand_uses[cid]--;
4508
4509   if (ivs->n_cand_uses[cid] == 0)
4510     {
4511       bitmap_clear_bit (ivs->cands, cid);
4512       /* Do not count the pseudocandidates.  */
4513       if (cp->cand->iv)
4514         ivs->n_regs--;
4515       ivs->n_cands--;
4516       ivs->cand_cost -= cp->cand->cost;
4517
4518       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4519     }
4520
4521   ivs->cand_use_cost -= cp->cost;
4522
4523   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4524   iv_ca_recount_cost (data, ivs);
4525 }
4526
4527 /* Add invariants in set INVS to set IVS.  */
4528
4529 static void
4530 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4531 {
4532   bitmap_iterator bi;
4533   unsigned iid;
4534
4535   if (!invs)
4536     return;
4537
4538   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4539     {
4540       ivs->n_invariant_uses[iid]++;
4541       if (ivs->n_invariant_uses[iid] == 1)
4542         ivs->n_regs++;
4543     }
4544 }
4545
4546 /* Set cost pair for USE in set IVS to CP.  */
4547
4548 static void
4549 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4550               struct iv_use *use, struct cost_pair *cp)
4551 {
4552   unsigned uid = use->id, cid;
4553
4554   if (ivs->cand_for_use[uid] == cp)
4555     return;
4556
4557   if (ivs->cand_for_use[uid])
4558     iv_ca_set_no_cp (data, ivs, use);
4559
4560   if (cp)
4561     {
4562       cid = cp->cand->id;
4563
4564       ivs->bad_uses--;
4565       ivs->cand_for_use[uid] = cp;
4566       ivs->n_cand_uses[cid]++;
4567       if (ivs->n_cand_uses[cid] == 1)
4568         {
4569           bitmap_set_bit (ivs->cands, cid);
4570           /* Do not count the pseudocandidates.  */
4571           if (cp->cand->iv)
4572             ivs->n_regs++;
4573           ivs->n_cands++;
4574           ivs->cand_cost += cp->cand->cost;
4575
4576           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4577         }
4578
4579       ivs->cand_use_cost += cp->cost;
4580       iv_ca_set_add_invariants (ivs, cp->depends_on);
4581       iv_ca_recount_cost (data, ivs);
4582     }
4583 }
4584
4585 /* Extend set IVS by expressing USE by some of the candidates in it
4586    if possible.  */
4587
4588 static void
4589 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4590                struct iv_use *use)
4591 {
4592   struct cost_pair *best_cp = NULL, *cp;
4593   bitmap_iterator bi;
4594   unsigned i;
4595
4596   gcc_assert (ivs->upto >= use->id);
4597
4598   if (ivs->upto == use->id)
4599     {
4600       ivs->upto++;
4601       ivs->bad_uses++;
4602     }
4603
4604   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4605     {
4606       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4607
4608       if (cheaper_cost_pair (cp, best_cp))
4609         best_cp = cp;
4610     }
4611
4612   iv_ca_set_cp (data, ivs, use, best_cp);
4613 }
4614
4615 /* Get cost for assignment IVS.  */
4616
4617 static unsigned
4618 iv_ca_cost (struct iv_ca *ivs)
4619 {
4620   return (ivs->bad_uses ? INFTY : ivs->cost);
4621 }
4622
4623 /* Returns true if all dependences of CP are among invariants in IVS.  */
4624
4625 static bool
4626 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4627 {
4628   unsigned i;
4629   bitmap_iterator bi;
4630
4631   if (!cp->depends_on)
4632     return true;
4633
4634   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4635     {
4636       if (ivs->n_invariant_uses[i] == 0)
4637         return false;
4638     }
4639
4640   return true;
4641 }
4642
4643 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4644    it before NEXT_CHANGE.  */
4645
4646 static struct iv_ca_delta *
4647 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4648                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4649 {
4650   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4651
4652   change->use = use;
4653   change->old_cp = old_cp;
4654   change->new_cp = new_cp;
4655   change->next_change = next_change;
4656
4657   return change;
4658 }
4659
4660 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4661    are rewritten.  */
4662
4663 static struct iv_ca_delta *
4664 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4665 {
4666   struct iv_ca_delta *last;
4667
4668   if (!l2)
4669     return l1;
4670
4671   if (!l1)
4672     return l2;
4673
4674   for (last = l1; last->next_change; last = last->next_change)
4675     continue;
4676   last->next_change = l2;
4677
4678   return l1;
4679 }
4680
4681 /* Returns candidate by that USE is expressed in IVS.  */
4682
4683 static struct cost_pair *
4684 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4685 {
4686   return ivs->cand_for_use[use->id];
4687 }
4688
4689 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4690
4691 static struct iv_ca_delta *
4692 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4693 {
4694   struct iv_ca_delta *act, *next, *prev = NULL;
4695   struct cost_pair *tmp;
4696
4697   for (act = delta; act; act = next)
4698     {
4699       next = act->next_change;
4700       act->next_change = prev;
4701       prev = act;
4702
4703       tmp = act->old_cp;
4704       act->old_cp = act->new_cp;
4705       act->new_cp = tmp;
4706     }
4707
4708   return prev;
4709 }
4710
4711 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4712    reverted instead.  */
4713
4714 static void
4715 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4716                     struct iv_ca_delta *delta, bool forward)
4717 {
4718   struct cost_pair *from, *to;
4719   struct iv_ca_delta *act;
4720
4721   if (!forward)
4722     delta = iv_ca_delta_reverse (delta);
4723
4724   for (act = delta; act; act = act->next_change)
4725     {
4726       from = act->old_cp;
4727       to = act->new_cp;
4728       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4729       iv_ca_set_cp (data, ivs, act->use, to);
4730     }
4731
4732   if (!forward)
4733     iv_ca_delta_reverse (delta);
4734 }
4735
4736 /* Returns true if CAND is used in IVS.  */
4737
4738 static bool
4739 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4740 {
4741   return ivs->n_cand_uses[cand->id] > 0;
4742 }
4743
4744 /* Returns number of induction variable candidates in the set IVS.  */
4745
4746 static unsigned
4747 iv_ca_n_cands (struct iv_ca *ivs)
4748 {
4749   return ivs->n_cands;
4750 }
4751
4752 /* Free the list of changes DELTA.  */
4753
4754 static void
4755 iv_ca_delta_free (struct iv_ca_delta **delta)
4756 {
4757   struct iv_ca_delta *act, *next;
4758
4759   for (act = *delta; act; act = next)
4760     {
4761       next = act->next_change;
4762       free (act);
4763     }
4764
4765   *delta = NULL;
4766 }
4767
4768 /* Allocates new iv candidates assignment.  */
4769
4770 static struct iv_ca *
4771 iv_ca_new (struct ivopts_data *data)
4772 {
4773   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4774
4775   nw->upto = 0;
4776   nw->bad_uses = 0;
4777   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4778   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4779   nw->cands = BITMAP_ALLOC (NULL);
4780   nw->n_cands = 0;
4781   nw->n_regs = 0;
4782   nw->cand_use_cost = 0;
4783   nw->cand_cost = 0;
4784   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4785   nw->cost = 0;
4786
4787   return nw;
4788 }
4789
4790 /* Free memory occupied by the set IVS.  */
4791
4792 static void
4793 iv_ca_free (struct iv_ca **ivs)
4794 {
4795   free ((*ivs)->cand_for_use);
4796   free ((*ivs)->n_cand_uses);
4797   BITMAP_FREE ((*ivs)->cands);
4798   free ((*ivs)->n_invariant_uses);
4799   free (*ivs);
4800   *ivs = NULL;
4801 }
4802
4803 /* Dumps IVS to FILE.  */
4804
4805 static void
4806 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4807 {
4808   const char *pref = "  invariants ";
4809   unsigned i;
4810
4811   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4812   bitmap_print (file, ivs->cands, "  candidates ","\n");
4813
4814   for (i = 1; i <= data->max_inv_id; i++)
4815     if (ivs->n_invariant_uses[i])
4816       {
4817         fprintf (file, "%s%d", pref, i);
4818         pref = ", ";
4819       }
4820   fprintf (file, "\n");
4821 }
4822
4823 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4824    new set, and store differences in DELTA.  Number of induction variables
4825    in the new set is stored to N_IVS.  */
4826
4827 static unsigned
4828 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4829               struct iv_cand *cand, struct iv_ca_delta **delta,
4830               unsigned *n_ivs)
4831 {
4832   unsigned i, cost;
4833   struct iv_use *use;
4834   struct cost_pair *old_cp, *new_cp;
4835
4836   *delta = NULL;
4837   for (i = 0; i < ivs->upto; i++)
4838     {
4839       use = iv_use (data, i);
4840       old_cp = iv_ca_cand_for_use (ivs, use);
4841
4842       if (old_cp
4843           && old_cp->cand == cand)
4844         continue;
4845
4846       new_cp = get_use_iv_cost (data, use, cand);
4847       if (!new_cp)
4848         continue;
4849
4850       if (!iv_ca_has_deps (ivs, new_cp))
4851         continue;
4852
4853       if (!cheaper_cost_pair (new_cp, old_cp))
4854         continue;
4855
4856       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4857     }
4858
4859   iv_ca_delta_commit (data, ivs, *delta, true);
4860   cost = iv_ca_cost (ivs);
4861   if (n_ivs)
4862     *n_ivs = iv_ca_n_cands (ivs);
4863   iv_ca_delta_commit (data, ivs, *delta, false);
4864
4865   return cost;
4866 }
4867
4868 /* Try narrowing set IVS by removing CAND.  Return the cost of
4869    the new set and store the differences in DELTA.  */
4870
4871 static unsigned
4872 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4873               struct iv_cand *cand, struct iv_ca_delta **delta)
4874 {
4875   unsigned i, ci;
4876   struct iv_use *use;
4877   struct cost_pair *old_cp, *new_cp, *cp;
4878   bitmap_iterator bi;
4879   struct iv_cand *cnd;
4880   unsigned cost;
4881
4882   *delta = NULL;
4883   for (i = 0; i < n_iv_uses (data); i++)
4884     {
4885       use = iv_use (data, i);
4886
4887       old_cp = iv_ca_cand_for_use (ivs, use);
4888       if (old_cp->cand != cand)
4889         continue;
4890
4891       new_cp = NULL;
4892
4893       if (data->consider_all_candidates)
4894         {
4895           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4896             {
4897               if (ci == cand->id)
4898                 continue;
4899
4900               cnd = iv_cand (data, ci);
4901
4902               cp = get_use_iv_cost (data, use, cnd);
4903               if (!cp)
4904                 continue;
4905               if (!iv_ca_has_deps (ivs, cp))
4906                 continue;
4907
4908               if (!cheaper_cost_pair (cp, new_cp))
4909                 continue;
4910
4911               new_cp = cp;
4912             }
4913         }
4914       else
4915         {
4916           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4917             {
4918               if (ci == cand->id)
4919                 continue;
4920
4921               cnd = iv_cand (data, ci);
4922
4923               cp = get_use_iv_cost (data, use, cnd);
4924               if (!cp)
4925                 continue;
4926               if (!iv_ca_has_deps (ivs, cp))
4927                 continue;
4928
4929               if (!cheaper_cost_pair (cp, new_cp))
4930                 continue;
4931
4932               new_cp = cp;
4933             }
4934         }
4935
4936       if (!new_cp)
4937         {
4938           iv_ca_delta_free (delta);
4939           return INFTY;
4940         }
4941
4942       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4943     }
4944
4945   iv_ca_delta_commit (data, ivs, *delta, true);
4946   cost = iv_ca_cost (ivs);
4947   iv_ca_delta_commit (data, ivs, *delta, false);
4948
4949   return cost;
4950 }
4951
4952 /* Try optimizing the set of candidates IVS by removing candidates different
4953    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4954    differences in DELTA.  */
4955
4956 static unsigned
4957 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4958              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4959 {
4960   bitmap_iterator bi;
4961   struct iv_ca_delta *act_delta, *best_delta;
4962   unsigned i, best_cost, acost;
4963   struct iv_cand *cand;
4964
4965   best_delta = NULL;
4966   best_cost = iv_ca_cost (ivs);
4967
4968   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4969     {
4970       cand = iv_cand (data, i);
4971
4972       if (cand == except_cand)
4973         continue;
4974
4975       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4976
4977       if (acost < best_cost)
4978         {
4979           best_cost = acost;
4980           iv_ca_delta_free (&best_delta);
4981           best_delta = act_delta;
4982         }
4983       else
4984         iv_ca_delta_free (&act_delta);
4985     }
4986
4987   if (!best_delta)
4988     {
4989       *delta = NULL;
4990       return best_cost;
4991     }
4992
4993   /* Recurse to possibly remove other unnecessary ivs.  */
4994   iv_ca_delta_commit (data, ivs, best_delta, true);
4995   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4996   iv_ca_delta_commit (data, ivs, best_delta, false);
4997   *delta = iv_ca_delta_join (best_delta, *delta);
4998   return best_cost;
4999 }
5000
5001 /* Tries to extend the sets IVS in the best possible way in order
5002    to express the USE.  */
5003
5004 static bool
5005 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5006                   struct iv_use *use)
5007 {
5008   unsigned best_cost, act_cost;
5009   unsigned i;
5010   bitmap_iterator bi;
5011   struct iv_cand *cand;
5012   struct iv_ca_delta *best_delta = NULL, *act_delta;
5013   struct cost_pair *cp;
5014
5015   iv_ca_add_use (data, ivs, use);
5016   best_cost = iv_ca_cost (ivs);
5017
5018   cp = iv_ca_cand_for_use (ivs, use);
5019   if (cp)
5020     {
5021       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5022       iv_ca_set_no_cp (data, ivs, use);
5023     }
5024
5025   /* First try important candidates.  Only if it fails, try the specific ones.
5026      Rationale -- in loops with many variables the best choice often is to use
5027      just one generic biv.  If we added here many ivs specific to the uses,
5028      the optimization algorithm later would be likely to get stuck in a local
5029      minimum, thus causing us to create too many ivs.  The approach from
5030      few ivs to more seems more likely to be successful -- starting from few
5031      ivs, replacing an expensive use by a specific iv should always be a
5032      win.  */
5033   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5034     {
5035       cand = iv_cand (data, i);
5036
5037       if (iv_ca_cand_used_p (ivs, cand))
5038         continue;
5039
5040       cp = get_use_iv_cost (data, use, cand);
5041       if (!cp)
5042         continue;
5043
5044       iv_ca_set_cp (data, ivs, use, cp);
5045       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5046       iv_ca_set_no_cp (data, ivs, use);
5047       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5048
5049       if (act_cost < best_cost)
5050         {
5051           best_cost = act_cost;
5052
5053           iv_ca_delta_free (&best_delta);
5054           best_delta = act_delta;
5055         }
5056       else
5057         iv_ca_delta_free (&act_delta);
5058     }
5059
5060   if (best_cost == INFTY)
5061     {
5062       for (i = 0; i < use->n_map_members; i++)
5063         {
5064           cp = use->cost_map + i;
5065           cand = cp->cand;
5066           if (!cand)
5067             continue;
5068
5069           /* Already tried this.  */
5070           if (cand->important)
5071             continue;
5072
5073           if (iv_ca_cand_used_p (ivs, cand))
5074             continue;
5075
5076           act_delta = NULL;
5077           iv_ca_set_cp (data, ivs, use, cp);
5078           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5079           iv_ca_set_no_cp (data, ivs, use);
5080           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5081                                        cp, act_delta);
5082
5083           if (act_cost < best_cost)
5084             {
5085               best_cost = act_cost;
5086
5087               if (best_delta)
5088                 iv_ca_delta_free (&best_delta);
5089               best_delta = act_delta;
5090             }
5091           else
5092             iv_ca_delta_free (&act_delta);
5093         }
5094     }
5095
5096   iv_ca_delta_commit (data, ivs, best_delta, true);
5097   iv_ca_delta_free (&best_delta);
5098
5099   return (best_cost != INFTY);
5100 }
5101
5102 /* Finds an initial assignment of candidates to uses.  */
5103
5104 static struct iv_ca *
5105 get_initial_solution (struct ivopts_data *data)
5106 {
5107   struct iv_ca *ivs = iv_ca_new (data);
5108   unsigned i;
5109
5110   for (i = 0; i < n_iv_uses (data); i++)
5111     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5112       {
5113         iv_ca_free (&ivs);
5114         return NULL;
5115       }
5116
5117   return ivs;
5118 }
5119
5120 /* Tries to improve set of induction variables IVS.  */
5121
5122 static bool
5123 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5124 {
5125   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5126   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5127   struct iv_cand *cand;
5128
5129   /* Try extending the set of induction variables by one.  */
5130   for (i = 0; i < n_iv_cands (data); i++)
5131     {
5132       cand = iv_cand (data, i);
5133
5134       if (iv_ca_cand_used_p (ivs, cand))
5135         continue;
5136
5137       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5138       if (!act_delta)
5139         continue;
5140
5141       /* If we successfully added the candidate and the set is small enough,
5142          try optimizing it by removing other candidates.  */
5143       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5144         {
5145           iv_ca_delta_commit (data, ivs, act_delta, true);
5146           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5147           iv_ca_delta_commit (data, ivs, act_delta, false);
5148           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5149         }
5150
5151       if (acost < best_cost)
5152         {
5153           best_cost = acost;
5154           iv_ca_delta_free (&best_delta);
5155           best_delta = act_delta;
5156         }
5157       else
5158         iv_ca_delta_free (&act_delta);
5159     }
5160
5161   if (!best_delta)
5162     {
5163       /* Try removing the candidates from the set instead.  */
5164       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5165
5166       /* Nothing more we can do.  */
5167       if (!best_delta)
5168         return false;
5169     }
5170
5171   iv_ca_delta_commit (data, ivs, best_delta, true);
5172   gcc_assert (best_cost == iv_ca_cost (ivs));
5173   iv_ca_delta_free (&best_delta);
5174   return true;
5175 }
5176
5177 /* Attempts to find the optimal set of induction variables.  We do simple
5178    greedy heuristic -- we try to replace at most one candidate in the selected
5179    solution and remove the unused ivs while this improves the cost.  */
5180
5181 static struct iv_ca *
5182 find_optimal_iv_set (struct ivopts_data *data)
5183 {
5184   unsigned i;
5185   struct iv_ca *set;
5186   struct iv_use *use;
5187
5188   /* Get the initial solution.  */
5189   set = get_initial_solution (data);
5190   if (!set)
5191     {
5192       if (dump_file && (dump_flags & TDF_DETAILS))
5193         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5194       return NULL;
5195     }
5196
5197   if (dump_file && (dump_flags & TDF_DETAILS))
5198     {
5199       fprintf (dump_file, "Initial set of candidates:\n");
5200       iv_ca_dump (data, dump_file, set);
5201     }
5202
5203   while (try_improve_iv_set (data, set))
5204     {
5205       if (dump_file && (dump_flags & TDF_DETAILS))
5206         {
5207           fprintf (dump_file, "Improved to:\n");
5208           iv_ca_dump (data, dump_file, set);
5209         }
5210     }
5211
5212   if (dump_file && (dump_flags & TDF_DETAILS))
5213     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5214
5215   for (i = 0; i < n_iv_uses (data); i++)
5216     {
5217       use = iv_use (data, i);
5218       use->selected = iv_ca_cand_for_use (set, use)->cand;
5219     }
5220
5221   return set;
5222 }
5223
5224 /* Creates a new induction variable corresponding to CAND.  */
5225
5226 static void
5227 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5228 {
5229   block_stmt_iterator incr_pos;
5230   tree base;
5231   bool after = false;
5232
5233   if (!cand->iv)
5234     return;
5235
5236   switch (cand->pos)
5237     {
5238     case IP_NORMAL:
5239       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5240       break;
5241
5242     case IP_END:
5243       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5244       after = true;
5245       break;
5246
5247     case IP_ORIGINAL:
5248       /* Mark that the iv is preserved.  */
5249       name_info (data, cand->var_before)->preserve_biv = true;
5250       name_info (data, cand->var_after)->preserve_biv = true;
5251
5252       /* Rewrite the increment so that it uses var_before directly.  */
5253       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5254
5255       return;
5256     }
5257
5258   gimple_add_tmp_var (cand->var_before);
5259   add_referenced_tmp_var (cand->var_before);
5260
5261   base = unshare_expr (cand->iv->base);
5262
5263   create_iv (base, unshare_expr (cand->iv->step),
5264              cand->var_before, data->current_loop,
5265              &incr_pos, after, &cand->var_before, &cand->var_after);
5266 }
5267
5268 /* Creates new induction variables described in SET.  */
5269
5270 static void
5271 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5272 {
5273   unsigned i;
5274   struct iv_cand *cand;
5275   bitmap_iterator bi;
5276
5277   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5278     {
5279       cand = iv_cand (data, i);
5280       create_new_iv (data, cand);
5281     }
5282 }
5283
5284 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5285    is true, remove also the ssa name defined by the statement.  */
5286
5287 static void
5288 remove_statement (tree stmt, bool including_defined_name)
5289 {
5290   if (TREE_CODE (stmt) == PHI_NODE)
5291     {
5292       if (!including_defined_name)
5293         {
5294           /* Prevent the ssa name defined by the statement from being removed.  */
5295           SET_PHI_RESULT (stmt, NULL);
5296         }
5297       remove_phi_node (stmt, NULL_TREE);
5298     }
5299   else
5300     {
5301       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5302
5303       bsi_remove (&bsi);
5304     }
5305 }
5306
5307 /* Rewrites USE (definition of iv used in a nonlinear expression)
5308    using candidate CAND.  */
5309
5310 static void
5311 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5312                             struct iv_use *use, struct iv_cand *cand)
5313 {
5314   tree comp;
5315   tree op, stmts, tgt, ass;
5316   block_stmt_iterator bsi, pbsi;
5317
5318   /* An important special case -- if we are asked to express value of
5319      the original iv by itself, just exit; there is no need to
5320      introduce a new computation (that might also need casting the
5321      variable to unsigned and back).  */
5322   if (cand->pos == IP_ORIGINAL
5323       && TREE_CODE (use->stmt) == MODIFY_EXPR
5324       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5325     {
5326       op = TREE_OPERAND (use->stmt, 1);
5327
5328       /* Be a bit careful.  In case variable is expressed in some
5329          complicated way, rewrite it so that we may get rid of this
5330          complicated expression.  */
5331       if ((TREE_CODE (op) == PLUS_EXPR
5332            || TREE_CODE (op) == MINUS_EXPR)
5333           && TREE_OPERAND (op, 0) == cand->var_before
5334           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5335         return;
5336     }
5337
5338   comp = get_computation (data->current_loop, use, cand);
5339   switch (TREE_CODE (use->stmt))
5340     {
5341     case PHI_NODE:
5342       tgt = PHI_RESULT (use->stmt);
5343
5344       /* If we should keep the biv, do not replace it.  */
5345       if (name_info (data, tgt)->preserve_biv)
5346         return;
5347
5348       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5349       while (!bsi_end_p (pbsi)
5350              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5351         {
5352           bsi = pbsi;
5353           bsi_next (&pbsi);
5354         }
5355       break;
5356
5357     case MODIFY_EXPR:
5358       tgt = TREE_OPERAND (use->stmt, 0);
5359       bsi = bsi_for_stmt (use->stmt);
5360       break;
5361
5362     default:
5363       gcc_unreachable ();
5364     }
5365
5366   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5367
5368   if (TREE_CODE (use->stmt) == PHI_NODE)
5369     {
5370       if (stmts)
5371         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5372       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5373       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5374       remove_statement (use->stmt, false);
5375       SSA_NAME_DEF_STMT (tgt) = ass;
5376     }
5377   else
5378     {
5379       if (stmts)
5380         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5381       TREE_OPERAND (use->stmt, 1) = op;
5382     }
5383 }
5384
5385 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5386    for_each_index.  */
5387
5388 static bool
5389 idx_remove_ssa_names (tree base, tree *idx,
5390                       void *data ATTRIBUTE_UNUSED)
5391 {
5392   tree *op;
5393
5394   if (TREE_CODE (*idx) == SSA_NAME)
5395     *idx = SSA_NAME_VAR (*idx);
5396
5397   if (TREE_CODE (base) == ARRAY_REF)
5398     {
5399       op = &TREE_OPERAND (base, 2);
5400       if (*op
5401           && TREE_CODE (*op) == SSA_NAME)
5402         *op = SSA_NAME_VAR (*op);
5403       op = &TREE_OPERAND (base, 3);
5404       if (*op
5405           && TREE_CODE (*op) == SSA_NAME)
5406         *op = SSA_NAME_VAR (*op);
5407     }
5408
5409   return true;
5410 }
5411
5412 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5413
5414 static tree
5415 unshare_and_remove_ssa_names (tree ref)
5416 {
5417   ref = unshare_expr (ref);
5418   for_each_index (&ref, idx_remove_ssa_names, NULL);
5419
5420   return ref;
5421 }
5422
5423 /* Extract the alias analysis info for the memory reference REF.  There are
5424    several ways how this information may be stored and what precisely is
5425    its semantics depending on the type of the reference, but there always is
5426    somewhere hidden one _DECL node that is used to determine the set of
5427    virtual operands for the reference.  The code below deciphers this jungle
5428    and extracts this single useful piece of information.  */
5429
5430 static tree
5431 get_ref_tag (tree ref)
5432 {
5433   tree var = get_base_address (ref);
5434   tree tag;
5435
5436   if (!var)
5437     return NULL_TREE;
5438
5439   if (TREE_CODE (var) == INDIRECT_REF)
5440     var = TREE_OPERAND (var, 0);
5441   if (TREE_CODE (var) == SSA_NAME)
5442     {
5443       if (SSA_NAME_PTR_INFO (var))
5444         {
5445           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5446           if (tag)
5447             return tag;
5448         }
5449
5450       var = SSA_NAME_VAR (var);
5451     }
5452
5453   if (DECL_P (var))
5454     {
5455       tag = var_ann (var)->type_mem_tag;
5456       if (tag)
5457         return tag;
5458
5459       return var;
5460     }
5461
5462   return NULL_TREE;
5463 }
5464
5465 /* Copies the reference information from OLD_REF to NEW_REF.  */
5466
5467 static void
5468 copy_ref_info (tree new_ref, tree old_ref)
5469 {
5470   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5471     copy_mem_ref_info (new_ref, old_ref);
5472   else
5473     {
5474       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5475       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5476     }
5477 }
5478
5479 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5480
5481 static void
5482 rewrite_use_address (struct ivopts_data *data,
5483                      struct iv_use *use, struct iv_cand *cand)
5484 {
5485   struct affine_tree_combination aff;
5486   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5487   tree ref;
5488
5489   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5490   unshare_aff_combination (&aff);
5491
5492   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5493   copy_ref_info (ref, *use->op_p);
5494   *use->op_p = ref;
5495 }
5496
5497 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5498    candidate CAND.  */
5499
5500 static void
5501 rewrite_use_compare (struct ivopts_data *data,
5502                      struct iv_use *use, struct iv_cand *cand)
5503 {
5504   tree comp;
5505   tree *op_p, cond, op, stmts, bound;
5506   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5507   enum tree_code compare;
5508   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5509
5510   bound = cp->value;
5511   if (bound)
5512     {
5513       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5514       tree var_type = TREE_TYPE (var);
5515
5516       compare = iv_elimination_compare (data, use);
5517       bound = fold_convert (var_type, bound);
5518       op = force_gimple_operand (unshare_expr (bound), &stmts,
5519                                  true, NULL_TREE);
5520
5521       if (stmts)
5522         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5523
5524       *use->op_p = build2 (compare, boolean_type_node, var, op);
5525       update_stmt (use->stmt);
5526       return;
5527     }
5528
5529   /* The induction variable elimination failed; just express the original
5530      giv.  */
5531   comp = get_computation (data->current_loop, use, cand);
5532
5533   cond = *use->op_p;
5534   op_p = &TREE_OPERAND (cond, 0);
5535   if (TREE_CODE (*op_p) != SSA_NAME
5536       || zero_p (get_iv (data, *op_p)->step))
5537     op_p = &TREE_OPERAND (cond, 1);
5538
5539   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5540   if (stmts)
5541     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5542
5543   *op_p = op;
5544 }
5545
5546 /* Ensure that operand *OP_P may be used at the end of EXIT without
5547    violating loop closed ssa form.  */
5548
5549 static void
5550 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5551 {
5552   basic_block def_bb;
5553   struct loop *def_loop;
5554   tree phi, use;
5555
5556   use = USE_FROM_PTR (op_p);
5557   if (TREE_CODE (use) != SSA_NAME)
5558     return;
5559
5560   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5561   if (!def_bb)
5562     return;
5563
5564   def_loop = def_bb->loop_father;
5565   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5566     return;
5567
5568   /* Try finding a phi node that copies the value out of the loop.  */
5569   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5570     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5571       break;
5572
5573   if (!phi)
5574     {
5575       /* Create such a phi node.  */
5576       tree new_name = duplicate_ssa_name (use, NULL);
5577
5578       phi = create_phi_node (new_name, exit->dest);
5579       SSA_NAME_DEF_STMT (new_name) = phi;
5580       add_phi_arg (phi, use, exit);
5581     }
5582
5583   SET_USE (op_p, PHI_RESULT (phi));
5584 }
5585
5586 /* Ensure that operands of STMT may be used at the end of EXIT without
5587    violating loop closed ssa form.  */
5588
5589 static void
5590 protect_loop_closed_ssa_form (edge exit, tree stmt)
5591 {
5592   ssa_op_iter iter;
5593   use_operand_p use_p;
5594
5595   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5596     protect_loop_closed_ssa_form_use (exit, use_p);
5597 }
5598
5599 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5600    so that they are emitted on the correct place, and so that the loop closed
5601    ssa form is preserved.  */
5602
5603 static void
5604 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5605 {
5606   tree_stmt_iterator tsi;
5607   block_stmt_iterator bsi;
5608   tree phi, stmt, def, next;
5609
5610   if (!single_pred_p (exit->dest))
5611     split_loop_exit_edge (exit);
5612
5613   /* Ensure there is label in exit->dest, so that we can
5614      insert after it.  */
5615   tree_block_label (exit->dest);
5616   bsi = bsi_after_labels (exit->dest);
5617
5618   if (TREE_CODE (stmts) == STATEMENT_LIST)
5619     {
5620       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5621         {
5622           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5623           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5624         }
5625     }
5626   else
5627     {
5628       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5629       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5630     }
5631
5632   if (!op)
5633     return;
5634
5635   for (phi = phi_nodes (exit->dest); phi; phi = next)
5636     {
5637       next = PHI_CHAIN (phi);
5638
5639       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5640         {
5641           def = PHI_RESULT (phi);
5642           remove_statement (phi, false);
5643           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5644                         def, op);
5645           SSA_NAME_DEF_STMT (def) = stmt;
5646           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5647         }
5648     }
5649 }
5650
5651 /* Rewrites the final value of USE (that is only needed outside of the loop)
5652    using candidate CAND.  */
5653
5654 static void
5655 rewrite_use_outer (struct ivopts_data *data,
5656                    struct iv_use *use, struct iv_cand *cand)
5657 {
5658   edge exit;
5659   tree value, op, stmts, tgt;
5660   tree phi;
5661
5662   switch (TREE_CODE (use->stmt))
5663     {
5664     case PHI_NODE:
5665       tgt = PHI_RESULT (use->stmt);
5666       break;
5667     case MODIFY_EXPR:
5668       tgt = TREE_OPERAND (use->stmt, 0);
5669       break;
5670     default:
5671       gcc_unreachable ();
5672     }
5673
5674   exit = single_dom_exit (data->current_loop);
5675
5676   if (exit)
5677     {
5678       if (!cand->iv)
5679         {
5680           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5681           value = unshare_expr (cp->value);
5682         }
5683       else
5684         value = get_computation_at (data->current_loop,
5685                                     use, cand, last_stmt (exit->src));
5686
5687       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5688
5689       /* If we will preserve the iv anyway and we would need to perform
5690          some computation to replace the final value, do nothing.  */
5691       if (stmts && name_info (data, tgt)->preserve_biv)
5692         return;
5693
5694       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5695         {
5696           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5697
5698           if (USE_FROM_PTR (use_p) == tgt)
5699             SET_USE (use_p, op);
5700         }
5701
5702       if (stmts)
5703         compute_phi_arg_on_exit (exit, stmts, op);
5704
5705       /* Enable removal of the statement.  We cannot remove it directly,
5706          since we may still need the aliasing information attached to the
5707          ssa name defined by it.  */
5708       name_info (data, tgt)->iv->have_use_for = false;
5709       return;
5710     }
5711
5712   /* If the variable is going to be preserved anyway, there is nothing to
5713      do.  */
5714   if (name_info (data, tgt)->preserve_biv)
5715     return;
5716
5717   /* Otherwise we just need to compute the iv.  */
5718   rewrite_use_nonlinear_expr (data, use, cand);
5719 }
5720
5721 /* Rewrites USE using candidate CAND.  */
5722
5723 static void
5724 rewrite_use (struct ivopts_data *data,
5725              struct iv_use *use, struct iv_cand *cand)
5726 {
5727   switch (use->type)
5728     {
5729       case USE_NONLINEAR_EXPR:
5730         rewrite_use_nonlinear_expr (data, use, cand);
5731         break;
5732
5733       case USE_OUTER:
5734         rewrite_use_outer (data, use, cand);
5735         break;
5736
5737       case USE_ADDRESS:
5738         rewrite_use_address (data, use, cand);
5739         break;
5740
5741       case USE_COMPARE:
5742         rewrite_use_compare (data, use, cand);
5743         break;
5744
5745       default:
5746         gcc_unreachable ();
5747     }
5748   update_stmt (use->stmt);
5749 }
5750
5751 /* Rewrite the uses using the selected induction variables.  */
5752
5753 static void
5754 rewrite_uses (struct ivopts_data *data)
5755 {
5756   unsigned i;
5757   struct iv_cand *cand;
5758   struct iv_use *use;
5759
5760   for (i = 0; i < n_iv_uses (data); i++)
5761     {
5762       use = iv_use (data, i);
5763       cand = use->selected;
5764       gcc_assert (cand);
5765
5766       rewrite_use (data, use, cand);
5767     }
5768 }
5769
5770 /* Removes the ivs that are not used after rewriting.  */
5771
5772 static void
5773 remove_unused_ivs (struct ivopts_data *data)
5774 {
5775   unsigned j;
5776   bitmap_iterator bi;
5777
5778   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5779     {
5780       struct version_info *info;
5781
5782       info = ver_info (data, j);
5783       if (info->iv
5784           && !zero_p (info->iv->step)
5785           && !info->inv_id
5786           && !info->iv->have_use_for
5787           && !info->preserve_biv)
5788         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5789     }
5790 }
5791
5792 /* Frees data allocated by the optimization of a single loop.  */
5793
5794 static void
5795 free_loop_data (struct ivopts_data *data)
5796 {
5797   unsigned i, j;
5798   bitmap_iterator bi;
5799   tree obj;
5800
5801   htab_empty (data->niters);
5802
5803   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5804     {
5805       struct version_info *info;
5806
5807       info = ver_info (data, i);
5808       if (info->iv)
5809         free (info->iv);
5810       info->iv = NULL;
5811       info->has_nonlin_use = false;
5812       info->preserve_biv = false;
5813       info->inv_id = 0;
5814     }
5815   bitmap_clear (data->relevant);
5816   bitmap_clear (data->important_candidates);
5817
5818   for (i = 0; i < n_iv_uses (data); i++)
5819     {
5820       struct iv_use *use = iv_use (data, i);
5821
5822       free (use->iv);
5823       BITMAP_FREE (use->related_cands);
5824       for (j = 0; j < use->n_map_members; j++)
5825         if (use->cost_map[j].depends_on)
5826           BITMAP_FREE (use->cost_map[j].depends_on);
5827       free (use->cost_map);
5828       free (use);
5829     }
5830   VEC_truncate (iv_use_p, data->iv_uses, 0);
5831
5832   for (i = 0; i < n_iv_cands (data); i++)
5833     {
5834       struct iv_cand *cand = iv_cand (data, i);
5835
5836       if (cand->iv)
5837         free (cand->iv);
5838       if (cand->depends_on)
5839         BITMAP_FREE (cand->depends_on);
5840       free (cand);
5841     }
5842   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5843
5844   if (data->version_info_size < num_ssa_names)
5845     {
5846       data->version_info_size = 2 * num_ssa_names;
5847       free (data->version_info);
5848       data->version_info = xcalloc (data->version_info_size,
5849                                     sizeof (struct version_info));
5850     }
5851
5852   data->max_inv_id = 0;
5853
5854   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5855     SET_DECL_RTL (obj, NULL_RTX);
5856
5857   VEC_truncate (tree, decl_rtl_to_reset, 0);
5858 }
5859
5860 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5861    loop tree.  */
5862
5863 static void
5864 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5865 {
5866   unsigned i;
5867
5868   for (i = 1; i < loops->num; i++)
5869     if (loops->parray[i])
5870       {
5871         free (loops->parray[i]->aux);
5872         loops->parray[i]->aux = NULL;
5873       }
5874
5875   free_loop_data (data);
5876   free (data->version_info);
5877   BITMAP_FREE (data->relevant);
5878   BITMAP_FREE (data->important_candidates);
5879   htab_delete (data->niters);
5880
5881   VEC_free (tree, heap, decl_rtl_to_reset);
5882   VEC_free (iv_use_p, heap, data->iv_uses);
5883   VEC_free (iv_cand_p, heap, data->iv_candidates);
5884 }
5885
5886 /* Optimizes the LOOP.  Returns true if anything changed.  */
5887
5888 static bool
5889 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5890 {
5891   bool changed = false;
5892   struct iv_ca *iv_ca;
5893   edge exit;
5894
5895   data->current_loop = loop;
5896
5897   if (dump_file && (dump_flags & TDF_DETAILS))
5898     {
5899       fprintf (dump_file, "Processing loop %d\n", loop->num);
5900
5901       exit = single_dom_exit (loop);
5902       if (exit)
5903         {
5904           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5905                    exit->src->index, exit->dest->index);
5906           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5907           fprintf (dump_file, "\n");
5908         }
5909
5910       fprintf (dump_file, "\n");
5911     }
5912
5913   /* For each ssa name determines whether it behaves as an induction variable
5914      in some loop.  */
5915   if (!find_induction_variables (data))
5916     goto finish;
5917
5918   /* Finds interesting uses (item 1).  */
5919   find_interesting_uses (data);
5920   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5921     goto finish;
5922
5923   /* Finds candidates for the induction variables (item 2).  */
5924   find_iv_candidates (data);
5925
5926   /* Calculates the costs (item 3, part 1).  */
5927   determine_use_iv_costs (data);
5928   determine_iv_costs (data);
5929   determine_set_costs (data);
5930
5931   /* Find the optimal set of induction variables (item 3, part 2).  */
5932   iv_ca = find_optimal_iv_set (data);
5933   if (!iv_ca)
5934     goto finish;
5935   changed = true;
5936
5937   /* Create the new induction variables (item 4, part 1).  */
5938   create_new_ivs (data, iv_ca);
5939   iv_ca_free (&iv_ca);
5940
5941   /* Rewrite the uses (item 4, part 2).  */
5942   rewrite_uses (data);
5943
5944   /* Remove the ivs that are unused after rewriting.  */
5945   remove_unused_ivs (data);
5946
5947   /* We have changed the structure of induction variables; it might happen
5948      that definitions in the scev database refer to some of them that were
5949      eliminated.  */
5950   scev_reset ();
5951
5952 finish:
5953   free_loop_data (data);
5954
5955   return changed;
5956 }
5957
5958 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5959
5960 void
5961 tree_ssa_iv_optimize (struct loops *loops)
5962 {
5963   struct loop *loop;
5964   struct ivopts_data data;
5965
5966   tree_ssa_iv_optimize_init (loops, &data);
5967
5968   /* Optimize the loops starting with the innermost ones.  */
5969   loop = loops->tree_root;
5970   while (loop->inner)
5971     loop = loop->inner;
5972
5973   /* Scan the loops, inner ones first.  */
5974   while (loop != loops->tree_root)
5975     {
5976       if (dump_file && (dump_flags & TDF_DETAILS))
5977         flow_loop_dump (loop, dump_file, NULL, 1);
5978
5979       tree_ssa_iv_optimize_loop (&data, loop);
5980
5981       if (loop->next)
5982         {
5983           loop = loop->next;
5984           while (loop->inner)
5985             loop = loop->inner;
5986         }
5987       else
5988         loop = loop->outer;
5989     }
5990
5991   tree_ssa_iv_optimize_finalize (loops, &data);
5992 }