gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter,
 716                                                      true);
 717       *slot = nfe_desc;
 718     }
 719   else
 720     nfe_desc = *slot;
 721
 722   if (!nfe_desc->valid_p)
 723     return NULL;
 724
 725   return &nfe_desc->niter;
 726 }
 727
 728 /* Returns structure describing number of iterations determined from
 729    single dominating exit of DATA->current_loop, or NULL if something
 730    goes wrong.  */
 731
 732 static struct tree_niter_desc *
 733 niter_for_single_dom_exit (struct ivopts_data *data)
 734 {
 735   edge exit = single_dom_exit (data->current_loop);
 736
 737   if (!exit)
 738     return NULL;
 739
 740   return niter_for_exit (data, exit);
 741 }
 742
 743 /* Initializes data structures used by the iv optimization pass, stored
 744    in DATA.  LOOPS is the loop tree.  */
 745
 746 static void
 747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 748 {
 749   unsigned i;
 750
 751   data->version_info_size = 2 * num_ssa_names;
 752   data->version_info = xcalloc (data->version_info_size,
 753                                 sizeof (struct version_info));
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 758
 759   for (i = 1; i < loops->num; i++)
 760     if (loops->parray[i])
 761       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 762
 763   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 764   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 765   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 766 }
 767
 768 /* Returns a memory object to that EXPR points.  In case we are able to
 769    determine that it does not point to any such object, NULL is returned.  */
 770
 771 static tree
 772 determine_base_object (tree expr)
 773 {
 774   enum tree_code code = TREE_CODE (expr);
 775   tree base, obj, op0, op1;
 776
 777   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 778     return NULL_TREE;
 779
 780   switch (code)
 781     {
 782     case INTEGER_CST:
 783       return NULL_TREE;
 784
 785     case ADDR_EXPR:
 786       obj = TREE_OPERAND (expr, 0);
 787       base = get_base_address (obj);
 788
 789       if (!base)
 790         return expr;
 791
 792       if (TREE_CODE (base) == INDIRECT_REF)
 793         return determine_base_object (TREE_OPERAND (base, 0));
 794
 795       return fold_convert (ptr_type_node,
 796                            build_fold_addr_expr (base));
 797
 798     case PLUS_EXPR:
 799     case MINUS_EXPR:
 800       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 801       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 802
 803       if (!op1)
 804         return op0;
 805
 806       if (!op0)
 807         return (code == PLUS_EXPR
 808                 ? op1
 809                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 810
 811       return fold_build2 (code, ptr_type_node, op0, op1);
 812
 813     case NOP_EXPR:
 814     case CONVERT_EXPR:
 815       return determine_base_object (TREE_OPERAND (expr, 0));
 816
 817     default:
 818       return fold_convert (ptr_type_node, expr);
 819     }
 820 }
 821
 822 /* Allocates an induction variable with given initial value BASE and step STEP
 823    for loop LOOP.  */
 824
 825 static struct iv *
 826 alloc_iv (tree base, tree step)
 827 {
 828   struct iv *iv = xcalloc (1, sizeof (struct iv));
 829
 830   if (step && integer_zerop (step))
 831     step = NULL_TREE;
 832
 833   iv->base = base;
 834   iv->base_object = determine_base_object (base);
 835   iv->step = step;
 836   iv->biv_p = false;
 837   iv->have_use_for = false;
 838   iv->use_id = 0;
 839   iv->ssa_name = NULL_TREE;
 840
 841   return iv;
 842 }
 843
 844 /* Sets STEP and BASE for induction variable IV.  */
 845
 846 static void
 847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 848 {
 849   struct version_info *info = name_info (data, iv);
 850
 851   gcc_assert (!info->iv);
 852
 853   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 854   info->iv = alloc_iv (base, step);
 855   info->iv->ssa_name = iv;
 856 }
 857
 858 /* Finds induction variable declaration for VAR.  */
 859
 860 static struct iv *
 861 get_iv (struct ivopts_data *data, tree var)
 862 {
 863   basic_block bb;
 864
 865   if (!name_info (data, var)->iv)
 866     {
 867       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 868
 869       if (!bb
 870           || !flow_bb_inside_loop_p (data->current_loop, bb))
 871         set_iv (data, var, var, NULL_TREE);
 872     }
 873
 874   return name_info (data, var)->iv;
 875 }
 876
 877 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 878    not define a simple affine biv with nonzero step.  */
 879
 880 static tree
 881 determine_biv_step (tree phi)
 882 {
 883   struct loop *loop = bb_for_stmt (phi)->loop_father;
 884   tree name = PHI_RESULT (phi), base, step;
 885
 886   if (!is_gimple_reg (name))
 887     return NULL_TREE;
 888
 889   if (!simple_iv (loop, phi, name, &base, &step, true))
 890     return NULL_TREE;
 891
 892   if (zero_p (step))
 893     return NULL_TREE;
 894
 895   return step;
 896 }
 897
 898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 899
 900 static bool
 901 abnormal_ssa_name_p (tree exp)
 902 {
 903   if (!exp)
 904     return false;
 905
 906   if (TREE_CODE (exp) != SSA_NAME)
 907     return false;
 908
 909   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 910 }
 911
 912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 913    abnormal phi node.  Callback for for_each_index.  */
 914
 915 static bool
 916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 917                                   void *data ATTRIBUTE_UNUSED)
 918 {
 919   if (TREE_CODE (base) == ARRAY_REF)
 920     {
 921       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 922         return false;
 923       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 924         return false;
 925     }
 926
 927   return !abnormal_ssa_name_p (*index);
 928 }
 929
 930 /* Returns true if EXPR contains a ssa name that occurs in an
 931    abnormal phi node.  */
 932
 933 static bool
 934 contains_abnormal_ssa_name_p (tree expr)
 935 {
 936   enum tree_code code;
 937   enum tree_code_class class;
 938
 939   if (!expr)
 940     return false;
 941
 942   code = TREE_CODE (expr);
 943   class = TREE_CODE_CLASS (code);
 944
 945   if (code == SSA_NAME)
 946     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 947
 948   if (code == INTEGER_CST
 949       || is_gimple_min_invariant (expr))
 950     return false;
 951
 952   if (code == ADDR_EXPR)
 953     return !for_each_index (&TREE_OPERAND (expr, 0),
 954                             idx_contains_abnormal_ssa_name_p,
 955                             NULL);
 956
 957   switch (class)
 958     {
 959     case tcc_binary:
 960     case tcc_comparison:
 961       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 962         return true;
 963
 964       /* Fallthru.  */
 965     case tcc_unary:
 966       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 967         return true;
 968
 969       break;
 970
 971     default:
 972       gcc_unreachable ();
 973     }
 974
 975   return false;
 976 }
 977
 978 /* Finds basic ivs.  */
 979
 980 static bool
 981 find_bivs (struct ivopts_data *data)
 982 {
 983   tree phi, step, type, base;
 984   bool found = false;
 985   struct loop *loop = data->current_loop;
 986
 987   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 988     {
 989       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 990         continue;
 991
 992       step = determine_biv_step (phi);
 993       if (!step)
 994         continue;
 995
 996       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 997       base = expand_simple_operations (base);
 998       if (contains_abnormal_ssa_name_p (base)
 999           || contains_abnormal_ssa_name_p (step))
1000         continue;
1001
1002       type = TREE_TYPE (PHI_RESULT (phi));
1003       base = fold_convert (type, base);
1004       if (step)
1005         step = fold_convert (type, step);
1006
1007       set_iv (data, PHI_RESULT (phi), base, step);
1008       found = true;
1009     }
1010
1011   return found;
1012 }
1013
1014 /* Marks basic ivs.  */
1015
1016 static void
1017 mark_bivs (struct ivopts_data *data)
1018 {
1019   tree phi, var;
1020   struct iv *iv, *incr_iv;
1021   struct loop *loop = data->current_loop;
1022   basic_block incr_bb;
1023
1024   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025     {
1026       iv = get_iv (data, PHI_RESULT (phi));
1027       if (!iv)
1028         continue;
1029
1030       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031       incr_iv = get_iv (data, var);
1032       if (!incr_iv)
1033         continue;
1034
1035       /* If the increment is in the subloop, ignore it.  */
1036       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037       if (incr_bb->loop_father != data->current_loop
1038           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039         continue;
1040
1041       iv->biv_p = true;
1042       incr_iv->biv_p = true;
1043     }
1044 }
1045
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047    parameters to BASE and STEP.  */
1048
1049 static bool
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051                         tree *base, tree *step)
1052 {
1053   tree lhs;
1054   struct loop *loop = data->current_loop;
1055
1056   *base = NULL_TREE;
1057   *step = NULL_TREE;
1058
1059   if (TREE_CODE (stmt) != MODIFY_EXPR)
1060     return false;
1061
1062   lhs = TREE_OPERAND (stmt, 0);
1063   if (TREE_CODE (lhs) != SSA_NAME)
1064     return false;
1065
1066   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067     return false;
1068   *base = expand_simple_operations (*base);
1069
1070   if (contains_abnormal_ssa_name_p (*base)
1071       || contains_abnormal_ssa_name_p (*step))
1072     return false;
1073
1074   return true;
1075 }
1076
1077 /* Finds general ivs in statement STMT.  */
1078
1079 static void
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 {
1082   tree base, step;
1083
1084   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085     return;
1086
1087   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1088 }
1089
1090 /* Finds general ivs in basic block BB.  */
1091
1092 static void
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 {
1095   block_stmt_iterator bsi;
1096
1097   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098     find_givs_in_stmt (data, bsi_stmt (bsi));
1099 }
1100
1101 /* Finds general ivs.  */
1102
1103 static void
1104 find_givs (struct ivopts_data *data)
1105 {
1106   struct loop *loop = data->current_loop;
1107   basic_block *body = get_loop_body_in_dom_order (loop);
1108   unsigned i;
1109
1110   for (i = 0; i < loop->num_nodes; i++)
1111     find_givs_in_bb (data, body[i]);
1112   free (body);
1113 }
1114
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116    variable and if so, its initial value and step.  */
1117
1118 static bool
1119 find_induction_variables (struct ivopts_data *data)
1120 {
1121   unsigned i;
1122   bitmap_iterator bi;
1123
1124   if (!find_bivs (data))
1125     return false;
1126
1127   find_givs (data);
1128   mark_bivs (data);
1129
1130   if (dump_file && (dump_flags & TDF_DETAILS))
1131     {
1132       struct tree_niter_desc *niter;
1133
1134       niter = niter_for_single_dom_exit (data);
1135
1136       if (niter)
1137         {
1138           fprintf (dump_file, "  number of iterations ");
1139           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141
1142           fprintf (dump_file, "  may be zero if ");
1143           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144           fprintf (dump_file, "\n");
1145           fprintf (dump_file, "\n");
1146         };
1147
1148       fprintf (dump_file, "Induction variables:\n\n");
1149
1150       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151         {
1152           if (ver_info (data, i)->iv)
1153             dump_iv (dump_file, ver_info (data, i)->iv);
1154         }
1155     }
1156
1157   return true;
1158 }
1159
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1161
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164             tree stmt, enum use_type use_type)
1165 {
1166   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167
1168   use->id = n_iv_uses (data);
1169   use->type = use_type;
1170   use->iv = iv;
1171   use->stmt = stmt;
1172   use->op_p = use_p;
1173   use->related_cands = BITMAP_ALLOC (NULL);
1174
1175   /* To avoid showing ssa name in the dumps, if it was not reset by the
1176      caller.  */
1177   iv->ssa_name = NULL_TREE;
1178
1179   if (dump_file && (dump_flags & TDF_DETAILS))
1180     dump_use (dump_file, use);
1181
1182   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183
1184   return use;
1185 }
1186
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188    NONLINEAR_USE is true if the invariant is used in a way we do not
1189    handle specially.  */
1190
1191 static void
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 {
1194   basic_block bb;
1195   struct version_info *info;
1196
1197   if (TREE_CODE (op) != SSA_NAME
1198       || !is_gimple_reg (op))
1199     return;
1200
1201   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202   if (bb
1203       && flow_bb_inside_loop_p (data->current_loop, bb))
1204     return;
1205
1206   info = name_info (data, op);
1207   info->name = op;
1208   info->has_nonlin_use |= nonlinear_use;
1209   if (!info->inv_id)
1210     info->inv_id = ++data->max_inv_id;
1211   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1212 }
1213
1214 /* Checks whether the use OP is interesting and if so, records it
1215    as TYPE.  */
1216
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1219                                        enum use_type type)
1220 {
1221   struct iv *iv;
1222   struct iv *civ;
1223   tree stmt;
1224   struct iv_use *use;
1225
1226   if (TREE_CODE (op) != SSA_NAME)
1227     return NULL;
1228
1229   iv = get_iv (data, op);
1230   if (!iv)
1231     return NULL;
1232
1233   if (iv->have_use_for)
1234     {
1235       use = iv_use (data, iv->use_id);
1236
1237       gcc_assert (use->type == USE_NONLINEAR_EXPR
1238                   || use->type == USE_OUTER);
1239
1240       if (type == USE_NONLINEAR_EXPR)
1241         use->type = USE_NONLINEAR_EXPR;
1242       return use;
1243     }
1244
1245   if (zero_p (iv->step))
1246     {
1247       record_invariant (data, op, true);
1248       return NULL;
1249     }
1250   iv->have_use_for = true;
1251
1252   civ = xmalloc (sizeof (struct iv));
1253   *civ = *iv;
1254
1255   stmt = SSA_NAME_DEF_STMT (op);
1256   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257               || TREE_CODE (stmt) == MODIFY_EXPR);
1258
1259   use = record_use (data, NULL, civ, stmt, type);
1260   iv->use_id = use->id;
1261
1262   return use;
1263 }
1264
1265 /* Checks whether the use OP is interesting and if so, records it.  */
1266
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 {
1270   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1271 }
1272
1273 /* Records a definition of induction variable OP that is used outside of the
1274    loop.  */
1275
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 {
1279   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1280 }
1281
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283    and if so, records it.  */
1284
1285 static void
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 {
1288   tree *op0_p;
1289   tree *op1_p;
1290   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291   struct iv const_iv;
1292   tree zero = integer_zero_node;
1293
1294   const_iv.step = NULL_TREE;
1295
1296   if (TREE_CODE (*cond_p) != SSA_NAME
1297       && !COMPARISON_CLASS_P (*cond_p))
1298     return;
1299
1300   if (TREE_CODE (*cond_p) == SSA_NAME)
1301     {
1302       op0_p = cond_p;
1303       op1_p = &zero;
1304     }
1305   else
1306     {
1307       op0_p = &TREE_OPERAND (*cond_p, 0);
1308       op1_p = &TREE_OPERAND (*cond_p, 1);
1309     }
1310
1311   if (TREE_CODE (*op0_p) == SSA_NAME)
1312     iv0 = get_iv (data, *op0_p);
1313   else
1314     iv0 = &const_iv;
1315
1316   if (TREE_CODE (*op1_p) == SSA_NAME)
1317     iv1 = get_iv (data, *op1_p);
1318   else
1319     iv1 = &const_iv;
1320
1321   if (/* When comparing with non-invariant value, we may not do any senseful
1322          induction variable elimination.  */
1323       (!iv0 || !iv1)
1324       /* Eliminating condition based on two ivs would be nontrivial.
1325          ??? TODO -- it is not really important to handle this case.  */
1326       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327     {
1328       find_interesting_uses_op (data, *op0_p);
1329       find_interesting_uses_op (data, *op1_p);
1330       return;
1331     }
1332
1333   if (zero_p (iv0->step) && zero_p (iv1->step))
1334     {
1335       /* If both are invariants, this is a work for unswitching.  */
1336       return;
1337     }
1338
1339   civ = xmalloc (sizeof (struct iv));
1340   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1342 }
1343
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345    i.e. if all its operands are defined outside of the LOOP.  */
1346
1347 bool
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 {
1350   basic_block def_bb;
1351   unsigned i, len;
1352
1353   if (is_gimple_min_invariant (expr))
1354     return true;
1355
1356   if (TREE_CODE (expr) == SSA_NAME)
1357     {
1358       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359       if (def_bb
1360           && flow_bb_inside_loop_p (loop, def_bb))
1361         return false;
1362
1363       return true;
1364     }
1365
1366   if (!EXPR_P (expr))
1367     return false;
1368
1369   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370   for (i = 0; i < len; i++)
1371     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372       return false;
1373
1374   return true;
1375 }
1376
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378    initial ones.  Returns false when the value of the index cannot be determined.
1379    Callback for for_each_index.  */
1380
1381 struct ifs_ivopts_data
1382 {
1383   struct ivopts_data *ivopts_data;
1384   tree stmt;
1385   tree *step_p;
1386 };
1387
1388 static bool
1389 idx_find_step (tree base, tree *idx, void *data)
1390 {
1391   struct ifs_ivopts_data *dta = data;
1392   struct iv *iv;
1393   tree step, iv_step, lbound, off;
1394   struct loop *loop = dta->ivopts_data->current_loop;
1395
1396   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398     return false;
1399
1400   /* If base is a component ref, require that the offset of the reference
1401      be invariant.  */
1402   if (TREE_CODE (base) == COMPONENT_REF)
1403     {
1404       off = component_ref_field_offset (base);
1405       return expr_invariant_in_loop_p (loop, off);
1406     }
1407
1408   /* If base is array, first check whether we will be able to move the
1409      reference out of the loop (in order to take its address in strength
1410      reduction).  In order for this to work we need both lower bound
1411      and step to be loop invariants.  */
1412   if (TREE_CODE (base) == ARRAY_REF)
1413     {
1414       step = array_ref_element_size (base);
1415       lbound = array_ref_low_bound (base);
1416
1417       if (!expr_invariant_in_loop_p (loop, step)
1418           || !expr_invariant_in_loop_p (loop, lbound))
1419         return false;
1420     }
1421
1422   if (TREE_CODE (*idx) != SSA_NAME)
1423     return true;
1424
1425   iv = get_iv (dta->ivopts_data, *idx);
1426   if (!iv)
1427     return false;
1428
1429   *idx = iv->base;
1430
1431   if (!iv->step)
1432     return true;
1433
1434   if (TREE_CODE (base) == ARRAY_REF)
1435     {
1436       step = array_ref_element_size (base);
1437
1438       /* We only handle addresses whose step is an integer constant.  */
1439       if (TREE_CODE (step) != INTEGER_CST)
1440         return false;
1441     }
1442   else
1443     /* The step for pointer arithmetics already is 1 byte.  */
1444     step = build_int_cst (sizetype, 1);
1445
1446   /* FIXME: convert_step should not be used outside chrec_convert: fix
1447      this by calling chrec_convert.  */
1448   iv_step = convert_step (dta->ivopts_data->current_loop,
1449                           sizetype, iv->base, iv->step, dta->stmt);
1450
1451   if (!iv_step)
1452     {
1453       /* The index might wrap.  */
1454       return false;
1455     }
1456
1457   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1458
1459   if (!*dta->step_p)
1460     *dta->step_p = step;
1461   else
1462     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1463
1464   return true;
1465 }
1466
1467 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1468    object is passed to it in DATA.  */
1469
1470 static bool
1471 idx_record_use (tree base, tree *idx,
1472                 void *data)
1473 {
1474   find_interesting_uses_op (data, *idx);
1475   if (TREE_CODE (base) == ARRAY_REF)
1476     {
1477       find_interesting_uses_op (data, array_ref_element_size (base));
1478       find_interesting_uses_op (data, array_ref_low_bound (base));
1479     }
1480   return true;
1481 }
1482
1483 /* Returns true if memory reference REF may be unaligned.  */
1484
1485 static bool
1486 may_be_unaligned_p (tree ref)
1487 {
1488   tree base;
1489   tree base_type;
1490   HOST_WIDE_INT bitsize;
1491   HOST_WIDE_INT bitpos;
1492   tree toffset;
1493   enum machine_mode mode;
1494   int unsignedp, volatilep;
1495   unsigned base_align;
1496
1497   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498      thus they are not misaligned.  */
1499   if (TREE_CODE (ref) == TARGET_MEM_REF)
1500     return false;
1501
1502   /* The test below is basically copy of what expr.c:normal_inner_ref
1503      does to check whether the object must be loaded by parts when
1504      STRICT_ALIGNMENT is true.  */
1505   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1506                               &unsignedp, &volatilep, true);
1507   base_type = TREE_TYPE (base);
1508   base_align = TYPE_ALIGN (base_type);
1509
1510   if (mode != BLKmode
1511       && (base_align < GET_MODE_ALIGNMENT (mode)
1512           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1513           || bitpos % BITS_PER_UNIT != 0))
1514     return true;
1515
1516   return false;
1517 }
1518
1519 /* Finds addresses in *OP_P inside STMT.  */
1520
1521 static void
1522 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1523 {
1524   tree base = *op_p, step = NULL;
1525   struct iv *civ;
1526   struct ifs_ivopts_data ifs_ivopts_data;
1527
1528   /* Do not play with volatile memory references.  A bit too conservative,
1529      perhaps, but safe.  */
1530   if (stmt_ann (stmt)->has_volatile_ops)
1531     goto fail;
1532
1533   /* Ignore bitfields for now.  Not really something terribly complicated
1534      to handle.  TODO.  */
1535   if (TREE_CODE (base) == COMPONENT_REF
1536       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537     goto fail;
1538
1539   if (STRICT_ALIGNMENT
1540       && may_be_unaligned_p (base))
1541     goto fail;
1542
1543   base = unshare_expr (base);
1544
1545   if (TREE_CODE (base) == TARGET_MEM_REF)
1546     {
1547       tree type = build_pointer_type (TREE_TYPE (base));
1548       tree astep;
1549
1550       if (TMR_BASE (base)
1551           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1552         {
1553           civ = get_iv (data, TMR_BASE (base));
1554           if (!civ)
1555             goto fail;
1556
1557           TMR_BASE (base) = civ->base;
1558           step = civ->step;
1559         }
1560       if (TMR_INDEX (base)
1561           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1562         {
1563           civ = get_iv (data, TMR_INDEX (base));
1564           if (!civ)
1565             goto fail;
1566
1567           TMR_INDEX (base) = civ->base;
1568           astep = civ->step;
1569
1570           if (astep)
1571             {
1572               if (TMR_STEP (base))
1573                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1574
1575               if (step)
1576                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1577               else
1578                 step = astep;
1579             }
1580         }
1581
1582       if (zero_p (step))
1583         goto fail;
1584       base = tree_mem_ref_addr (type, base);
1585     }
1586   else
1587     {
1588       ifs_ivopts_data.ivopts_data = data;
1589       ifs_ivopts_data.stmt = stmt;
1590       ifs_ivopts_data.step_p = &step;
1591       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1592           || zero_p (step))
1593         goto fail;
1594
1595       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1596       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1597
1598       base = build_fold_addr_expr (base);
1599     }
1600
1601   civ = alloc_iv (base, step);
1602   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1603   return;
1604
1605 fail:
1606   for_each_index (op_p, idx_record_use, data);
1607 }
1608
1609 /* Finds and records invariants used in STMT.  */
1610
1611 static void
1612 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1613 {
1614   ssa_op_iter iter;
1615   use_operand_p use_p;
1616   tree op;
1617
1618   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1619     {
1620       op = USE_FROM_PTR (use_p);
1621       record_invariant (data, op, false);
1622     }
1623 }
1624
1625 /* Finds interesting uses of induction variables in the statement STMT.  */
1626
1627 static void
1628 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1629 {
1630   struct iv *iv;
1631   tree op, lhs, rhs;
1632   ssa_op_iter iter;
1633   use_operand_p use_p;
1634
1635   find_invariants_stmt (data, stmt);
1636
1637   if (TREE_CODE (stmt) == COND_EXPR)
1638     {
1639       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1640       return;
1641     }
1642
1643   if (TREE_CODE (stmt) == MODIFY_EXPR)
1644     {
1645       lhs = TREE_OPERAND (stmt, 0);
1646       rhs = TREE_OPERAND (stmt, 1);
1647
1648       if (TREE_CODE (lhs) == SSA_NAME)
1649         {
1650           /* If the statement defines an induction variable, the uses are not
1651              interesting by themselves.  */
1652
1653           iv = get_iv (data, lhs);
1654
1655           if (iv && !zero_p (iv->step))
1656             return;
1657         }
1658
1659       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1660         {
1661         case tcc_comparison:
1662           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1663           return;
1664
1665         case tcc_reference:
1666           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1667           if (REFERENCE_CLASS_P (lhs))
1668             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1669           return;
1670
1671         default: ;
1672         }
1673
1674       if (REFERENCE_CLASS_P (lhs)
1675           && is_gimple_val (rhs))
1676         {
1677           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1678           find_interesting_uses_op (data, rhs);
1679           return;
1680         }
1681
1682       /* TODO -- we should also handle address uses of type
1683
1684          memory = call (whatever);
1685
1686          and
1687
1688          call (memory).  */
1689     }
1690
1691   if (TREE_CODE (stmt) == PHI_NODE
1692       && bb_for_stmt (stmt) == data->current_loop->header)
1693     {
1694       lhs = PHI_RESULT (stmt);
1695       iv = get_iv (data, lhs);
1696
1697       if (iv && !zero_p (iv->step))
1698         return;
1699     }
1700
1701   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1702     {
1703       op = USE_FROM_PTR (use_p);
1704
1705       if (TREE_CODE (op) != SSA_NAME)
1706         continue;
1707
1708       iv = get_iv (data, op);
1709       if (!iv)
1710         continue;
1711
1712       find_interesting_uses_op (data, op);
1713     }
1714 }
1715
1716 /* Finds interesting uses of induction variables outside of loops
1717    on loop exit edge EXIT.  */
1718
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1721 {
1722   tree phi, def;
1723
1724   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1725     {
1726       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727       find_interesting_uses_outer (data, def);
1728     }
1729 }
1730
1731 /* Finds uses of the induction variables that are interesting.  */
1732
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1735 {
1736   basic_block bb;
1737   block_stmt_iterator bsi;
1738   tree phi;
1739   basic_block *body = get_loop_body (data->current_loop);
1740   unsigned i;
1741   struct version_info *info;
1742   edge e;
1743
1744   if (dump_file && (dump_flags & TDF_DETAILS))
1745     fprintf (dump_file, "Uses:\n\n");
1746
1747   for (i = 0; i < data->current_loop->num_nodes; i++)
1748     {
1749       edge_iterator ei;
1750       bb = body[i];
1751
1752       FOR_EACH_EDGE (e, ei, bb->succs)
1753         if (e->dest != EXIT_BLOCK_PTR
1754             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755           find_interesting_uses_outside (data, e);
1756
1757       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758         find_interesting_uses_stmt (data, phi);
1759       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1761     }
1762
1763   if (dump_file && (dump_flags & TDF_DETAILS))
1764     {
1765       bitmap_iterator bi;
1766
1767       fprintf (dump_file, "\n");
1768
1769       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1770         {
1771           info = ver_info (data, i);
1772           if (info->inv_id)
1773             {
1774               fprintf (dump_file, "  ");
1775               print_generic_expr (dump_file, info->name, TDF_SLIM);
1776               fprintf (dump_file, " is invariant (%d)%s\n",
1777                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778             }
1779         }
1780
1781       fprintf (dump_file, "\n");
1782     }
1783
1784   free (body);
1785 }
1786
1787 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1788    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1789    we are at the top-level of the processed address.  */
1790
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793                 unsigned HOST_WIDE_INT *offset)
1794 {
1795   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796   enum tree_code code;
1797   tree type, orig_type = TREE_TYPE (expr);
1798   unsigned HOST_WIDE_INT off0, off1, st;
1799   tree orig_expr = expr;
1800
1801   STRIP_NOPS (expr);
1802
1803   type = TREE_TYPE (expr);
1804   code = TREE_CODE (expr);
1805   *offset = 0;
1806
1807   switch (code)
1808     {
1809     case INTEGER_CST:
1810       if (!cst_and_fits_in_hwi (expr)
1811           || zero_p (expr))
1812         return orig_expr;
1813
1814       *offset = int_cst_value (expr);
1815       return build_int_cst_type (orig_type, 0);
1816
1817     case PLUS_EXPR:
1818     case MINUS_EXPR:
1819       op0 = TREE_OPERAND (expr, 0);
1820       op1 = TREE_OPERAND (expr, 1);
1821
1822       op0 = strip_offset_1 (op0, false, false, &off0);
1823       op1 = strip_offset_1 (op1, false, false, &off1);
1824
1825       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826       if (op0 == TREE_OPERAND (expr, 0)
1827           && op1 == TREE_OPERAND (expr, 1))
1828         return orig_expr;
1829
1830       if (zero_p (op1))
1831         expr = op0;
1832       else if (zero_p (op0))
1833         {
1834           if (code == PLUS_EXPR)
1835             expr = op1;
1836           else
1837             expr = fold_build1 (NEGATE_EXPR, type, op1);
1838         }
1839       else
1840         expr = fold_build2 (code, type, op0, op1);
1841
1842       return fold_convert (orig_type, expr);
1843
1844     case ARRAY_REF:
1845       if (!inside_addr)
1846         return orig_expr;
1847
1848       step = array_ref_element_size (expr);
1849       if (!cst_and_fits_in_hwi (step))
1850         break;
1851
1852       st = int_cst_value (step);
1853       op1 = TREE_OPERAND (expr, 1);
1854       op1 = strip_offset_1 (op1, false, false, &off1);
1855       *offset = off1 * st;
1856
1857       if (top_compref
1858           && zero_p (op1))
1859         {
1860           /* Strip the component reference completely.  */
1861           op0 = TREE_OPERAND (expr, 0);
1862           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863           *offset += off0;
1864           return op0;
1865         }
1866       break;
1867
1868     case COMPONENT_REF:
1869       if (!inside_addr)
1870         return orig_expr;
1871
1872       tmp = component_ref_field_offset (expr);
1873       if (top_compref
1874           && cst_and_fits_in_hwi (tmp))
1875         {
1876           /* Strip the component reference completely.  */
1877           op0 = TREE_OPERAND (expr, 0);
1878           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879           *offset = off0 + int_cst_value (tmp);
1880           return op0;
1881         }
1882       break;
1883
1884     case ADDR_EXPR:
1885       op0 = TREE_OPERAND (expr, 0);
1886       op0 = strip_offset_1 (op0, true, true, &off0);
1887       *offset += off0;
1888
1889       if (op0 == TREE_OPERAND (expr, 0))
1890         return orig_expr;
1891
1892       expr = build_fold_addr_expr (op0);
1893       return fold_convert (orig_type, expr);
1894
1895     case INDIRECT_REF:
1896       inside_addr = false;
1897       break;
1898
1899     default:
1900       return orig_expr;
1901     }
1902
1903   /* Default handling of expressions for that we want to recurse into
1904      the first operand.  */
1905   op0 = TREE_OPERAND (expr, 0);
1906   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907   *offset += off0;
1908
1909   if (op0 == TREE_OPERAND (expr, 0)
1910       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911     return orig_expr;
1912
1913   expr = copy_node (expr);
1914   TREE_OPERAND (expr, 0) = op0;
1915   if (op1)
1916     TREE_OPERAND (expr, 1) = op1;
1917
1918   /* Inside address, we might strip the top level component references,
1919      thus changing type of the expression.  Handling of ADDR_EXPR
1920      will fix that.  */
1921   expr = fold_convert (orig_type, expr);
1922
1923   return expr;
1924 }
1925
1926 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1927
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1930 {
1931   return strip_offset_1 (expr, false, false, offset);
1932 }
1933
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935    For integer types, we return unsigned variant of the type, which
1936    avoids problems with overflows.  For pointer types, we return void *.  */
1937
1938 static tree
1939 generic_type_for (tree type)
1940 {
1941   if (POINTER_TYPE_P (type))
1942     return ptr_type_node;
1943
1944   if (TYPE_UNSIGNED (type))
1945     return type;
1946
1947   return unsigned_type_for (type);
1948 }
1949
1950 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1951    the bitmap to that we should store it.  */
1952
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1956 {
1957   bitmap *depends_on = data;
1958   struct version_info *info;
1959
1960   if (TREE_CODE (*expr_p) != SSA_NAME)
1961     return NULL_TREE;
1962   info = name_info (fd_ivopts_data, *expr_p);
1963
1964   if (!info->inv_id || info->has_nonlin_use)
1965     return NULL_TREE;
1966
1967   if (!*depends_on)
1968     *depends_on = BITMAP_ALLOC (NULL);
1969   bitmap_set_bit (*depends_on, info->inv_id);
1970
1971   return NULL_TREE;
1972 }
1973
1974 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1975    position to POS.  If USE is not NULL, the candidate is set as related to
1976    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1977    replacement of the final value of the iv by a direct computation.  */
1978
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981                  tree base, tree step, bool important, enum iv_position pos,
1982                  struct iv_use *use, tree incremented_at)
1983 {
1984   unsigned i;
1985   struct iv_cand *cand = NULL;
1986   tree type, orig_type;
1987
1988   if (base)
1989     {
1990       orig_type = TREE_TYPE (base);
1991       type = generic_type_for (orig_type);
1992       if (type != orig_type)
1993         {
1994           base = fold_convert (type, base);
1995           if (step)
1996             step = fold_convert (type, step);
1997         }
1998     }
1999
2000   for (i = 0; i < n_iv_cands (data); i++)
2001     {
2002       cand = iv_cand (data, i);
2003
2004       if (cand->pos != pos)
2005         continue;
2006
2007       if (cand->incremented_at != incremented_at)
2008         continue;
2009
2010       if (!cand->iv)
2011         {
2012           if (!base && !step)
2013             break;
2014
2015           continue;
2016         }
2017
2018       if (!base && !step)
2019         continue;
2020
2021       if (!operand_equal_p (base, cand->iv->base, 0))
2022         continue;
2023
2024       if (zero_p (cand->iv->step))
2025         {
2026           if (zero_p (step))
2027             break;
2028         }
2029       else
2030         {
2031           if (step && operand_equal_p (step, cand->iv->step, 0))
2032             break;
2033         }
2034     }
2035
2036   if (i == n_iv_cands (data))
2037     {
2038       cand = xcalloc (1, sizeof (struct iv_cand));
2039       cand->id = i;
2040
2041       if (!base && !step)
2042         cand->iv = NULL;
2043       else
2044         cand->iv = alloc_iv (base, step);
2045
2046       cand->pos = pos;
2047       if (pos != IP_ORIGINAL && cand->iv)
2048         {
2049           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050           cand->var_after = cand->var_before;
2051         }
2052       cand->important = important;
2053       cand->incremented_at = incremented_at;
2054       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2055
2056       if (step
2057           && TREE_CODE (step) != INTEGER_CST)
2058         {
2059           fd_ivopts_data = data;
2060           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2061         }
2062
2063       if (dump_file && (dump_flags & TDF_DETAILS))
2064         dump_cand (dump_file, cand);
2065     }
2066
2067   if (important && !cand->important)
2068     {
2069       cand->important = true;
2070       if (dump_file && (dump_flags & TDF_DETAILS))
2071         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2072     }
2073
2074   if (use)
2075     {
2076       bitmap_set_bit (use->related_cands, i);
2077       if (dump_file && (dump_flags & TDF_DETAILS))
2078         fprintf (dump_file, "Candidate %d is related to use %d\n",
2079                  cand->id, use->id);
2080     }
2081
2082   return cand;
2083 }
2084
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086    is allowed.
2087
2088    The purpose is to avoid splitting latch edge with a biv increment, thus
2089    creating a jump, possibly confusing other optimization passes and leaving
2090    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2091    is not available (so we do not have a better alternative), or if the latch
2092    edge is already nonempty.  */
2093
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2096 {
2097   if (!ip_normal_pos (loop))
2098     return true;
2099
2100   if (!empty_block_p (ip_end_pos (loop)))
2101     return true;
2102
2103   return false;
2104 }
2105
2106 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2107    position to POS.  If USE is not NULL, the candidate is set as related to
2108    it.  The candidate computation is scheduled on all available positions.  */
2109
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112                tree base, tree step, bool important, struct iv_use *use)
2113 {
2114   if (ip_normal_pos (data->current_loop))
2115     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116   if (ip_end_pos (data->current_loop)
2117       && allow_ip_end_pos_p (data->current_loop))
2118     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2119 }
2120
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122    type with SIZE bits.  */
2123
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126                                      unsigned int size)
2127 {
2128   tree type = lang_hooks.types.type_for_size (size, true);
2129   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130                  true, NULL);
2131 }
2132
2133 /* Adds standard iv candidates.  */
2134
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2137 {
2138   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2139
2140   /* The same for a double-integer type if it is still fast enough.  */
2141   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 }
2144
2145
2146 /* Adds candidates bases on the old induction variable IV.  */
2147
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2150 {
2151   tree phi, def;
2152   struct iv_cand *cand;
2153
2154   add_candidate (data, iv->base, iv->step, true, NULL);
2155
2156   /* The same, but with initial value zero.  */
2157   add_candidate (data,
2158                  build_int_cst (TREE_TYPE (iv->base), 0),
2159                  iv->step, true, NULL);
2160
2161   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162   if (TREE_CODE (phi) == PHI_NODE)
2163     {
2164       /* Additionally record the possibility of leaving the original iv
2165          untouched.  */
2166       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167       cand = add_candidate_1 (data,
2168                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169                               SSA_NAME_DEF_STMT (def));
2170       cand->var_before = iv->ssa_name;
2171       cand->var_after = def;
2172     }
2173 }
2174
2175 /* Adds candidates based on the old induction variables.  */
2176
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2179 {
2180   unsigned i;
2181   struct iv *iv;
2182   bitmap_iterator bi;
2183
2184   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2185     {
2186       iv = ver_info (data, i)->iv;
2187       if (iv && iv->biv_p && !zero_p (iv->step))
2188         add_old_iv_candidates (data, iv);
2189     }
2190 }
2191
2192 /* Adds candidates based on the value of the induction variable IV and USE.  */
2193
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196                          struct iv *iv, struct iv_use *use)
2197 {
2198   unsigned HOST_WIDE_INT offset;
2199   tree base;
2200
2201   add_candidate (data, iv->base, iv->step, false, use);
2202
2203   /* The same, but with initial value zero.  Make such variable important,
2204      since it is generic enough so that possibly many uses may be based
2205      on it.  */
2206   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207                  iv->step, true, use);
2208
2209   /* Third, try removing the constant offset.  */
2210   base = strip_offset (iv->base, &offset);
2211   if (offset)
2212     add_candidate (data, base, iv->step, false, use);
2213 }
2214
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216    a direct computation.  */
2217
2218 static void
2219 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2220 {
2221   struct tree_niter_desc *niter;
2222
2223   /* We must know where we exit the loop and how many times does it roll.  */
2224   niter = niter_for_single_dom_exit (data);
2225   if (!niter
2226       || !zero_p (niter->may_be_zero))
2227     return;
2228
2229   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2230 }
2231
2232 /* Adds candidates based on the uses.  */
2233
2234 static void
2235 add_derived_ivs_candidates (struct ivopts_data *data)
2236 {
2237   unsigned i;
2238
2239   for (i = 0; i < n_iv_uses (data); i++)
2240     {
2241       struct iv_use *use = iv_use (data, i);
2242
2243       if (!use)
2244         continue;
2245
2246       switch (use->type)
2247         {
2248         case USE_NONLINEAR_EXPR:
2249         case USE_COMPARE:
2250         case USE_ADDRESS:
2251           /* Just add the ivs based on the value of the iv used here.  */
2252           add_iv_value_candidates (data, use->iv, use);
2253           break;
2254
2255         case USE_OUTER:
2256           add_iv_value_candidates (data, use->iv, use);
2257
2258           /* Additionally, add the pseudocandidate for the possibility to
2259              replace the final value by a direct computation.  */
2260           add_iv_outer_candidates (data, use);
2261           break;
2262
2263         default:
2264           gcc_unreachable ();
2265         }
2266     }
2267 }
2268
2269 /* Record important candidates and add them to related_cands bitmaps
2270    if needed.  */
2271
2272 static void
2273 record_important_candidates (struct ivopts_data *data)
2274 {
2275   unsigned i;
2276   struct iv_use *use;
2277
2278   for (i = 0; i < n_iv_cands (data); i++)
2279     {
2280       struct iv_cand *cand = iv_cand (data, i);
2281
2282       if (cand->important)
2283         bitmap_set_bit (data->important_candidates, i);
2284     }
2285
2286   data->consider_all_candidates = (n_iv_cands (data)
2287                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2288
2289   if (data->consider_all_candidates)
2290     {
2291       /* We will not need "related_cands" bitmaps in this case,
2292          so release them to decrease peak memory consumption.  */
2293       for (i = 0; i < n_iv_uses (data); i++)
2294         {
2295           use = iv_use (data, i);
2296           BITMAP_FREE (use->related_cands);
2297         }
2298     }
2299   else
2300     {
2301       /* Add important candidates to the related_cands bitmaps.  */
2302       for (i = 0; i < n_iv_uses (data); i++)
2303         bitmap_ior_into (iv_use (data, i)->related_cands,
2304                          data->important_candidates);
2305     }
2306 }
2307
2308 /* Finds the candidates for the induction variables.  */
2309
2310 static void
2311 find_iv_candidates (struct ivopts_data *data)
2312 {
2313   /* Add commonly used ivs.  */
2314   add_standard_iv_candidates (data);
2315
2316   /* Add old induction variables.  */
2317   add_old_ivs_candidates (data);
2318
2319   /* Add induction variables derived from uses.  */
2320   add_derived_ivs_candidates (data);
2321
2322   /* Record the important candidates.  */
2323   record_important_candidates (data);
2324 }
2325
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328    we allocate a simple list to every use.  */
2329
2330 static void
2331 alloc_use_cost_map (struct ivopts_data *data)
2332 {
2333   unsigned i, size, s, j;
2334
2335   for (i = 0; i < n_iv_uses (data); i++)
2336     {
2337       struct iv_use *use = iv_use (data, i);
2338       bitmap_iterator bi;
2339
2340       if (data->consider_all_candidates)
2341         size = n_iv_cands (data);
2342       else
2343         {
2344           s = 0;
2345           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2346             {
2347               s++;
2348             }
2349
2350           /* Round up to the power of two, so that moduling by it is fast.  */
2351           for (size = 1; size < s; size <<= 1)
2352             continue;
2353         }
2354
2355       use->n_map_members = size;
2356       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2357     }
2358 }
2359
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361    on invariants DEPENDS_ON and that the value used in expressing it
2362    is VALUE.*/
2363
2364 static void
2365 set_use_iv_cost (struct ivopts_data *data,
2366                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2367                  bitmap depends_on, tree value)
2368 {
2369   unsigned i, s;
2370
2371   if (cost == INFTY)
2372     {
2373       BITMAP_FREE (depends_on);
2374       return;
2375     }
2376
2377   if (data->consider_all_candidates)
2378     {
2379       use->cost_map[cand->id].cand = cand;
2380       use->cost_map[cand->id].cost = cost;
2381       use->cost_map[cand->id].depends_on = depends_on;
2382       use->cost_map[cand->id].value = value;
2383       return;
2384     }
2385
2386   /* n_map_members is a power of two, so this computes modulo.  */
2387   s = cand->id & (use->n_map_members - 1);
2388   for (i = s; i < use->n_map_members; i++)
2389     if (!use->cost_map[i].cand)
2390       goto found;
2391   for (i = 0; i < s; i++)
2392     if (!use->cost_map[i].cand)
2393       goto found;
2394
2395   gcc_unreachable ();
2396
2397 found:
2398   use->cost_map[i].cand = cand;
2399   use->cost_map[i].cost = cost;
2400   use->cost_map[i].depends_on = depends_on;
2401   use->cost_map[i].value = value;
2402 }
2403
2404 /* Gets cost of (USE, CANDIDATE) pair.  */
2405
2406 static struct cost_pair *
2407 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2408                  struct iv_cand *cand)
2409 {
2410   unsigned i, s;
2411   struct cost_pair *ret;
2412
2413   if (!cand)
2414     return NULL;
2415
2416   if (data->consider_all_candidates)
2417     {
2418       ret = use->cost_map + cand->id;
2419       if (!ret->cand)
2420         return NULL;
2421
2422       return ret;
2423     }
2424
2425   /* n_map_members is a power of two, so this computes modulo.  */
2426   s = cand->id & (use->n_map_members - 1);
2427   for (i = s; i < use->n_map_members; i++)
2428     if (use->cost_map[i].cand == cand)
2429       return use->cost_map + i;
2430
2431   for (i = 0; i < s; i++)
2432     if (use->cost_map[i].cand == cand)
2433       return use->cost_map + i;
2434
2435   return NULL;
2436 }
2437
2438 /* Returns estimate on cost of computing SEQ.  */
2439
2440 static unsigned
2441 seq_cost (rtx seq)
2442 {
2443   unsigned cost = 0;
2444   rtx set;
2445
2446   for (; seq; seq = NEXT_INSN (seq))
2447     {
2448       set = single_set (seq);
2449       if (set)
2450         cost += rtx_cost (set, SET);
2451       else
2452         cost++;
2453     }
2454
2455   return cost;
2456 }
2457
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2459 static rtx
2460 produce_memory_decl_rtl (tree obj, int *regno)
2461 {
2462   rtx x;
2463
2464   gcc_assert (obj);
2465   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2466     {
2467       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2468       x = gen_rtx_SYMBOL_REF (Pmode, name);
2469     }
2470   else
2471     x = gen_raw_REG (Pmode, (*regno)++);
2472
2473   return gen_rtx_MEM (DECL_MODE (obj), x);
2474 }
2475
2476 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2477    walk_tree.  DATA contains the actual fake register number.  */
2478
2479 static tree
2480 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2481 {
2482   tree obj = NULL_TREE;
2483   rtx x = NULL_RTX;
2484   int *regno = data;
2485
2486   switch (TREE_CODE (*expr_p))
2487     {
2488     case ADDR_EXPR:
2489       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2490            handled_component_p (*expr_p);
2491            expr_p = &TREE_OPERAND (*expr_p, 0))
2492         continue;
2493       obj = *expr_p;
2494       if (DECL_P (obj))
2495         x = produce_memory_decl_rtl (obj, regno);
2496       break;
2497
2498     case SSA_NAME:
2499       *ws = 0;
2500       obj = SSA_NAME_VAR (*expr_p);
2501       if (!DECL_RTL_SET_P (obj))
2502         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503       break;
2504
2505     case VAR_DECL:
2506     case PARM_DECL:
2507     case RESULT_DECL:
2508       *ws = 0;
2509       obj = *expr_p;
2510
2511       if (DECL_RTL_SET_P (obj))
2512         break;
2513
2514       if (DECL_MODE (obj) == BLKmode)
2515         x = produce_memory_decl_rtl (obj, regno);
2516       else
2517         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2518
2519       break;
2520
2521     default:
2522       break;
2523     }
2524
2525   if (x)
2526     {
2527       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2528       SET_DECL_RTL (obj, x);
2529     }
2530
2531   return NULL_TREE;
2532 }
2533
2534 /* Determines cost of the computation of EXPR.  */
2535
2536 static unsigned
2537 computation_cost (tree expr)
2538 {
2539   rtx seq, rslt;
2540   tree type = TREE_TYPE (expr);
2541   unsigned cost;
2542   /* Avoid using hard regs in ways which may be unsupported.  */
2543   int regno = LAST_VIRTUAL_REGISTER + 1;
2544
2545   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2546   start_sequence ();
2547   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2548   seq = get_insns ();
2549   end_sequence ();
2550
2551   cost = seq_cost (seq);
2552   if (MEM_P (rslt))
2553     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2554
2555   return cost;
2556 }
2557
2558 /* Returns variable containing the value of candidate CAND at statement AT.  */
2559
2560 static tree
2561 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2562 {
2563   if (stmt_after_increment (loop, cand, stmt))
2564     return cand->var_after;
2565   else
2566     return cand->var_before;
2567 }
2568
2569 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2570    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2571
2572 int
2573 tree_int_cst_sign_bit (tree t)
2574 {
2575   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2576   unsigned HOST_WIDE_INT w;
2577
2578   if (bitno < HOST_BITS_PER_WIDE_INT)
2579     w = TREE_INT_CST_LOW (t);
2580   else
2581     {
2582       w = TREE_INT_CST_HIGH (t);
2583       bitno -= HOST_BITS_PER_WIDE_INT;
2584     }
2585
2586   return (w >> bitno) & 1;
2587 }
2588
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590    return cst.  Otherwise return NULL_TREE.  */
2591
2592 static tree
2593 constant_multiple_of (tree type, tree top, tree bot)
2594 {
2595   tree res, mby, p0, p1;
2596   enum tree_code code;
2597   bool negate;
2598
2599   STRIP_NOPS (top);
2600   STRIP_NOPS (bot);
2601
2602   if (operand_equal_p (top, bot, 0))
2603     return build_int_cst (type, 1);
2604
2605   code = TREE_CODE (top);
2606   switch (code)
2607     {
2608     case MULT_EXPR:
2609       mby = TREE_OPERAND (top, 1);
2610       if (TREE_CODE (mby) != INTEGER_CST)
2611         return NULL_TREE;
2612
2613       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614       if (!res)
2615         return NULL_TREE;
2616
2617       return fold_binary_to_constant (MULT_EXPR, type, res,
2618                                       fold_convert (type, mby));
2619
2620     case PLUS_EXPR:
2621     case MINUS_EXPR:
2622       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623       if (!p0)
2624         return NULL_TREE;
2625       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2626       if (!p1)
2627         return NULL_TREE;
2628
2629       return fold_binary_to_constant (code, type, p0, p1);
2630
2631     case INTEGER_CST:
2632       if (TREE_CODE (bot) != INTEGER_CST)
2633         return NULL_TREE;
2634
2635       bot = fold_convert (type, bot);
2636       top = fold_convert (type, top);
2637
2638       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639          the result afterwards.  */
2640       if (tree_int_cst_sign_bit (bot))
2641         {
2642           negate = true;
2643           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2644         }
2645       else
2646         negate = false;
2647
2648       /* Ditto for TOP.  */
2649       if (tree_int_cst_sign_bit (top))
2650         {
2651           negate = !negate;
2652           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2653         }
2654
2655       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656         return NULL_TREE;
2657
2658       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2659       if (negate)
2660         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2661       return res;
2662
2663     default:
2664       return NULL_TREE;
2665     }
2666 }
2667
2668 /* Sets COMB to CST.  */
2669
2670 static void
2671 aff_combination_const (struct affine_tree_combination *comb, tree type,
2672                        unsigned HOST_WIDE_INT cst)
2673 {
2674   unsigned prec = TYPE_PRECISION (type);
2675
2676   comb->type = type;
2677   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2678
2679   comb->n = 0;
2680   comb->rest = NULL_TREE;
2681   comb->offset = cst & comb->mask;
2682 }
2683
2684 /* Sets COMB to single element ELT.  */
2685
2686 static void
2687 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2688 {
2689   unsigned prec = TYPE_PRECISION (type);
2690
2691   comb->type = type;
2692   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2693
2694   comb->n = 1;
2695   comb->elts[0] = elt;
2696   comb->coefs[0] = 1;
2697   comb->rest = NULL_TREE;
2698   comb->offset = 0;
2699 }
2700
2701 /* Scales COMB by SCALE.  */
2702
2703 static void
2704 aff_combination_scale (struct affine_tree_combination *comb,
2705                        unsigned HOST_WIDE_INT scale)
2706 {
2707   unsigned i, j;
2708
2709   if (scale == 1)
2710     return;
2711
2712   if (scale == 0)
2713     {
2714       aff_combination_const (comb, comb->type, 0);
2715       return;
2716     }
2717
2718   comb->offset = (scale * comb->offset) & comb->mask;
2719   for (i = 0, j = 0; i < comb->n; i++)
2720     {
2721       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2722       comb->elts[j] = comb->elts[i];
2723       if (comb->coefs[j] != 0)
2724         j++;
2725     }
2726   comb->n = j;
2727
2728   if (comb->rest)
2729     {
2730       if (comb->n < MAX_AFF_ELTS)
2731         {
2732           comb->coefs[comb->n] = scale;
2733           comb->elts[comb->n] = comb->rest;
2734           comb->rest = NULL_TREE;
2735           comb->n++;
2736         }
2737       else
2738         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2739                                   build_int_cst_type (comb->type, scale));
2740     }
2741 }
2742
2743 /* Adds ELT * SCALE to COMB.  */
2744
2745 static void
2746 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2747                          unsigned HOST_WIDE_INT scale)
2748 {
2749   unsigned i;
2750
2751   if (scale == 0)
2752     return;
2753
2754   for (i = 0; i < comb->n; i++)
2755     if (operand_equal_p (comb->elts[i], elt, 0))
2756       {
2757         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758         if (comb->coefs[i])
2759           return;
2760
2761         comb->n--;
2762         comb->coefs[i] = comb->coefs[comb->n];
2763         comb->elts[i] = comb->elts[comb->n];
2764
2765         if (comb->rest)
2766           {
2767             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2768             comb->coefs[comb->n] = 1;
2769             comb->elts[comb->n] = comb->rest;
2770             comb->rest = NULL_TREE;
2771             comb->n++;
2772           }
2773         return;
2774       }
2775   if (comb->n < MAX_AFF_ELTS)
2776     {
2777       comb->coefs[comb->n] = scale;
2778       comb->elts[comb->n] = elt;
2779       comb->n++;
2780       return;
2781     }
2782
2783   if (scale == 1)
2784     elt = fold_convert (comb->type, elt);
2785   else
2786     elt = fold_build2 (MULT_EXPR, comb->type,
2787                        fold_convert (comb->type, elt),
2788                        build_int_cst_type (comb->type, scale));
2789
2790   if (comb->rest)
2791     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2792   else
2793     comb->rest = elt;
2794 }
2795
2796 /* Adds COMB2 to COMB1.  */
2797
2798 static void
2799 aff_combination_add (struct affine_tree_combination *comb1,
2800                      struct affine_tree_combination *comb2)
2801 {
2802   unsigned i;
2803
2804   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2805   for (i = 0; i < comb2->n; i++)
2806     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2807   if (comb2->rest)
2808     aff_combination_add_elt (comb1, comb2->rest, 1);
2809 }
2810
2811 /* Splits EXPR into an affine combination of parts.  */
2812
2813 static void
2814 tree_to_aff_combination (tree expr, tree type,
2815                          struct affine_tree_combination *comb)
2816 {
2817   struct affine_tree_combination tmp;
2818   enum tree_code code;
2819   tree cst, core, toffset;
2820   HOST_WIDE_INT bitpos, bitsize;
2821   enum machine_mode mode;
2822   int unsignedp, volatilep;
2823
2824   STRIP_NOPS (expr);
2825
2826   code = TREE_CODE (expr);
2827   switch (code)
2828     {
2829     case INTEGER_CST:
2830       aff_combination_const (comb, type, int_cst_value (expr));
2831       return;
2832
2833     case PLUS_EXPR:
2834     case MINUS_EXPR:
2835       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2837       if (code == MINUS_EXPR)
2838         aff_combination_scale (&tmp, -1);
2839       aff_combination_add (comb, &tmp);
2840       return;
2841
2842     case MULT_EXPR:
2843       cst = TREE_OPERAND (expr, 1);
2844       if (TREE_CODE (cst) != INTEGER_CST)
2845         break;
2846       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2847       aff_combination_scale (comb, int_cst_value (cst));
2848       return;
2849
2850     case NEGATE_EXPR:
2851       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2852       aff_combination_scale (comb, -1);
2853       return;
2854
2855     case ADDR_EXPR:
2856       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2857                                   &toffset, &mode, &unsignedp, &volatilep,
2858                                   false);
2859       if (bitpos % BITS_PER_UNIT != 0)
2860         break;
2861       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2862       core = build_fold_addr_expr (core);
2863       if (TREE_CODE (core) == ADDR_EXPR)
2864         aff_combination_add_elt (comb, core, 1);
2865       else
2866         {
2867           tree_to_aff_combination (core, type, &tmp);
2868           aff_combination_add (comb, &tmp);
2869         }
2870       if (toffset)
2871         {
2872           tree_to_aff_combination (toffset, type, &tmp);
2873           aff_combination_add (comb, &tmp);
2874         }
2875       return;
2876
2877     default:
2878       break;
2879     }
2880
2881   aff_combination_elt (comb, type, expr);
2882 }
2883
2884 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2885
2886 static tree
2887 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2888                  unsigned HOST_WIDE_INT mask)
2889 {
2890   enum tree_code code;
2891
2892   scale &= mask;
2893   elt = fold_convert (type, elt);
2894
2895   if (scale == 1)
2896     {
2897       if (!expr)
2898         return elt;
2899
2900       return fold_build2 (PLUS_EXPR, type, expr, elt);
2901     }
2902
2903   if (scale == mask)
2904     {
2905       if (!expr)
2906         return fold_build1 (NEGATE_EXPR, type, elt);
2907
2908       return fold_build2 (MINUS_EXPR, type, expr, elt);
2909     }
2910
2911   if (!expr)
2912     return fold_build2 (MULT_EXPR, type, elt,
2913                         build_int_cst_type (type, scale));
2914
2915   if ((scale | (mask >> 1)) == mask)
2916     {
2917       /* Scale is negative.  */
2918       code = MINUS_EXPR;
2919       scale = (-scale) & mask;
2920     }
2921   else
2922     code = PLUS_EXPR;
2923
2924   elt = fold_build2 (MULT_EXPR, type, elt,
2925                      build_int_cst_type (type, scale));
2926   return fold_build2 (code, type, expr, elt);
2927 }
2928
2929 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2930
2931 static void
2932 unshare_aff_combination (struct affine_tree_combination *comb)
2933 {
2934   unsigned i;
2935
2936   for (i = 0; i < comb->n; i++)
2937     comb->elts[i] = unshare_expr (comb->elts[i]);
2938   if (comb->rest)
2939     comb->rest = unshare_expr (comb->rest);
2940 }
2941
2942 /* Makes tree from the affine combination COMB.  */
2943
2944 static tree
2945 aff_combination_to_tree (struct affine_tree_combination *comb)
2946 {
2947   tree type = comb->type;
2948   tree expr = comb->rest;
2949   unsigned i;
2950   unsigned HOST_WIDE_INT off, sgn;
2951
2952   /* Handle the special case produced by get_computation_aff when
2953      the type does not fit in HOST_WIDE_INT.  */
2954   if (comb->n == 0 && comb->offset == 0)
2955     return fold_convert (type, expr);
2956
2957   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2958
2959   for (i = 0; i < comb->n; i++)
2960     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2961                             comb->mask);
2962
2963   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2964     {
2965       /* Offset is negative.  */
2966       off = (-comb->offset) & comb->mask;
2967       sgn = comb->mask;
2968     }
2969   else
2970     {
2971       off = comb->offset;
2972       sgn = 1;
2973     }
2974   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2975                           comb->mask);
2976 }
2977
2978 /* Determines the expression by that USE is expressed from induction variable
2979    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2980    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2981
2982 static bool
2983 get_computation_aff (struct loop *loop,
2984                      struct iv_use *use, struct iv_cand *cand, tree at,
2985                      struct affine_tree_combination *aff)
2986 {
2987   tree ubase = use->iv->base;
2988   tree ustep = use->iv->step;
2989   tree cbase = cand->iv->base;
2990   tree cstep = cand->iv->step;
2991   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2992   tree uutype;
2993   tree expr, delta;
2994   tree ratio;
2995   unsigned HOST_WIDE_INT ustepi, cstepi;
2996   HOST_WIDE_INT ratioi;
2997   struct affine_tree_combination cbase_aff, expr_aff;
2998   tree cstep_orig = cstep, ustep_orig = ustep;
2999
3000   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3001     {
3002       /* We do not have a precision to express the values of use.  */
3003       return false;
3004     }
3005
3006   expr = var_at_stmt (loop, cand, at);
3007
3008   if (TREE_TYPE (expr) != ctype)
3009     {
3010       /* This may happen with the original ivs.  */
3011       expr = fold_convert (ctype, expr);
3012     }
3013
3014   if (TYPE_UNSIGNED (utype))
3015     uutype = utype;
3016   else
3017     {
3018       uutype = unsigned_type_for (utype);
3019       ubase = fold_convert (uutype, ubase);
3020       ustep = fold_convert (uutype, ustep);
3021     }
3022
3023   if (uutype != ctype)
3024     {
3025       expr = fold_convert (uutype, expr);
3026       cbase = fold_convert (uutype, cbase);
3027       cstep = fold_convert (uutype, cstep);
3028
3029       /* If the conversion is not noop, we must take it into account when
3030          considering the value of the step.  */
3031       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3032         cstep_orig = cstep;
3033     }
3034
3035   if (cst_and_fits_in_hwi (cstep_orig)
3036       && cst_and_fits_in_hwi (ustep_orig))
3037     {
3038       ustepi = int_cst_value (ustep_orig);
3039       cstepi = int_cst_value (cstep_orig);
3040
3041       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3042         {
3043           /* TODO maybe consider case when ustep divides cstep and the ratio is
3044              a power of 2 (so that the division is fast to execute)?  We would
3045              need to be much more careful with overflows etc. then.  */
3046           return false;
3047         }
3048
3049       ratio = build_int_cst_type (uutype, ratioi);
3050     }
3051   else
3052     {
3053       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3054       if (!ratio)
3055         return false;
3056
3057       /* Ratioi is only used to detect special cases when the multiplicative
3058          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3059          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3060          to integer_onep/integer_all_onesp, since the former ignores
3061          TREE_OVERFLOW.  */
3062       if (cst_and_fits_in_hwi (ratio))
3063         ratioi = int_cst_value (ratio);
3064       else if (integer_onep (ratio))
3065         ratioi = 1;
3066       else if (integer_all_onesp (ratio))
3067         ratioi = -1;
3068       else
3069         ratioi = 0;
3070     }
3071
3072   /* We may need to shift the value if we are after the increment.  */
3073   if (stmt_after_increment (loop, cand, at))
3074     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3075
3076   /* use = ubase - ratio * cbase + ratio * var.
3077
3078      In general case ubase + ratio * (var - cbase) could be better (one less
3079      multiplication), but often it is possible to eliminate redundant parts
3080      of computations from (ubase - ratio * cbase) term, and if it does not
3081      happen, fold is able to apply the distributive law to obtain this form
3082      anyway.  */
3083
3084   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3085     {
3086       /* Let's compute in trees and just return the result in AFF.  This case
3087          should not be very common, and fold itself is not that bad either,
3088          so making the aff. functions more complicated to handle this case
3089          is not that urgent.  */
3090       if (ratioi == 1)
3091         {
3092           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3093           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3094         }
3095       else if (ratioi == -1)
3096         {
3097           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3098           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3099         }
3100       else
3101         {
3102           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3103           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3104           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3105           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3106         }
3107
3108       aff->type = uutype;
3109       aff->n = 0;
3110       aff->offset = 0;
3111       aff->mask = 0;
3112       aff->rest = expr;
3113       return true;
3114     }
3115
3116   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3117      possible to compute ratioi.  */
3118   gcc_assert (ratioi);
3119
3120   tree_to_aff_combination (ubase, uutype, aff);
3121   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3122   tree_to_aff_combination (expr, uutype, &expr_aff);
3123   aff_combination_scale (&cbase_aff, -ratioi);
3124   aff_combination_scale (&expr_aff, ratioi);
3125   aff_combination_add (aff, &cbase_aff);
3126   aff_combination_add (aff, &expr_aff);
3127
3128   return true;
3129 }
3130
3131 /* Determines the expression by that USE is expressed from induction variable
3132    CAND at statement AT in LOOP.  The computation is unshared.  */
3133
3134 static tree
3135 get_computation_at (struct loop *loop,
3136                     struct iv_use *use, struct iv_cand *cand, tree at)
3137 {
3138   struct affine_tree_combination aff;
3139   tree type = TREE_TYPE (use->iv->base);
3140
3141   if (!get_computation_aff (loop, use, cand, at, &aff))
3142     return NULL_TREE;
3143   unshare_aff_combination (&aff);
3144   return fold_convert (type, aff_combination_to_tree (&aff));
3145 }
3146
3147 /* Determines the expression by that USE is expressed from induction variable
3148    CAND in LOOP.  The computation is unshared.  */
3149
3150 static tree
3151 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3152 {
3153   return get_computation_at (loop, use, cand, use->stmt);
3154 }
3155
3156 /* Returns cost of addition in MODE.  */
3157
3158 static unsigned
3159 add_cost (enum machine_mode mode)
3160 {
3161   static unsigned costs[NUM_MACHINE_MODES];
3162   rtx seq;
3163   unsigned cost;
3164
3165   if (costs[mode])
3166     return costs[mode];
3167
3168   start_sequence ();
3169   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3170                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3171                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3172                  NULL_RTX);
3173   seq = get_insns ();
3174   end_sequence ();
3175
3176   cost = seq_cost (seq);
3177   if (!cost)
3178     cost = 1;
3179
3180   costs[mode] = cost;
3181
3182   if (dump_file && (dump_flags & TDF_DETAILS))
3183     fprintf (dump_file, "Addition in %s costs %d\n",
3184              GET_MODE_NAME (mode), cost);
3185   return cost;
3186 }
3187
3188 /* Entry in a hashtable of already known costs for multiplication.  */
3189 struct mbc_entry
3190 {
3191   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3192   enum machine_mode mode;       /* In mode.  */
3193   unsigned cost;                /* The cost.  */
3194 };
3195
3196 /* Counts hash value for the ENTRY.  */
3197
3198 static hashval_t
3199 mbc_entry_hash (const void *entry)
3200 {
3201   const struct mbc_entry *e = entry;
3202
3203   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3204 }
3205
3206 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3207
3208 static int
3209 mbc_entry_eq (const void *entry1, const void *entry2)
3210 {
3211   const struct mbc_entry *e1 = entry1;
3212   const struct mbc_entry *e2 = entry2;
3213
3214   return (e1->mode == e2->mode
3215           && e1->cst == e2->cst);
3216 }
3217
3218 /* Returns cost of multiplication by constant CST in MODE.  */
3219
3220 unsigned
3221 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3222 {
3223   static htab_t costs;
3224   struct mbc_entry **cached, act;
3225   rtx seq;
3226   unsigned cost;
3227
3228   if (!costs)
3229     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3230
3231   act.mode = mode;
3232   act.cst = cst;
3233   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3234   if (*cached)
3235     return (*cached)->cost;
3236
3237   *cached = xmalloc (sizeof (struct mbc_entry));
3238   (*cached)->mode = mode;
3239   (*cached)->cst = cst;
3240
3241   start_sequence ();
3242   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3243                gen_int_mode (cst, mode), NULL_RTX, 0);
3244   seq = get_insns ();
3245   end_sequence ();
3246
3247   cost = seq_cost (seq);
3248
3249   if (dump_file && (dump_flags & TDF_DETAILS))
3250     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3251              (int) cst, GET_MODE_NAME (mode), cost);
3252
3253   (*cached)->cost = cost;
3254
3255   return cost;
3256 }
3257
3258 /* Returns true if multiplying by RATIO is allowed in address.  */
3259
3260 bool
3261 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3262 {
3263 #define MAX_RATIO 128
3264   static sbitmap valid_mult;
3265
3266   if (!valid_mult)
3267     {
3268       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3269       rtx addr;
3270       HOST_WIDE_INT i;
3271
3272       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3273       sbitmap_zero (valid_mult);
3274       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3275       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3276         {
3277           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3278           if (memory_address_p (Pmode, addr))
3279             SET_BIT (valid_mult, i + MAX_RATIO);
3280         }
3281
3282       if (dump_file && (dump_flags & TDF_DETAILS))
3283         {
3284           fprintf (dump_file, "  allowed multipliers:");
3285           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3286             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3287               fprintf (dump_file, " %d", (int) i);
3288           fprintf (dump_file, "\n");
3289           fprintf (dump_file, "\n");
3290         }
3291     }
3292
3293   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3294     return false;
3295
3296   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3297 }
3298
3299 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3300    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3301    variable is omitted.  The created memory accesses MODE.
3302
3303    TODO -- there must be some better way.  This all is quite crude.  */
3304
3305 static unsigned
3306 get_address_cost (bool symbol_present, bool var_present,
3307                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3308 {
3309   static bool initialized = false;
3310   static HOST_WIDE_INT rat, off;
3311   static HOST_WIDE_INT min_offset, max_offset;
3312   static unsigned costs[2][2][2][2];
3313   unsigned cost, acost;
3314   rtx seq, addr, base;
3315   bool offset_p, ratio_p;
3316   rtx reg1;
3317   HOST_WIDE_INT s_offset;
3318   unsigned HOST_WIDE_INT mask;
3319   unsigned bits;
3320
3321   if (!initialized)
3322     {
3323       HOST_WIDE_INT i;
3324       initialized = true;
3325
3326       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3327
3328       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3329       for (i = 1; i <= 1 << 20; i <<= 1)
3330         {
3331           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3332           if (!memory_address_p (Pmode, addr))
3333             break;
3334         }
3335       max_offset = i >> 1;
3336       off = max_offset;
3337
3338       for (i = 1; i <= 1 << 20; i <<= 1)
3339         {
3340           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3341           if (!memory_address_p (Pmode, addr))
3342             break;
3343         }
3344       min_offset = -(i >> 1);
3345
3346       if (dump_file && (dump_flags & TDF_DETAILS))
3347         {
3348           fprintf (dump_file, "get_address_cost:\n");
3349           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3350           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3351         }
3352
3353       rat = 1;
3354       for (i = 2; i <= MAX_RATIO; i++)
3355         if (multiplier_allowed_in_address_p (i))
3356           {
3357             rat = i;
3358             break;
3359           }
3360     }
3361
3362   bits = GET_MODE_BITSIZE (Pmode);
3363   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3364   offset &= mask;
3365   if ((offset >> (bits - 1) & 1))
3366     offset |= ~mask;
3367   s_offset = offset;
3368
3369   cost = 0;
3370   offset_p = (s_offset != 0
3371               && min_offset <= s_offset && s_offset <= max_offset);
3372   ratio_p = (ratio != 1
3373              && multiplier_allowed_in_address_p (ratio));
3374
3375   if (ratio != 1 && !ratio_p)
3376     cost += multiply_by_cost (ratio, Pmode);
3377
3378   if (s_offset && !offset_p && !symbol_present)
3379     {
3380       cost += add_cost (Pmode);
3381       var_present = true;
3382     }
3383
3384   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3385   if (!acost)
3386     {
3387       int old_cse_not_expected;
3388       acost = 0;
3389
3390       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3391       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3392       if (ratio_p)
3393         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3394
3395       if (var_present)
3396         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3397
3398       if (symbol_present)
3399         {
3400           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3401           if (offset_p)
3402             base = gen_rtx_fmt_e (CONST, Pmode,
3403                                   gen_rtx_fmt_ee (PLUS, Pmode,
3404                                                   base,
3405                                                   gen_int_mode (off, Pmode)));
3406         }
3407       else if (offset_p)
3408         base = gen_int_mode (off, Pmode);
3409       else
3410         base = NULL_RTX;
3411
3412       if (base)
3413         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3414
3415       start_sequence ();
3416       /* To avoid splitting addressing modes, pretend that no cse will
3417          follow.  */
3418       old_cse_not_expected = cse_not_expected;
3419       cse_not_expected = true;
3420       addr = memory_address (Pmode, addr);
3421       cse_not_expected = old_cse_not_expected;
3422       seq = get_insns ();
3423       end_sequence ();
3424
3425       acost = seq_cost (seq);
3426       acost += address_cost (addr, Pmode);
3427
3428       if (!acost)
3429         acost = 1;
3430       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3431     }
3432
3433   return cost + acost;
3434 }
3435
3436 /* Estimates cost of forcing expression EXPR into a variable.  */
3437
3438 unsigned
3439 force_expr_to_var_cost (tree expr)
3440 {
3441   static bool costs_initialized = false;
3442   static unsigned integer_cost;
3443   static unsigned symbol_cost;
3444   static unsigned address_cost;
3445   tree op0, op1;
3446   unsigned cost0, cost1, cost;
3447   enum machine_mode mode;
3448
3449   if (!costs_initialized)
3450     {
3451       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3452       rtx x = gen_rtx_MEM (DECL_MODE (var),
3453                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3454       tree addr;
3455       tree type = build_pointer_type (integer_type_node);
3456
3457       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3458                                                            2000));
3459
3460       SET_DECL_RTL (var, x);
3461       TREE_STATIC (var) = 1;
3462       addr = build1 (ADDR_EXPR, type, var);
3463       symbol_cost = computation_cost (addr) + 1;
3464
3465       address_cost
3466         = computation_cost (build2 (PLUS_EXPR, type,
3467                                     addr,
3468                                     build_int_cst_type (type, 2000))) + 1;
3469       if (dump_file && (dump_flags & TDF_DETAILS))
3470         {
3471           fprintf (dump_file, "force_expr_to_var_cost:\n");
3472           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3473           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3474           fprintf (dump_file, "  address %d\n", (int) address_cost);
3475           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3476           fprintf (dump_file, "\n");
3477         }
3478
3479       costs_initialized = true;
3480     }
3481
3482   STRIP_NOPS (expr);
3483
3484   if (SSA_VAR_P (expr))
3485     return 0;
3486
3487   if (TREE_INVARIANT (expr))
3488     {
3489       if (TREE_CODE (expr) == INTEGER_CST)
3490         return integer_cost;
3491
3492       if (TREE_CODE (expr) == ADDR_EXPR)
3493         {
3494           tree obj = TREE_OPERAND (expr, 0);
3495
3496           if (TREE_CODE (obj) == VAR_DECL
3497               || TREE_CODE (obj) == PARM_DECL
3498               || TREE_CODE (obj) == RESULT_DECL)
3499             return symbol_cost;
3500         }
3501
3502       return address_cost;
3503     }
3504
3505   switch (TREE_CODE (expr))
3506     {
3507     case PLUS_EXPR:
3508     case MINUS_EXPR:
3509     case MULT_EXPR:
3510       op0 = TREE_OPERAND (expr, 0);
3511       op1 = TREE_OPERAND (expr, 1);
3512       STRIP_NOPS (op0);
3513       STRIP_NOPS (op1);
3514
3515       if (is_gimple_val (op0))
3516         cost0 = 0;
3517       else
3518         cost0 = force_expr_to_var_cost (op0);
3519
3520       if (is_gimple_val (op1))
3521         cost1 = 0;
3522       else
3523         cost1 = force_expr_to_var_cost (op1);
3524
3525       break;
3526
3527     default:
3528       /* Just an arbitrary value, FIXME.  */
3529       return target_spill_cost;
3530     }
3531
3532   mode = TYPE_MODE (TREE_TYPE (expr));
3533   switch (TREE_CODE (expr))
3534     {
3535     case PLUS_EXPR:
3536     case MINUS_EXPR:
3537       cost = add_cost (mode);
3538       break;
3539
3540     case MULT_EXPR:
3541       if (cst_and_fits_in_hwi (op0))
3542         cost = multiply_by_cost (int_cst_value (op0), mode);
3543       else if (cst_and_fits_in_hwi (op1))
3544         cost = multiply_by_cost (int_cst_value (op1), mode);
3545       else
3546         return target_spill_cost;
3547       break;
3548
3549     default:
3550       gcc_unreachable ();
3551     }
3552
3553   cost += cost0;
3554   cost += cost1;
3555
3556   /* Bound the cost by target_spill_cost.  The parts of complicated
3557      computations often are either loop invariant or at least can
3558      be shared between several iv uses, so letting this grow without
3559      limits would not give reasonable results.  */
3560   return cost < target_spill_cost ? cost : target_spill_cost;
3561 }
3562
3563 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3564    invariants the computation depends on.  */
3565
3566 static unsigned
3567 force_var_cost (struct ivopts_data *data,
3568                 tree expr, bitmap *depends_on)
3569 {
3570   if (depends_on)
3571     {
3572       fd_ivopts_data = data;
3573       walk_tree (&expr, find_depends, depends_on, NULL);
3574     }
3575
3576   return force_expr_to_var_cost (expr);
3577 }
3578
3579 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3580    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3581    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3582    invariants the computation depends on.  */
3583
3584 static unsigned
3585 split_address_cost (struct ivopts_data *data,
3586                     tree addr, bool *symbol_present, bool *var_present,
3587                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3588 {
3589   tree core;
3590   HOST_WIDE_INT bitsize;
3591   HOST_WIDE_INT bitpos;
3592   tree toffset;
3593   enum machine_mode mode;
3594   int unsignedp, volatilep;
3595
3596   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3597                               &unsignedp, &volatilep, false);
3598
3599   if (toffset != 0
3600       || bitpos % BITS_PER_UNIT != 0
3601       || TREE_CODE (core) != VAR_DECL)
3602     {
3603       *symbol_present = false;
3604       *var_present = true;
3605       fd_ivopts_data = data;
3606       walk_tree (&addr, find_depends, depends_on, NULL);
3607       return target_spill_cost;
3608     }
3609
3610   *offset += bitpos / BITS_PER_UNIT;
3611   if (TREE_STATIC (core)
3612       || DECL_EXTERNAL (core))
3613     {
3614       *symbol_present = true;
3615       *var_present = false;
3616       return 0;
3617     }
3618
3619   *symbol_present = false;
3620   *var_present = true;
3621   return 0;
3622 }
3623
3624 /* Estimates cost of expressing difference of addresses E1 - E2 as
3625    var + symbol + offset.  The value of offset is added to OFFSET,
3626    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3627    part is missing.  DEPENDS_ON is a set of the invariants the computation
3628    depends on.  */
3629
3630 static unsigned
3631 ptr_difference_cost (struct ivopts_data *data,
3632                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3633                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634 {
3635   HOST_WIDE_INT diff = 0;
3636   unsigned cost;
3637
3638   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3639
3640   if (ptr_difference_const (e1, e2, &diff))
3641     {
3642       *offset += diff;
3643       *symbol_present = false;
3644       *var_present = false;
3645       return 0;
3646     }
3647
3648   if (e2 == integer_zero_node)
3649     return split_address_cost (data, TREE_OPERAND (e1, 0),
3650                                symbol_present, var_present, offset, depends_on);
3651
3652   *symbol_present = false;
3653   *var_present = true;
3654
3655   cost = force_var_cost (data, e1, depends_on);
3656   cost += force_var_cost (data, e2, depends_on);
3657   cost += add_cost (Pmode);
3658
3659   return cost;
3660 }
3661
3662 /* Estimates cost of expressing difference E1 - E2 as
3663    var + symbol + offset.  The value of offset is added to OFFSET,
3664    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3665    part is missing.  DEPENDS_ON is a set of the invariants the computation
3666    depends on.  */
3667
3668 static unsigned
3669 difference_cost (struct ivopts_data *data,
3670                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3671                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3672 {
3673   unsigned cost;
3674   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3675   unsigned HOST_WIDE_INT off1, off2;
3676
3677   e1 = strip_offset (e1, &off1);
3678   e2 = strip_offset (e2, &off2);
3679   *offset += off1 - off2;
3680
3681   STRIP_NOPS (e1);
3682   STRIP_NOPS (e2);
3683
3684   if (TREE_CODE (e1) == ADDR_EXPR)
3685     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3686                                 depends_on);
3687   *symbol_present = false;
3688
3689   if (operand_equal_p (e1, e2, 0))
3690     {
3691       *var_present = false;
3692       return 0;
3693     }
3694   *var_present = true;
3695   if (zero_p (e2))
3696     return force_var_cost (data, e1, depends_on);
3697
3698   if (zero_p (e1))
3699     {
3700       cost = force_var_cost (data, e2, depends_on);
3701       cost += multiply_by_cost (-1, mode);
3702
3703       return cost;
3704     }
3705
3706   cost = force_var_cost (data, e1, depends_on);
3707   cost += force_var_cost (data, e2, depends_on);
3708   cost += add_cost (mode);
3709
3710   return cost;
3711 }
3712
3713 /* Determines the cost of the computation by that USE is expressed
3714    from induction variable CAND.  If ADDRESS_P is true, we just need
3715    to create an address from it, otherwise we want to get it into
3716    register.  A set of invariants we depend on is stored in
3717    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3718
3719 static unsigned
3720 get_computation_cost_at (struct ivopts_data *data,
3721                          struct iv_use *use, struct iv_cand *cand,
3722                          bool address_p, bitmap *depends_on, tree at)
3723 {
3724   tree ubase = use->iv->base, ustep = use->iv->step;
3725   tree cbase, cstep;
3726   tree utype = TREE_TYPE (ubase), ctype;
3727   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3728   HOST_WIDE_INT ratio, aratio;
3729   bool var_present, symbol_present;
3730   unsigned cost = 0, n_sums;
3731
3732   *depends_on = NULL;
3733
3734   /* Only consider real candidates.  */
3735   if (!cand->iv)
3736     return INFTY;
3737
3738   cbase = cand->iv->base;
3739   cstep = cand->iv->step;
3740   ctype = TREE_TYPE (cbase);
3741
3742   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3743     {
3744       /* We do not have a precision to express the values of use.  */
3745       return INFTY;
3746     }
3747
3748   if (address_p)
3749     {
3750       /* Do not try to express address of an object with computation based
3751          on address of a different object.  This may cause problems in rtl
3752          level alias analysis (that does not expect this to be happening,
3753          as this is illegal in C), and would be unlikely to be useful
3754          anyway.  */
3755       if (use->iv->base_object
3756           && cand->iv->base_object
3757           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3758         return INFTY;
3759     }
3760
3761   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3762     {
3763       /* TODO -- add direct handling of this case.  */
3764       goto fallback;
3765     }
3766
3767   /* CSTEPI is removed from the offset in case statement is after the
3768      increment.  If the step is not constant, we use zero instead.
3769      This is a bit imprecise (there is the extra addition), but
3770      redundancy elimination is likely to transform the code so that
3771      it uses value of the variable before increment anyway,
3772      so it is not that much unrealistic.  */
3773   if (cst_and_fits_in_hwi (cstep))
3774     cstepi = int_cst_value (cstep);
3775   else
3776     cstepi = 0;
3777
3778   if (cst_and_fits_in_hwi (ustep)
3779       && cst_and_fits_in_hwi (cstep))
3780     {
3781       ustepi = int_cst_value (ustep);
3782
3783       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3784         return INFTY;
3785     }
3786   else
3787     {
3788       tree rat;
3789
3790       rat = constant_multiple_of (utype, ustep, cstep);
3791
3792       if (!rat)
3793         return INFTY;
3794
3795       if (cst_and_fits_in_hwi (rat))
3796         ratio = int_cst_value (rat);
3797       else if (integer_onep (rat))
3798         ratio = 1;
3799       else if (integer_all_onesp (rat))
3800         ratio = -1;
3801       else
3802         return INFTY;
3803     }
3804
3805   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3806      or ratio == 1, it is better to handle this like
3807
3808      ubase - ratio * cbase + ratio * var
3809
3810      (also holds in the case ratio == -1, TODO.  */
3811
3812   if (cst_and_fits_in_hwi (cbase))
3813     {
3814       offset = - ratio * int_cst_value (cbase);
3815       cost += difference_cost (data,
3816                                ubase, integer_zero_node,
3817                                &symbol_present, &var_present, &offset,
3818                                depends_on);
3819     }
3820   else if (ratio == 1)
3821     {
3822       cost += difference_cost (data,
3823                                ubase, cbase,
3824                                &symbol_present, &var_present, &offset,
3825                                depends_on);
3826     }
3827   else
3828     {
3829       cost += force_var_cost (data, cbase, depends_on);
3830       cost += add_cost (TYPE_MODE (ctype));
3831       cost += difference_cost (data,
3832                                ubase, integer_zero_node,
3833                                &symbol_present, &var_present, &offset,
3834                                depends_on);
3835     }
3836
3837   /* If we are after the increment, the value of the candidate is higher by
3838      one iteration.  */
3839   if (stmt_after_increment (data->current_loop, cand, at))
3840     offset -= ratio * cstepi;
3841
3842   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3843      (symbol/var/const parts may be omitted).  If we are looking for an address,
3844      find the cost of addressing this.  */
3845   if (address_p)
3846     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3847
3848   /* Otherwise estimate the costs for computing the expression.  */
3849   aratio = ratio > 0 ? ratio : -ratio;
3850   if (!symbol_present && !var_present && !offset)
3851     {
3852       if (ratio != 1)
3853         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3854
3855       return cost;
3856     }
3857
3858   if (aratio != 1)
3859     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3860
3861   n_sums = 1;
3862   if (var_present
3863       /* Symbol + offset should be compile-time computable.  */
3864       && (symbol_present || offset))
3865     n_sums++;
3866
3867   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3868
3869 fallback:
3870   {
3871     /* Just get the expression, expand it and measure the cost.  */
3872     tree comp = get_computation_at (data->current_loop, use, cand, at);
3873
3874     if (!comp)
3875       return INFTY;
3876
3877     if (address_p)
3878       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3879
3880     return computation_cost (comp);
3881   }
3882 }
3883
3884 /* Determines the cost of the computation by that USE is expressed
3885    from induction variable CAND.  If ADDRESS_P is true, we just need
3886    to create an address from it, otherwise we want to get it into
3887    register.  A set of invariants we depend on is stored in
3888    DEPENDS_ON.  */
3889
3890 static unsigned
3891 get_computation_cost (struct ivopts_data *data,
3892                       struct iv_use *use, struct iv_cand *cand,
3893                       bool address_p, bitmap *depends_on)
3894 {
3895   return get_computation_cost_at (data,
3896                                   use, cand, address_p, depends_on, use->stmt);
3897 }
3898
3899 /* Determines cost of basing replacement of USE on CAND in a generic
3900    expression.  */
3901
3902 static bool
3903 determine_use_iv_cost_generic (struct ivopts_data *data,
3904                                struct iv_use *use, struct iv_cand *cand)
3905 {
3906   bitmap depends_on;
3907   unsigned cost;
3908
3909   /* The simple case first -- if we need to express value of the preserved
3910      original biv, the cost is 0.  This also prevents us from counting the
3911      cost of increment twice -- once at this use and once in the cost of
3912      the candidate.  */
3913   if (cand->pos == IP_ORIGINAL
3914       && cand->incremented_at == use->stmt)
3915     {
3916       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3917       return true;
3918     }
3919
3920   cost = get_computation_cost (data, use, cand, false, &depends_on);
3921   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3922
3923   return cost != INFTY;
3924 }
3925
3926 /* Determines cost of basing replacement of USE on CAND in an address.  */
3927
3928 static bool
3929 determine_use_iv_cost_address (struct ivopts_data *data,
3930                                struct iv_use *use, struct iv_cand *cand)
3931 {
3932   bitmap depends_on;
3933   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3934
3935   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3936
3937   return cost != INFTY;
3938 }
3939
3940 /* Computes value of induction variable IV in iteration NITER.  */
3941
3942 static tree
3943 iv_value (struct iv *iv, tree niter)
3944 {
3945   tree val;
3946   tree type = TREE_TYPE (iv->base);
3947
3948   niter = fold_convert (type, niter);
3949   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3950
3951   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3952 }
3953
3954 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3955
3956 static tree
3957 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3958 {
3959   tree val = iv_value (cand->iv, niter);
3960   tree type = TREE_TYPE (cand->iv->base);
3961
3962   if (stmt_after_increment (loop, cand, at))
3963     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3964
3965   return val;
3966 }
3967
3968 /* Returns period of induction variable iv.  */
3969
3970 static tree
3971 iv_period (struct iv *iv)
3972 {
3973   tree step = iv->step, period, type;
3974   tree pow2div;
3975
3976   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3977
3978   /* Period of the iv is gcd (step, type range).  Since type range is power
3979      of two, it suffices to determine the maximum power of two that divides
3980      step.  */
3981   pow2div = num_ending_zeros (step);
3982   type = unsigned_type_for (TREE_TYPE (step));
3983
3984   period = build_low_bits_mask (type,
3985                                 (TYPE_PRECISION (type)
3986                                  - tree_low_cst (pow2div, 1)));
3987
3988   return period;
3989 }
3990
3991 /* Returns the comparison operator used when eliminating the iv USE.  */
3992
3993 static enum tree_code
3994 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3995 {
3996   struct loop *loop = data->current_loop;
3997   basic_block ex_bb;
3998   edge exit;
3999
4000   ex_bb = bb_for_stmt (use->stmt);
4001   exit = EDGE_SUCC (ex_bb, 0);
4002   if (flow_bb_inside_loop_p (loop, exit->dest))
4003     exit = EDGE_SUCC (ex_bb, 1);
4004
4005   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4006 }
4007
4008 /* Check whether it is possible to express the condition in USE by comparison
4009    of candidate CAND.  If so, store the value compared with to BOUND.  */
4010
4011 static bool
4012 may_eliminate_iv (struct ivopts_data *data,
4013                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4014 {
4015   basic_block ex_bb;
4016   edge exit;
4017   struct tree_niter_desc *niter;
4018   tree nit, nit_type;
4019   tree wider_type, period, per_type;
4020   struct loop *loop = data->current_loop;
4021
4022   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4023     return false;
4024
4025   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4026      for other conditions inside loop body.  */
4027   ex_bb = bb_for_stmt (use->stmt);
4028   if (use->stmt != last_stmt (ex_bb)
4029       || TREE_CODE (use->stmt) != COND_EXPR)
4030     return false;
4031   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4032     return false;
4033
4034   exit = EDGE_SUCC (ex_bb, 0);
4035   if (flow_bb_inside_loop_p (loop, exit->dest))
4036     exit = EDGE_SUCC (ex_bb, 1);
4037   if (flow_bb_inside_loop_p (loop, exit->dest))
4038     return false;
4039
4040   niter = niter_for_exit (data, exit);
4041   if (!niter
4042       || !zero_p (niter->may_be_zero))
4043     return false;
4044
4045   nit = niter->niter;
4046   nit_type = TREE_TYPE (nit);
4047
4048   /* Determine whether we may use the variable to test whether niter iterations
4049      elapsed.  This is the case iff the period of the induction variable is
4050      greater than the number of iterations.  */
4051   period = iv_period (cand->iv);
4052   if (!period)
4053     return false;
4054   per_type = TREE_TYPE (period);
4055
4056   wider_type = TREE_TYPE (period);
4057   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4058     wider_type = per_type;
4059   else
4060     wider_type = nit_type;
4061
4062   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4063                                       fold_convert (wider_type, period),
4064                                       fold_convert (wider_type, nit))))
4065     return false;
4066
4067   *bound = cand_value_at (loop, cand, use->stmt, nit);
4068   return true;
4069 }
4070
4071 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4072
4073 static bool
4074 determine_use_iv_cost_condition (struct ivopts_data *data,
4075                                  struct iv_use *use, struct iv_cand *cand)
4076 {
4077   tree bound = NULL_TREE, op, cond;
4078   bitmap depends_on = NULL;
4079   unsigned cost;
4080
4081   /* Only consider real candidates.  */
4082   if (!cand->iv)
4083     {
4084       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4085       return false;
4086     }
4087
4088   if (may_eliminate_iv (data, use, cand, &bound))
4089     {
4090       cost = force_var_cost (data, bound, &depends_on);
4091
4092       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4093       return cost != INFTY;
4094     }
4095
4096   /* The induction variable elimination failed; just express the original
4097      giv.  If it is compared with an invariant, note that we cannot get
4098      rid of it.  */
4099   cost = get_computation_cost (data, use, cand, false, &depends_on);
4100
4101   cond = *use->op_p;
4102   if (TREE_CODE (cond) != SSA_NAME)
4103     {
4104       op = TREE_OPERAND (cond, 0);
4105       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4106         op = TREE_OPERAND (cond, 1);
4107       if (TREE_CODE (op) == SSA_NAME)
4108         {
4109           op = get_iv (data, op)->base;
4110           fd_ivopts_data = data;
4111           walk_tree (&op, find_depends, &depends_on, NULL);
4112         }
4113     }
4114
4115   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4116   return cost != INFTY;
4117 }
4118
4119 /* Checks whether it is possible to replace the final value of USE by
4120    a direct computation.  If so, the formula is stored to *VALUE.  */
4121
4122 static bool
4123 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4124                          tree *value)
4125 {
4126   struct loop *loop = data->current_loop;
4127   edge exit;
4128   struct tree_niter_desc *niter;
4129
4130   exit = single_dom_exit (loop);
4131   if (!exit)
4132     return false;
4133
4134   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4135                               bb_for_stmt (use->stmt)));
4136
4137   niter = niter_for_single_dom_exit (data);
4138   if (!niter
4139       || !zero_p (niter->may_be_zero))
4140     return false;
4141
4142   *value = iv_value (use->iv, niter->niter);
4143
4144   return true;
4145 }
4146
4147 /* Determines cost of replacing final value of USE using CAND.  */
4148
4149 static bool
4150 determine_use_iv_cost_outer (struct ivopts_data *data,
4151                              struct iv_use *use, struct iv_cand *cand)
4152 {
4153   bitmap depends_on;
4154   unsigned cost;
4155   edge exit;
4156   tree value = NULL_TREE;
4157   struct loop *loop = data->current_loop;
4158
4159   /* The simple case first -- if we need to express value of the preserved
4160      original biv, the cost is 0.  This also prevents us from counting the
4161      cost of increment twice -- once at this use and once in the cost of
4162      the candidate.  */
4163   if (cand->pos == IP_ORIGINAL
4164       && cand->incremented_at == use->stmt)
4165     {
4166       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4167       return true;
4168     }
4169
4170   if (!cand->iv)
4171     {
4172       if (!may_replace_final_value (data, use, &value))
4173         {
4174           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4175           return false;
4176         }
4177
4178       depends_on = NULL;
4179       cost = force_var_cost (data, value, &depends_on);
4180
4181       cost /= AVG_LOOP_NITER (loop);
4182
4183       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4184       return cost != INFTY;
4185     }
4186
4187   exit = single_dom_exit (loop);
4188   if (exit)
4189     {
4190       /* If there is just a single exit, we may use value of the candidate
4191          after we take it to determine the value of use.  */
4192       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4193                                       last_stmt (exit->src));
4194       if (cost != INFTY)
4195         cost /= AVG_LOOP_NITER (loop);
4196     }
4197   else
4198     {
4199       /* Otherwise we just need to compute the iv.  */
4200       cost = get_computation_cost (data, use, cand, false, &depends_on);
4201     }
4202
4203   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4204
4205   return cost != INFTY;
4206 }
4207
4208 /* Determines cost of basing replacement of USE on CAND.  Returns false
4209    if USE cannot be based on CAND.  */
4210
4211 static bool
4212 determine_use_iv_cost (struct ivopts_data *data,
4213                        struct iv_use *use, struct iv_cand *cand)
4214 {
4215   switch (use->type)
4216     {
4217     case USE_NONLINEAR_EXPR:
4218       return determine_use_iv_cost_generic (data, use, cand);
4219
4220     case USE_OUTER:
4221       return determine_use_iv_cost_outer (data, use, cand);
4222
4223     case USE_ADDRESS:
4224       return determine_use_iv_cost_address (data, use, cand);
4225
4226     case USE_COMPARE:
4227       return determine_use_iv_cost_condition (data, use, cand);
4228
4229     default:
4230       gcc_unreachable ();
4231     }
4232 }
4233
4234 /* Determines costs of basing the use of the iv on an iv candidate.  */
4235
4236 static void
4237 determine_use_iv_costs (struct ivopts_data *data)
4238 {
4239   unsigned i, j;
4240   struct iv_use *use;
4241   struct iv_cand *cand;
4242   bitmap to_clear = BITMAP_ALLOC (NULL);
4243
4244   alloc_use_cost_map (data);
4245
4246   for (i = 0; i < n_iv_uses (data); i++)
4247     {
4248       use = iv_use (data, i);
4249
4250       if (data->consider_all_candidates)
4251         {
4252           for (j = 0; j < n_iv_cands (data); j++)
4253             {
4254               cand = iv_cand (data, j);
4255               determine_use_iv_cost (data, use, cand);
4256             }
4257         }
4258       else
4259         {
4260           bitmap_iterator bi;
4261
4262           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4263             {
4264               cand = iv_cand (data, j);
4265               if (!determine_use_iv_cost (data, use, cand))
4266                 bitmap_set_bit (to_clear, j);
4267             }
4268
4269           /* Remove the candidates for that the cost is infinite from
4270              the list of related candidates.  */
4271           bitmap_and_compl_into (use->related_cands, to_clear);
4272           bitmap_clear (to_clear);
4273         }
4274     }
4275
4276   BITMAP_FREE (to_clear);
4277
4278   if (dump_file && (dump_flags & TDF_DETAILS))
4279     {
4280       fprintf (dump_file, "Use-candidate costs:\n");
4281
4282       for (i = 0; i < n_iv_uses (data); i++)
4283         {
4284           use = iv_use (data, i);
4285
4286           fprintf (dump_file, "Use %d:\n", i);
4287           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4288           for (j = 0; j < use->n_map_members; j++)
4289             {
4290               if (!use->cost_map[j].cand
4291                   || use->cost_map[j].cost == INFTY)
4292                 continue;
4293
4294               fprintf (dump_file, "  %d\t%d\t",
4295                        use->cost_map[j].cand->id,
4296                        use->cost_map[j].cost);
4297               if (use->cost_map[j].depends_on)
4298                 bitmap_print (dump_file,
4299                               use->cost_map[j].depends_on, "","");
4300               fprintf (dump_file, "\n");
4301             }
4302
4303           fprintf (dump_file, "\n");
4304         }
4305       fprintf (dump_file, "\n");
4306     }
4307 }
4308
4309 /* Determines cost of the candidate CAND.  */
4310
4311 static void
4312 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4313 {
4314   unsigned cost_base, cost_step;
4315   tree base;
4316
4317   if (!cand->iv)
4318     {
4319       cand->cost = 0;
4320       return;
4321     }
4322
4323   /* There are two costs associated with the candidate -- its increment
4324      and its initialization.  The second is almost negligible for any loop
4325      that rolls enough, so we take it just very little into account.  */
4326
4327   base = cand->iv->base;
4328   cost_base = force_var_cost (data, base, NULL);
4329   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4330
4331   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4332
4333   /* Prefer the original iv unless we may gain something by replacing it;
4334      this is not really relevant for artificial ivs created by other
4335      passes.  */
4336   if (cand->pos == IP_ORIGINAL
4337       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4338     cand->cost--;
4339
4340   /* Prefer not to insert statements into latch unless there are some
4341      already (so that we do not create unnecessary jumps).  */
4342   if (cand->pos == IP_END
4343       && empty_block_p (ip_end_pos (data->current_loop)))
4344     cand->cost++;
4345 }
4346
4347 /* Determines costs of computation of the candidates.  */
4348
4349 static void
4350 determine_iv_costs (struct ivopts_data *data)
4351 {
4352   unsigned i;
4353
4354   if (dump_file && (dump_flags & TDF_DETAILS))
4355     {
4356       fprintf (dump_file, "Candidate costs:\n");
4357       fprintf (dump_file, "  cand\tcost\n");
4358     }
4359
4360   for (i = 0; i < n_iv_cands (data); i++)
4361     {
4362       struct iv_cand *cand = iv_cand (data, i);
4363
4364       determine_iv_cost (data, cand);
4365
4366       if (dump_file && (dump_flags & TDF_DETAILS))
4367         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4368     }
4369
4370 if (dump_file && (dump_flags & TDF_DETAILS))
4371       fprintf (dump_file, "\n");
4372 }
4373
4374 /* Calculates cost for having SIZE induction variables.  */
4375
4376 static unsigned
4377 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4378 {
4379   return global_cost_for_size (size,
4380                                loop_data (data->current_loop)->regs_used,
4381                                n_iv_uses (data));
4382 }
4383
4384 /* For each size of the induction variable set determine the penalty.  */
4385
4386 static void
4387 determine_set_costs (struct ivopts_data *data)
4388 {
4389   unsigned j, n;
4390   tree phi, op;
4391   struct loop *loop = data->current_loop;
4392   bitmap_iterator bi;
4393
4394   /* We use the following model (definitely improvable, especially the
4395      cost function -- TODO):
4396
4397      We estimate the number of registers available (using MD data), name it A.
4398
4399      We estimate the number of registers used by the loop, name it U.  This
4400      number is obtained as the number of loop phi nodes (not counting virtual
4401      registers and bivs) + the number of variables from outside of the loop.
4402
4403      We set a reserve R (free regs that are used for temporary computations,
4404      etc.).  For now the reserve is a constant 3.
4405
4406      Let I be the number of induction variables.
4407
4408      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4409         make a lot of ivs without a reason).
4410      -- if A - R < U + I <= A, the cost is I * PRES_COST
4411      -- if U + I > A, the cost is I * PRES_COST and
4412         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4413
4414   if (dump_file && (dump_flags & TDF_DETAILS))
4415     {
4416       fprintf (dump_file, "Global costs:\n");
4417       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4418       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4419       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4420       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4421     }
4422
4423   n = 0;
4424   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4425     {
4426       op = PHI_RESULT (phi);
4427
4428       if (!is_gimple_reg (op))
4429         continue;
4430
4431       if (get_iv (data, op))
4432         continue;
4433
4434       n++;
4435     }
4436
4437   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4438     {
4439       struct version_info *info = ver_info (data, j);
4440
4441       if (info->inv_id && info->has_nonlin_use)
4442         n++;
4443     }
4444
4445   loop_data (loop)->regs_used = n;
4446   if (dump_file && (dump_flags & TDF_DETAILS))
4447     fprintf (dump_file, "  regs_used %d\n", n);
4448
4449   if (dump_file && (dump_flags & TDF_DETAILS))
4450     {
4451       fprintf (dump_file, "  cost for size:\n");
4452       fprintf (dump_file, "  ivs\tcost\n");
4453       for (j = 0; j <= 2 * target_avail_regs; j++)
4454         fprintf (dump_file, "  %d\t%d\n", j,
4455                  ivopts_global_cost_for_size (data, j));
4456       fprintf (dump_file, "\n");
4457     }
4458 }
4459
4460 /* Returns true if A is a cheaper cost pair than B.  */
4461
4462 static bool
4463 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4464 {
4465   if (!a)
4466     return false;
4467
4468   if (!b)
4469     return true;
4470
4471   if (a->cost < b->cost)
4472     return true;
4473
4474   if (a->cost > b->cost)
4475     return false;
4476
4477   /* In case the costs are the same, prefer the cheaper candidate.  */
4478   if (a->cand->cost < b->cand->cost)
4479     return true;
4480
4481   return false;
4482 }
4483
4484 /* Computes the cost field of IVS structure.  */
4485
4486 static void
4487 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4488 {
4489   unsigned cost = 0;
4490
4491   cost += ivs->cand_use_cost;
4492   cost += ivs->cand_cost;
4493   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4494
4495   ivs->cost = cost;
4496 }
4497
4498 /* Remove invariants in set INVS to set IVS.  */
4499
4500 static void
4501 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4502 {
4503   bitmap_iterator bi;
4504   unsigned iid;
4505
4506   if (!invs)
4507     return;
4508
4509   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4510     {
4511       ivs->n_invariant_uses[iid]--;
4512       if (ivs->n_invariant_uses[iid] == 0)
4513         ivs->n_regs--;
4514     }
4515 }
4516
4517 /* Set USE not to be expressed by any candidate in IVS.  */
4518
4519 static void
4520 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4521                  struct iv_use *use)
4522 {
4523   unsigned uid = use->id, cid;
4524   struct cost_pair *cp;
4525
4526   cp = ivs->cand_for_use[uid];
4527   if (!cp)
4528     return;
4529   cid = cp->cand->id;
4530
4531   ivs->bad_uses++;
4532   ivs->cand_for_use[uid] = NULL;
4533   ivs->n_cand_uses[cid]--;
4534
4535   if (ivs->n_cand_uses[cid] == 0)
4536     {
4537       bitmap_clear_bit (ivs->cands, cid);
4538       /* Do not count the pseudocandidates.  */
4539       if (cp->cand->iv)
4540         ivs->n_regs--;
4541       ivs->n_cands--;
4542       ivs->cand_cost -= cp->cand->cost;
4543
4544       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4545     }
4546
4547   ivs->cand_use_cost -= cp->cost;
4548
4549   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4550   iv_ca_recount_cost (data, ivs);
4551 }
4552
4553 /* Add invariants in set INVS to set IVS.  */
4554
4555 static void
4556 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4557 {
4558   bitmap_iterator bi;
4559   unsigned iid;
4560
4561   if (!invs)
4562     return;
4563
4564   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4565     {
4566       ivs->n_invariant_uses[iid]++;
4567       if (ivs->n_invariant_uses[iid] == 1)
4568         ivs->n_regs++;
4569     }
4570 }
4571
4572 /* Set cost pair for USE in set IVS to CP.  */
4573
4574 static void
4575 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4576               struct iv_use *use, struct cost_pair *cp)
4577 {
4578   unsigned uid = use->id, cid;
4579
4580   if (ivs->cand_for_use[uid] == cp)
4581     return;
4582
4583   if (ivs->cand_for_use[uid])
4584     iv_ca_set_no_cp (data, ivs, use);
4585
4586   if (cp)
4587     {
4588       cid = cp->cand->id;
4589
4590       ivs->bad_uses--;
4591       ivs->cand_for_use[uid] = cp;
4592       ivs->n_cand_uses[cid]++;
4593       if (ivs->n_cand_uses[cid] == 1)
4594         {
4595           bitmap_set_bit (ivs->cands, cid);
4596           /* Do not count the pseudocandidates.  */
4597           if (cp->cand->iv)
4598             ivs->n_regs++;
4599           ivs->n_cands++;
4600           ivs->cand_cost += cp->cand->cost;
4601
4602           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4603         }
4604
4605       ivs->cand_use_cost += cp->cost;
4606       iv_ca_set_add_invariants (ivs, cp->depends_on);
4607       iv_ca_recount_cost (data, ivs);
4608     }
4609 }
4610
4611 /* Extend set IVS by expressing USE by some of the candidates in it
4612    if possible.  */
4613
4614 static void
4615 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4616                struct iv_use *use)
4617 {
4618   struct cost_pair *best_cp = NULL, *cp;
4619   bitmap_iterator bi;
4620   unsigned i;
4621
4622   gcc_assert (ivs->upto >= use->id);
4623
4624   if (ivs->upto == use->id)
4625     {
4626       ivs->upto++;
4627       ivs->bad_uses++;
4628     }
4629
4630   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4631     {
4632       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4633
4634       if (cheaper_cost_pair (cp, best_cp))
4635         best_cp = cp;
4636     }
4637
4638   iv_ca_set_cp (data, ivs, use, best_cp);
4639 }
4640
4641 /* Get cost for assignment IVS.  */
4642
4643 static unsigned
4644 iv_ca_cost (struct iv_ca *ivs)
4645 {
4646   return (ivs->bad_uses ? INFTY : ivs->cost);
4647 }
4648
4649 /* Returns true if all dependences of CP are among invariants in IVS.  */
4650
4651 static bool
4652 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4653 {
4654   unsigned i;
4655   bitmap_iterator bi;
4656
4657   if (!cp->depends_on)
4658     return true;
4659
4660   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4661     {
4662       if (ivs->n_invariant_uses[i] == 0)
4663         return false;
4664     }
4665
4666   return true;
4667 }
4668
4669 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4670    it before NEXT_CHANGE.  */
4671
4672 static struct iv_ca_delta *
4673 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4674                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4675 {
4676   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4677
4678   change->use = use;
4679   change->old_cp = old_cp;
4680   change->new_cp = new_cp;
4681   change->next_change = next_change;
4682
4683   return change;
4684 }
4685
4686 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4687    are rewritten.  */
4688
4689 static struct iv_ca_delta *
4690 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4691 {
4692   struct iv_ca_delta *last;
4693
4694   if (!l2)
4695     return l1;
4696
4697   if (!l1)
4698     return l2;
4699
4700   for (last = l1; last->next_change; last = last->next_change)
4701     continue;
4702   last->next_change = l2;
4703
4704   return l1;
4705 }
4706
4707 /* Returns candidate by that USE is expressed in IVS.  */
4708
4709 static struct cost_pair *
4710 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4711 {
4712   return ivs->cand_for_use[use->id];
4713 }
4714
4715 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4716
4717 static struct iv_ca_delta *
4718 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4719 {
4720   struct iv_ca_delta *act, *next, *prev = NULL;
4721   struct cost_pair *tmp;
4722
4723   for (act = delta; act; act = next)
4724     {
4725       next = act->next_change;
4726       act->next_change = prev;
4727       prev = act;
4728
4729       tmp = act->old_cp;
4730       act->old_cp = act->new_cp;
4731       act->new_cp = tmp;
4732     }
4733
4734   return prev;
4735 }
4736
4737 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4738    reverted instead.  */
4739
4740 static void
4741 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4742                     struct iv_ca_delta *delta, bool forward)
4743 {
4744   struct cost_pair *from, *to;
4745   struct iv_ca_delta *act;
4746
4747   if (!forward)
4748     delta = iv_ca_delta_reverse (delta);
4749
4750   for (act = delta; act; act = act->next_change)
4751     {
4752       from = act->old_cp;
4753       to = act->new_cp;
4754       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4755       iv_ca_set_cp (data, ivs, act->use, to);
4756     }
4757
4758   if (!forward)
4759     iv_ca_delta_reverse (delta);
4760 }
4761
4762 /* Returns true if CAND is used in IVS.  */
4763
4764 static bool
4765 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4766 {
4767   return ivs->n_cand_uses[cand->id] > 0;
4768 }
4769
4770 /* Returns number of induction variable candidates in the set IVS.  */
4771
4772 static unsigned
4773 iv_ca_n_cands (struct iv_ca *ivs)
4774 {
4775   return ivs->n_cands;
4776 }
4777
4778 /* Free the list of changes DELTA.  */
4779
4780 static void
4781 iv_ca_delta_free (struct iv_ca_delta **delta)
4782 {
4783   struct iv_ca_delta *act, *next;
4784
4785   for (act = *delta; act; act = next)
4786     {
4787       next = act->next_change;
4788       free (act);
4789     }
4790
4791   *delta = NULL;
4792 }
4793
4794 /* Allocates new iv candidates assignment.  */
4795
4796 static struct iv_ca *
4797 iv_ca_new (struct ivopts_data *data)
4798 {
4799   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4800
4801   nw->upto = 0;
4802   nw->bad_uses = 0;
4803   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4804   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4805   nw->cands = BITMAP_ALLOC (NULL);
4806   nw->n_cands = 0;
4807   nw->n_regs = 0;
4808   nw->cand_use_cost = 0;
4809   nw->cand_cost = 0;
4810   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4811   nw->cost = 0;
4812
4813   return nw;
4814 }
4815
4816 /* Free memory occupied by the set IVS.  */
4817
4818 static void
4819 iv_ca_free (struct iv_ca **ivs)
4820 {
4821   free ((*ivs)->cand_for_use);
4822   free ((*ivs)->n_cand_uses);
4823   BITMAP_FREE ((*ivs)->cands);
4824   free ((*ivs)->n_invariant_uses);
4825   free (*ivs);
4826   *ivs = NULL;
4827 }
4828
4829 /* Dumps IVS to FILE.  */
4830
4831 static void
4832 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4833 {
4834   const char *pref = "  invariants ";
4835   unsigned i;
4836
4837   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4838   bitmap_print (file, ivs->cands, "  candidates ","\n");
4839
4840   for (i = 1; i <= data->max_inv_id; i++)
4841     if (ivs->n_invariant_uses[i])
4842       {
4843         fprintf (file, "%s%d", pref, i);
4844         pref = ", ";
4845       }
4846   fprintf (file, "\n");
4847 }
4848
4849 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4850    new set, and store differences in DELTA.  Number of induction variables
4851    in the new set is stored to N_IVS.  */
4852
4853 static unsigned
4854 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4855               struct iv_cand *cand, struct iv_ca_delta **delta,
4856               unsigned *n_ivs)
4857 {
4858   unsigned i, cost;
4859   struct iv_use *use;
4860   struct cost_pair *old_cp, *new_cp;
4861
4862   *delta = NULL;
4863   for (i = 0; i < ivs->upto; i++)
4864     {
4865       use = iv_use (data, i);
4866       old_cp = iv_ca_cand_for_use (ivs, use);
4867
4868       if (old_cp
4869           && old_cp->cand == cand)
4870         continue;
4871
4872       new_cp = get_use_iv_cost (data, use, cand);
4873       if (!new_cp)
4874         continue;
4875
4876       if (!iv_ca_has_deps (ivs, new_cp))
4877         continue;
4878
4879       if (!cheaper_cost_pair (new_cp, old_cp))
4880         continue;
4881
4882       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4883     }
4884
4885   iv_ca_delta_commit (data, ivs, *delta, true);
4886   cost = iv_ca_cost (ivs);
4887   if (n_ivs)
4888     *n_ivs = iv_ca_n_cands (ivs);
4889   iv_ca_delta_commit (data, ivs, *delta, false);
4890
4891   return cost;
4892 }
4893
4894 /* Try narrowing set IVS by removing CAND.  Return the cost of
4895    the new set and store the differences in DELTA.  */
4896
4897 static unsigned
4898 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4899               struct iv_cand *cand, struct iv_ca_delta **delta)
4900 {
4901   unsigned i, ci;
4902   struct iv_use *use;
4903   struct cost_pair *old_cp, *new_cp, *cp;
4904   bitmap_iterator bi;
4905   struct iv_cand *cnd;
4906   unsigned cost;
4907
4908   *delta = NULL;
4909   for (i = 0; i < n_iv_uses (data); i++)
4910     {
4911       use = iv_use (data, i);
4912
4913       old_cp = iv_ca_cand_for_use (ivs, use);
4914       if (old_cp->cand != cand)
4915         continue;
4916
4917       new_cp = NULL;
4918
4919       if (data->consider_all_candidates)
4920         {
4921           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4922             {
4923               if (ci == cand->id)
4924                 continue;
4925
4926               cnd = iv_cand (data, ci);
4927
4928               cp = get_use_iv_cost (data, use, cnd);
4929               if (!cp)
4930                 continue;
4931               if (!iv_ca_has_deps (ivs, cp))
4932                 continue;
4933
4934               if (!cheaper_cost_pair (cp, new_cp))
4935                 continue;
4936
4937               new_cp = cp;
4938             }
4939         }
4940       else
4941         {
4942           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4943             {
4944               if (ci == cand->id)
4945                 continue;
4946
4947               cnd = iv_cand (data, ci);
4948
4949               cp = get_use_iv_cost (data, use, cnd);
4950               if (!cp)
4951                 continue;
4952               if (!iv_ca_has_deps (ivs, cp))
4953                 continue;
4954
4955               if (!cheaper_cost_pair (cp, new_cp))
4956                 continue;
4957
4958               new_cp = cp;
4959             }
4960         }
4961
4962       if (!new_cp)
4963         {
4964           iv_ca_delta_free (delta);
4965           return INFTY;
4966         }
4967
4968       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4969     }
4970
4971   iv_ca_delta_commit (data, ivs, *delta, true);
4972   cost = iv_ca_cost (ivs);
4973   iv_ca_delta_commit (data, ivs, *delta, false);
4974
4975   return cost;
4976 }
4977
4978 /* Try optimizing the set of candidates IVS by removing candidates different
4979    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4980    differences in DELTA.  */
4981
4982 static unsigned
4983 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4984              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4985 {
4986   bitmap_iterator bi;
4987   struct iv_ca_delta *act_delta, *best_delta;
4988   unsigned i, best_cost, acost;
4989   struct iv_cand *cand;
4990
4991   best_delta = NULL;
4992   best_cost = iv_ca_cost (ivs);
4993
4994   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4995     {
4996       cand = iv_cand (data, i);
4997
4998       if (cand == except_cand)
4999         continue;
5000
5001       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5002
5003       if (acost < best_cost)
5004         {
5005           best_cost = acost;
5006           iv_ca_delta_free (&best_delta);
5007           best_delta = act_delta;
5008         }
5009       else
5010         iv_ca_delta_free (&act_delta);
5011     }
5012
5013   if (!best_delta)
5014     {
5015       *delta = NULL;
5016       return best_cost;
5017     }
5018
5019   /* Recurse to possibly remove other unnecessary ivs.  */
5020   iv_ca_delta_commit (data, ivs, best_delta, true);
5021   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5022   iv_ca_delta_commit (data, ivs, best_delta, false);
5023   *delta = iv_ca_delta_join (best_delta, *delta);
5024   return best_cost;
5025 }
5026
5027 /* Tries to extend the sets IVS in the best possible way in order
5028    to express the USE.  */
5029
5030 static bool
5031 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5032                   struct iv_use *use)
5033 {
5034   unsigned best_cost, act_cost;
5035   unsigned i;
5036   bitmap_iterator bi;
5037   struct iv_cand *cand;
5038   struct iv_ca_delta *best_delta = NULL, *act_delta;
5039   struct cost_pair *cp;
5040
5041   iv_ca_add_use (data, ivs, use);
5042   best_cost = iv_ca_cost (ivs);
5043
5044   cp = iv_ca_cand_for_use (ivs, use);
5045   if (cp)
5046     {
5047       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5048       iv_ca_set_no_cp (data, ivs, use);
5049     }
5050
5051   /* First try important candidates.  Only if it fails, try the specific ones.
5052      Rationale -- in loops with many variables the best choice often is to use
5053      just one generic biv.  If we added here many ivs specific to the uses,
5054      the optimization algorithm later would be likely to get stuck in a local
5055      minimum, thus causing us to create too many ivs.  The approach from
5056      few ivs to more seems more likely to be successful -- starting from few
5057      ivs, replacing an expensive use by a specific iv should always be a
5058      win.  */
5059   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5060     {
5061       cand = iv_cand (data, i);
5062
5063       if (iv_ca_cand_used_p (ivs, cand))
5064         continue;
5065
5066       cp = get_use_iv_cost (data, use, cand);
5067       if (!cp)
5068         continue;
5069
5070       iv_ca_set_cp (data, ivs, use, cp);
5071       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5072       iv_ca_set_no_cp (data, ivs, use);
5073       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5074
5075       if (act_cost < best_cost)
5076         {
5077           best_cost = act_cost;
5078
5079           iv_ca_delta_free (&best_delta);
5080           best_delta = act_delta;
5081         }
5082       else
5083         iv_ca_delta_free (&act_delta);
5084     }
5085
5086   if (best_cost == INFTY)
5087     {
5088       for (i = 0; i < use->n_map_members; i++)
5089         {
5090           cp = use->cost_map + i;
5091           cand = cp->cand;
5092           if (!cand)
5093             continue;
5094
5095           /* Already tried this.  */
5096           if (cand->important)
5097             continue;
5098
5099           if (iv_ca_cand_used_p (ivs, cand))
5100             continue;
5101
5102           act_delta = NULL;
5103           iv_ca_set_cp (data, ivs, use, cp);
5104           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5105           iv_ca_set_no_cp (data, ivs, use);
5106           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5107                                        cp, act_delta);
5108
5109           if (act_cost < best_cost)
5110             {
5111               best_cost = act_cost;
5112
5113               if (best_delta)
5114                 iv_ca_delta_free (&best_delta);
5115               best_delta = act_delta;
5116             }
5117           else
5118             iv_ca_delta_free (&act_delta);
5119         }
5120     }
5121
5122   iv_ca_delta_commit (data, ivs, best_delta, true);
5123   iv_ca_delta_free (&best_delta);
5124
5125   return (best_cost != INFTY);
5126 }
5127
5128 /* Finds an initial assignment of candidates to uses.  */
5129
5130 static struct iv_ca *
5131 get_initial_solution (struct ivopts_data *data)
5132 {
5133   struct iv_ca *ivs = iv_ca_new (data);
5134   unsigned i;
5135
5136   for (i = 0; i < n_iv_uses (data); i++)
5137     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5138       {
5139         iv_ca_free (&ivs);
5140         return NULL;
5141       }
5142
5143   return ivs;
5144 }
5145
5146 /* Tries to improve set of induction variables IVS.  */
5147
5148 static bool
5149 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5150 {
5151   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5152   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5153   struct iv_cand *cand;
5154
5155   /* Try extending the set of induction variables by one.  */
5156   for (i = 0; i < n_iv_cands (data); i++)
5157     {
5158       cand = iv_cand (data, i);
5159
5160       if (iv_ca_cand_used_p (ivs, cand))
5161         continue;
5162
5163       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5164       if (!act_delta)
5165         continue;
5166
5167       /* If we successfully added the candidate and the set is small enough,
5168          try optimizing it by removing other candidates.  */
5169       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5170         {
5171           iv_ca_delta_commit (data, ivs, act_delta, true);
5172           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5173           iv_ca_delta_commit (data, ivs, act_delta, false);
5174           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5175         }
5176
5177       if (acost < best_cost)
5178         {
5179           best_cost = acost;
5180           iv_ca_delta_free (&best_delta);
5181           best_delta = act_delta;
5182         }
5183       else
5184         iv_ca_delta_free (&act_delta);
5185     }
5186
5187   if (!best_delta)
5188     {
5189       /* Try removing the candidates from the set instead.  */
5190       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5191
5192       /* Nothing more we can do.  */
5193       if (!best_delta)
5194         return false;
5195     }
5196
5197   iv_ca_delta_commit (data, ivs, best_delta, true);
5198   gcc_assert (best_cost == iv_ca_cost (ivs));
5199   iv_ca_delta_free (&best_delta);
5200   return true;
5201 }
5202
5203 /* Attempts to find the optimal set of induction variables.  We do simple
5204    greedy heuristic -- we try to replace at most one candidate in the selected
5205    solution and remove the unused ivs while this improves the cost.  */
5206
5207 static struct iv_ca *
5208 find_optimal_iv_set (struct ivopts_data *data)
5209 {
5210   unsigned i;
5211   struct iv_ca *set;
5212   struct iv_use *use;
5213
5214   /* Get the initial solution.  */
5215   set = get_initial_solution (data);
5216   if (!set)
5217     {
5218       if (dump_file && (dump_flags & TDF_DETAILS))
5219         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5220       return NULL;
5221     }
5222
5223   if (dump_file && (dump_flags & TDF_DETAILS))
5224     {
5225       fprintf (dump_file, "Initial set of candidates:\n");
5226       iv_ca_dump (data, dump_file, set);
5227     }
5228
5229   while (try_improve_iv_set (data, set))
5230     {
5231       if (dump_file && (dump_flags & TDF_DETAILS))
5232         {
5233           fprintf (dump_file, "Improved to:\n");
5234           iv_ca_dump (data, dump_file, set);
5235         }
5236     }
5237
5238   if (dump_file && (dump_flags & TDF_DETAILS))
5239     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5240
5241   for (i = 0; i < n_iv_uses (data); i++)
5242     {
5243       use = iv_use (data, i);
5244       use->selected = iv_ca_cand_for_use (set, use)->cand;
5245     }
5246
5247   return set;
5248 }
5249
5250 /* Creates a new induction variable corresponding to CAND.  */
5251
5252 static void
5253 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5254 {
5255   block_stmt_iterator incr_pos;
5256   tree base;
5257   bool after = false;
5258
5259   if (!cand->iv)
5260     return;
5261
5262   switch (cand->pos)
5263     {
5264     case IP_NORMAL:
5265       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5266       break;
5267
5268     case IP_END:
5269       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5270       after = true;
5271       break;
5272
5273     case IP_ORIGINAL:
5274       /* Mark that the iv is preserved.  */
5275       name_info (data, cand->var_before)->preserve_biv = true;
5276       name_info (data, cand->var_after)->preserve_biv = true;
5277
5278       /* Rewrite the increment so that it uses var_before directly.  */
5279       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5280
5281       return;
5282     }
5283
5284   gimple_add_tmp_var (cand->var_before);
5285   add_referenced_tmp_var (cand->var_before);
5286
5287   base = unshare_expr (cand->iv->base);
5288
5289   create_iv (base, unshare_expr (cand->iv->step),
5290              cand->var_before, data->current_loop,
5291              &incr_pos, after, &cand->var_before, &cand->var_after);
5292 }
5293
5294 /* Creates new induction variables described in SET.  */
5295
5296 static void
5297 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5298 {
5299   unsigned i;
5300   struct iv_cand *cand;
5301   bitmap_iterator bi;
5302
5303   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5304     {
5305       cand = iv_cand (data, i);
5306       create_new_iv (data, cand);
5307     }
5308 }
5309
5310 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5311    is true, remove also the ssa name defined by the statement.  */
5312
5313 static void
5314 remove_statement (tree stmt, bool including_defined_name)
5315 {
5316   if (TREE_CODE (stmt) == PHI_NODE)
5317     {
5318       if (!including_defined_name)
5319         {
5320           /* Prevent the ssa name defined by the statement from being removed.  */
5321           SET_PHI_RESULT (stmt, NULL);
5322         }
5323       remove_phi_node (stmt, NULL_TREE);
5324     }
5325   else
5326     {
5327       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5328
5329       bsi_remove (&bsi);
5330     }
5331 }
5332
5333 /* Rewrites USE (definition of iv used in a nonlinear expression)
5334    using candidate CAND.  */
5335
5336 static void
5337 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5338                             struct iv_use *use, struct iv_cand *cand)
5339 {
5340   tree comp;
5341   tree op, stmts, tgt, ass;
5342   block_stmt_iterator bsi, pbsi;
5343
5344   /* An important special case -- if we are asked to express value of
5345      the original iv by itself, just exit; there is no need to
5346      introduce a new computation (that might also need casting the
5347      variable to unsigned and back).  */
5348   if (cand->pos == IP_ORIGINAL
5349       && cand->incremented_at == use->stmt)
5350     {
5351       tree step, ctype, utype;
5352       enum tree_code incr_code = PLUS_EXPR;
5353
5354       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5355       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5356
5357       step = cand->iv->step;
5358       ctype = TREE_TYPE (step);
5359       utype = TREE_TYPE (cand->var_after);
5360       if (TREE_CODE (step) == NEGATE_EXPR)
5361         {
5362           incr_code = MINUS_EXPR;
5363           step = TREE_OPERAND (step, 0);
5364         }
5365
5366       /* Check whether we may leave the computation unchanged.
5367          This is the case only if it does not rely on other
5368          computations in the loop -- otherwise, the computation
5369          we rely upon may be removed in remove_unused_ivs,
5370          thus leading to ICE.  */
5371       op = TREE_OPERAND (use->stmt, 1);
5372       if (TREE_CODE (op) == PLUS_EXPR
5373           || TREE_CODE (op) == MINUS_EXPR)
5374         {
5375           if (TREE_OPERAND (op, 0) == cand->var_before)
5376             op = TREE_OPERAND (op, 1);
5377           else if (TREE_CODE (op) == PLUS_EXPR
5378                    && TREE_OPERAND (op, 1) == cand->var_before)
5379             op = TREE_OPERAND (op, 0);
5380           else
5381             op = NULL_TREE;
5382         }
5383       else
5384         op = NULL_TREE;
5385
5386       if (op
5387           && (TREE_CODE (op) == INTEGER_CST
5388               || operand_equal_p (op, step, 0)))
5389         return;
5390
5391       /* Otherwise, add the necessary computations to express
5392          the iv.  */
5393       op = fold_convert (ctype, cand->var_before);
5394       comp = fold_convert (utype,
5395                            build2 (incr_code, ctype, op,
5396                                    unshare_expr (step)));
5397     }
5398   else
5399     comp = get_computation (data->current_loop, use, cand);
5400
5401   switch (TREE_CODE (use->stmt))
5402     {
5403     case PHI_NODE:
5404       tgt = PHI_RESULT (use->stmt);
5405
5406       /* If we should keep the biv, do not replace it.  */
5407       if (name_info (data, tgt)->preserve_biv)
5408         return;
5409
5410       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5411       while (!bsi_end_p (pbsi)
5412              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5413         {
5414           bsi = pbsi;
5415           bsi_next (&pbsi);
5416         }
5417       break;
5418
5419     case MODIFY_EXPR:
5420       tgt = TREE_OPERAND (use->stmt, 0);
5421       bsi = bsi_for_stmt (use->stmt);
5422       break;
5423
5424     default:
5425       gcc_unreachable ();
5426     }
5427
5428   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5429
5430   if (TREE_CODE (use->stmt) == PHI_NODE)
5431     {
5432       if (stmts)
5433         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5434       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5435       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5436       remove_statement (use->stmt, false);
5437       SSA_NAME_DEF_STMT (tgt) = ass;
5438     }
5439   else
5440     {
5441       if (stmts)
5442         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5443       TREE_OPERAND (use->stmt, 1) = op;
5444     }
5445 }
5446
5447 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5448    for_each_index.  */
5449
5450 static bool
5451 idx_remove_ssa_names (tree base, tree *idx,
5452                       void *data ATTRIBUTE_UNUSED)
5453 {
5454   tree *op;
5455
5456   if (TREE_CODE (*idx) == SSA_NAME)
5457     *idx = SSA_NAME_VAR (*idx);
5458
5459   if (TREE_CODE (base) == ARRAY_REF)
5460     {
5461       op = &TREE_OPERAND (base, 2);
5462       if (*op
5463           && TREE_CODE (*op) == SSA_NAME)
5464         *op = SSA_NAME_VAR (*op);
5465       op = &TREE_OPERAND (base, 3);
5466       if (*op
5467           && TREE_CODE (*op) == SSA_NAME)
5468         *op = SSA_NAME_VAR (*op);
5469     }
5470
5471   return true;
5472 }
5473
5474 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5475
5476 static tree
5477 unshare_and_remove_ssa_names (tree ref)
5478 {
5479   ref = unshare_expr (ref);
5480   for_each_index (&ref, idx_remove_ssa_names, NULL);
5481
5482   return ref;
5483 }
5484
5485 /* Extract the alias analysis info for the memory reference REF.  There are
5486    several ways how this information may be stored and what precisely is
5487    its semantics depending on the type of the reference, but there always is
5488    somewhere hidden one _DECL node that is used to determine the set of
5489    virtual operands for the reference.  The code below deciphers this jungle
5490    and extracts this single useful piece of information.  */
5491
5492 static tree
5493 get_ref_tag (tree ref)
5494 {
5495   tree var = get_base_address (ref);
5496   tree tag;
5497
5498   if (!var)
5499     return NULL_TREE;
5500
5501   if (TREE_CODE (var) == INDIRECT_REF)
5502     {
5503       /* In case the base is a dereference of a pointer, first check its name
5504          mem tag, and if it does not have one, use type mem tag.  */
5505       var = TREE_OPERAND (var, 0);
5506       if (TREE_CODE (var) != SSA_NAME)
5507         return NULL_TREE;
5508
5509       if (SSA_NAME_PTR_INFO (var))
5510         {
5511           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5512           if (tag)
5513             return tag;
5514         }
5515
5516       var = SSA_NAME_VAR (var);
5517       tag = var_ann (var)->type_mem_tag;
5518       gcc_assert (tag != NULL_TREE);
5519       return tag;
5520     }
5521   else
5522     {
5523       if (!DECL_P (var))
5524         return NULL_TREE;
5525
5526       tag = var_ann (var)->type_mem_tag;
5527       if (tag)
5528         return tag;
5529
5530       return var;
5531     }
5532 }
5533
5534 /* Copies the reference information from OLD_REF to NEW_REF.  */
5535
5536 static void
5537 copy_ref_info (tree new_ref, tree old_ref)
5538 {
5539   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5540     copy_mem_ref_info (new_ref, old_ref);
5541   else
5542     {
5543       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5544       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5545     }
5546 }
5547
5548 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5549
5550 static void
5551 rewrite_use_address (struct ivopts_data *data,
5552                      struct iv_use *use, struct iv_cand *cand)
5553 {
5554   struct affine_tree_combination aff;
5555   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5556   tree ref;
5557
5558   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5559   unshare_aff_combination (&aff);
5560
5561   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5562   copy_ref_info (ref, *use->op_p);
5563   *use->op_p = ref;
5564 }
5565
5566 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5567    candidate CAND.  */
5568
5569 static void
5570 rewrite_use_compare (struct ivopts_data *data,
5571                      struct iv_use *use, struct iv_cand *cand)
5572 {
5573   tree comp;
5574   tree *op_p, cond, op, stmts, bound;
5575   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5576   enum tree_code compare;
5577   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5578
5579   bound = cp->value;
5580   if (bound)
5581     {
5582       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5583       tree var_type = TREE_TYPE (var);
5584
5585       compare = iv_elimination_compare (data, use);
5586       bound = fold_convert (var_type, bound);
5587       op = force_gimple_operand (unshare_expr (bound), &stmts,
5588                                  true, NULL_TREE);
5589
5590       if (stmts)
5591         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5592
5593       *use->op_p = build2 (compare, boolean_type_node, var, op);
5594       update_stmt (use->stmt);
5595       return;
5596     }
5597
5598   /* The induction variable elimination failed; just express the original
5599      giv.  */
5600   comp = get_computation (data->current_loop, use, cand);
5601
5602   cond = *use->op_p;
5603   op_p = &TREE_OPERAND (cond, 0);
5604   if (TREE_CODE (*op_p) != SSA_NAME
5605       || zero_p (get_iv (data, *op_p)->step))
5606     op_p = &TREE_OPERAND (cond, 1);
5607
5608   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5609   if (stmts)
5610     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5611
5612   *op_p = op;
5613 }
5614
5615 /* Ensure that operand *OP_P may be used at the end of EXIT without
5616    violating loop closed ssa form.  */
5617
5618 static void
5619 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5620 {
5621   basic_block def_bb;
5622   struct loop *def_loop;
5623   tree phi, use;
5624
5625   use = USE_FROM_PTR (op_p);
5626   if (TREE_CODE (use) != SSA_NAME)
5627     return;
5628
5629   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5630   if (!def_bb)
5631     return;
5632
5633   def_loop = def_bb->loop_father;
5634   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5635     return;
5636
5637   /* Try finding a phi node that copies the value out of the loop.  */
5638   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5639     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5640       break;
5641
5642   if (!phi)
5643     {
5644       /* Create such a phi node.  */
5645       tree new_name = duplicate_ssa_name (use, NULL);
5646
5647       phi = create_phi_node (new_name, exit->dest);
5648       SSA_NAME_DEF_STMT (new_name) = phi;
5649       add_phi_arg (phi, use, exit);
5650     }
5651
5652   SET_USE (op_p, PHI_RESULT (phi));
5653 }
5654
5655 /* Ensure that operands of STMT may be used at the end of EXIT without
5656    violating loop closed ssa form.  */
5657
5658 static void
5659 protect_loop_closed_ssa_form (edge exit, tree stmt)
5660 {
5661   ssa_op_iter iter;
5662   use_operand_p use_p;
5663
5664   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5665     protect_loop_closed_ssa_form_use (exit, use_p);
5666 }
5667
5668 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5669    so that they are emitted on the correct place, and so that the loop closed
5670    ssa form is preserved.  */
5671
5672 void
5673 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5674 {
5675   tree_stmt_iterator tsi;
5676   block_stmt_iterator bsi;
5677   tree phi, stmt, def, next;
5678
5679   if (!single_pred_p (exit->dest))
5680     split_loop_exit_edge (exit);
5681
5682   /* Ensure there is label in exit->dest, so that we can
5683      insert after it.  */
5684   tree_block_label (exit->dest);
5685   bsi = bsi_after_labels (exit->dest);
5686
5687   if (TREE_CODE (stmts) == STATEMENT_LIST)
5688     {
5689       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5690         {
5691           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5692           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5693         }
5694     }
5695   else
5696     {
5697       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5698       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5699     }
5700
5701   if (!op)
5702     return;
5703
5704   for (phi = phi_nodes (exit->dest); phi; phi = next)
5705     {
5706       next = PHI_CHAIN (phi);
5707
5708       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5709         {
5710           def = PHI_RESULT (phi);
5711           remove_statement (phi, false);
5712           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5713                         def, op);
5714           SSA_NAME_DEF_STMT (def) = stmt;
5715           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5716         }
5717     }
5718 }
5719
5720 /* Rewrites the final value of USE (that is only needed outside of the loop)
5721    using candidate CAND.  */
5722
5723 static void
5724 rewrite_use_outer (struct ivopts_data *data,
5725                    struct iv_use *use, struct iv_cand *cand)
5726 {
5727   edge exit;
5728   tree value, op, stmts, tgt;
5729   tree phi;
5730
5731   switch (TREE_CODE (use->stmt))
5732     {
5733     case PHI_NODE:
5734       tgt = PHI_RESULT (use->stmt);
5735       break;
5736     case MODIFY_EXPR:
5737       tgt = TREE_OPERAND (use->stmt, 0);
5738       break;
5739     default:
5740       gcc_unreachable ();
5741     }
5742
5743   exit = single_dom_exit (data->current_loop);
5744
5745   if (exit)
5746     {
5747       if (!cand->iv)
5748         {
5749           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5750           value = unshare_expr (cp->value);
5751         }
5752       else
5753         value = get_computation_at (data->current_loop,
5754                                     use, cand, last_stmt (exit->src));
5755
5756       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5757
5758       /* If we will preserve the iv anyway and we would need to perform
5759          some computation to replace the final value, do nothing.  */
5760       if (stmts && name_info (data, tgt)->preserve_biv)
5761         return;
5762
5763       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5764         {
5765           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5766
5767           if (USE_FROM_PTR (use_p) == tgt)
5768             SET_USE (use_p, op);
5769         }
5770
5771       if (stmts)
5772         compute_phi_arg_on_exit (exit, stmts, op);
5773
5774       /* Enable removal of the statement.  We cannot remove it directly,
5775          since we may still need the aliasing information attached to the
5776          ssa name defined by it.  */
5777       name_info (data, tgt)->iv->have_use_for = false;
5778       return;
5779     }
5780
5781   /* If the variable is going to be preserved anyway, there is nothing to
5782      do.  */
5783   if (name_info (data, tgt)->preserve_biv)
5784     return;
5785
5786   /* Otherwise we just need to compute the iv.  */
5787   rewrite_use_nonlinear_expr (data, use, cand);
5788 }
5789
5790 /* Rewrites USE using candidate CAND.  */
5791
5792 static void
5793 rewrite_use (struct ivopts_data *data,
5794              struct iv_use *use, struct iv_cand *cand)
5795 {
5796   switch (use->type)
5797     {
5798       case USE_NONLINEAR_EXPR:
5799         rewrite_use_nonlinear_expr (data, use, cand);
5800         break;
5801
5802       case USE_OUTER:
5803         rewrite_use_outer (data, use, cand);
5804         break;
5805
5806       case USE_ADDRESS:
5807         rewrite_use_address (data, use, cand);
5808         break;
5809
5810       case USE_COMPARE:
5811         rewrite_use_compare (data, use, cand);
5812         break;
5813
5814       default:
5815         gcc_unreachable ();
5816     }
5817   update_stmt (use->stmt);
5818 }
5819
5820 /* Rewrite the uses using the selected induction variables.  */
5821
5822 static void
5823 rewrite_uses (struct ivopts_data *data)
5824 {
5825   unsigned i;
5826   struct iv_cand *cand;
5827   struct iv_use *use;
5828
5829   for (i = 0; i < n_iv_uses (data); i++)
5830     {
5831       use = iv_use (data, i);
5832       cand = use->selected;
5833       gcc_assert (cand);
5834
5835       rewrite_use (data, use, cand);
5836     }
5837 }
5838
5839 /* Removes the ivs that are not used after rewriting.  */
5840
5841 static void
5842 remove_unused_ivs (struct ivopts_data *data)
5843 {
5844   unsigned j;
5845   bitmap_iterator bi;
5846
5847   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5848     {
5849       struct version_info *info;
5850
5851       info = ver_info (data, j);
5852       if (info->iv
5853           && !zero_p (info->iv->step)
5854           && !info->inv_id
5855           && !info->iv->have_use_for
5856           && !info->preserve_biv)
5857         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5858     }
5859 }
5860
5861 /* Frees data allocated by the optimization of a single loop.  */
5862
5863 static void
5864 free_loop_data (struct ivopts_data *data)
5865 {
5866   unsigned i, j;
5867   bitmap_iterator bi;
5868   tree obj;
5869
5870   htab_empty (data->niters);
5871
5872   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5873     {
5874       struct version_info *info;
5875
5876       info = ver_info (data, i);
5877       if (info->iv)
5878         free (info->iv);
5879       info->iv = NULL;
5880       info->has_nonlin_use = false;
5881       info->preserve_biv = false;
5882       info->inv_id = 0;
5883     }
5884   bitmap_clear (data->relevant);
5885   bitmap_clear (data->important_candidates);
5886
5887   for (i = 0; i < n_iv_uses (data); i++)
5888     {
5889       struct iv_use *use = iv_use (data, i);
5890
5891       free (use->iv);
5892       BITMAP_FREE (use->related_cands);
5893       for (j = 0; j < use->n_map_members; j++)
5894         if (use->cost_map[j].depends_on)
5895           BITMAP_FREE (use->cost_map[j].depends_on);
5896       free (use->cost_map);
5897       free (use);
5898     }
5899   VEC_truncate (iv_use_p, data->iv_uses, 0);
5900
5901   for (i = 0; i < n_iv_cands (data); i++)
5902     {
5903       struct iv_cand *cand = iv_cand (data, i);
5904
5905       if (cand->iv)
5906         free (cand->iv);
5907       if (cand->depends_on)
5908         BITMAP_FREE (cand->depends_on);
5909       free (cand);
5910     }
5911   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5912
5913   if (data->version_info_size < num_ssa_names)
5914     {
5915       data->version_info_size = 2 * num_ssa_names;
5916       free (data->version_info);
5917       data->version_info = xcalloc (data->version_info_size,
5918                                     sizeof (struct version_info));
5919     }
5920
5921   data->max_inv_id = 0;
5922
5923   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5924     SET_DECL_RTL (obj, NULL_RTX);
5925
5926   VEC_truncate (tree, decl_rtl_to_reset, 0);
5927 }
5928
5929 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5930    loop tree.  */
5931
5932 static void
5933 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5934 {
5935   unsigned i;
5936
5937   for (i = 1; i < loops->num; i++)
5938     if (loops->parray[i])
5939       {
5940         free (loops->parray[i]->aux);
5941         loops->parray[i]->aux = NULL;
5942       }
5943
5944   free_loop_data (data);
5945   free (data->version_info);
5946   BITMAP_FREE (data->relevant);
5947   BITMAP_FREE (data->important_candidates);
5948   htab_delete (data->niters);
5949
5950   VEC_free (tree, heap, decl_rtl_to_reset);
5951   VEC_free (iv_use_p, heap, data->iv_uses);
5952   VEC_free (iv_cand_p, heap, data->iv_candidates);
5953 }
5954
5955 /* Optimizes the LOOP.  Returns true if anything changed.  */
5956
5957 static bool
5958 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5959 {
5960   bool changed = false;
5961   struct iv_ca *iv_ca;
5962   edge exit;
5963
5964   data->current_loop = loop;
5965
5966   if (dump_file && (dump_flags & TDF_DETAILS))
5967     {
5968       fprintf (dump_file, "Processing loop %d\n", loop->num);
5969
5970       exit = single_dom_exit (loop);
5971       if (exit)
5972         {
5973           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5974                    exit->src->index, exit->dest->index);
5975           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5976           fprintf (dump_file, "\n");
5977         }
5978
5979       fprintf (dump_file, "\n");
5980     }
5981
5982   /* For each ssa name determines whether it behaves as an induction variable
5983      in some loop.  */
5984   if (!find_induction_variables (data))
5985     goto finish;
5986
5987   /* Finds interesting uses (item 1).  */
5988   find_interesting_uses (data);
5989   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5990     goto finish;
5991
5992   /* Finds candidates for the induction variables (item 2).  */
5993   find_iv_candidates (data);
5994
5995   /* Calculates the costs (item 3, part 1).  */
5996   determine_use_iv_costs (data);
5997   determine_iv_costs (data);
5998   determine_set_costs (data);
5999
6000   /* Find the optimal set of induction variables (item 3, part 2).  */
6001   iv_ca = find_optimal_iv_set (data);
6002   if (!iv_ca)
6003     goto finish;
6004   changed = true;
6005
6006   /* Create the new induction variables (item 4, part 1).  */
6007   create_new_ivs (data, iv_ca);
6008   iv_ca_free (&iv_ca);
6009
6010   /* Rewrite the uses (item 4, part 2).  */
6011   rewrite_uses (data);
6012
6013   /* Remove the ivs that are unused after rewriting.  */
6014   remove_unused_ivs (data);
6015
6016   /* We have changed the structure of induction variables; it might happen
6017      that definitions in the scev database refer to some of them that were
6018      eliminated.  */
6019   scev_reset ();
6020
6021 finish:
6022   free_loop_data (data);
6023
6024   return changed;
6025 }
6026
6027 /* Main entry point.  Optimizes induction variables in LOOPS.  */
6028
6029 void
6030 tree_ssa_iv_optimize (struct loops *loops)
6031 {
6032   struct loop *loop;
6033   struct ivopts_data data;
6034
6035   tree_ssa_iv_optimize_init (loops, &data);
6036
6037   /* Optimize the loops starting with the innermost ones.  */
6038   loop = loops->tree_root;
6039   while (loop->inner)
6040     loop = loop->inner;
6041
6042   /* Scan the loops, inner ones first.  */
6043   while (loop != loops->tree_root)
6044     {
6045       if (dump_file && (dump_flags & TDF_DETAILS))
6046         flow_loop_dump (loop, dump_file, NULL, 1);
6047
6048       tree_ssa_iv_optimize_loop (&data, loop);
6049
6050       if (loop->next)
6051         {
6052           loop = loop->next;
6053           while (loop->inner)
6054             loop = loop->inner;
6055         }
6056       else
6057         loop = loop->outer;
6058     }
6059
6060   tree_ssa_iv_optimize_finalize (loops, &data);
6061 }