gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter,
 716                                                      true);
 717       *slot = nfe_desc;
 718     }
 719   else
 720     nfe_desc = *slot;
 721
 722   if (!nfe_desc->valid_p)
 723     return NULL;
 724
 725   return &nfe_desc->niter;
 726 }
 727
 728 /* Returns structure describing number of iterations determined from
 729    single dominating exit of DATA->current_loop, or NULL if something
 730    goes wrong.  */
 731
 732 static struct tree_niter_desc *
 733 niter_for_single_dom_exit (struct ivopts_data *data)
 734 {
 735   edge exit = single_dom_exit (data->current_loop);
 736
 737   if (!exit)
 738     return NULL;
 739
 740   return niter_for_exit (data, exit);
 741 }
 742
 743 /* Initializes data structures used by the iv optimization pass, stored
 744    in DATA.  LOOPS is the loop tree.  */
 745
 746 static void
 747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 748 {
 749   unsigned i;
 750
 751   data->version_info_size = 2 * num_ssa_names;
 752   data->version_info = xcalloc (data->version_info_size,
 753                                 sizeof (struct version_info));
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 758
 759   for (i = 1; i < loops->num; i++)
 760     if (loops->parray[i])
 761       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 762
 763   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 764   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 765   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 766 }
 767
 768 /* Returns a memory object to that EXPR points.  In case we are able to
 769    determine that it does not point to any such object, NULL is returned.  */
 770
 771 static tree
 772 determine_base_object (tree expr)
 773 {
 774   enum tree_code code = TREE_CODE (expr);
 775   tree base, obj, op0, op1;
 776
 777   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 778     return NULL_TREE;
 779
 780   switch (code)
 781     {
 782     case INTEGER_CST:
 783       return NULL_TREE;
 784
 785     case ADDR_EXPR:
 786       obj = TREE_OPERAND (expr, 0);
 787       base = get_base_address (obj);
 788
 789       if (!base)
 790         return expr;
 791
 792       if (TREE_CODE (base) == INDIRECT_REF)
 793         return determine_base_object (TREE_OPERAND (base, 0));
 794
 795       return fold_convert (ptr_type_node,
 796                            build_fold_addr_expr (base));
 797
 798     case PLUS_EXPR:
 799     case MINUS_EXPR:
 800       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 801       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 802
 803       if (!op1)
 804         return op0;
 805
 806       if (!op0)
 807         return (code == PLUS_EXPR
 808                 ? op1
 809                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 810
 811       return fold_build2 (code, ptr_type_node, op0, op1);
 812
 813     case NOP_EXPR:
 814     case CONVERT_EXPR:
 815       return determine_base_object (TREE_OPERAND (expr, 0));
 816
 817     default:
 818       return fold_convert (ptr_type_node, expr);
 819     }
 820 }
 821
 822 /* Allocates an induction variable with given initial value BASE and step STEP
 823    for loop LOOP.  */
 824
 825 static struct iv *
 826 alloc_iv (tree base, tree step)
 827 {
 828   struct iv *iv = xcalloc (1, sizeof (struct iv));
 829
 830   if (step && integer_zerop (step))
 831     step = NULL_TREE;
 832
 833   iv->base = base;
 834   iv->base_object = determine_base_object (base);
 835   iv->step = step;
 836   iv->biv_p = false;
 837   iv->have_use_for = false;
 838   iv->use_id = 0;
 839   iv->ssa_name = NULL_TREE;
 840
 841   return iv;
 842 }
 843
 844 /* Sets STEP and BASE for induction variable IV.  */
 845
 846 static void
 847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 848 {
 849   struct version_info *info = name_info (data, iv);
 850
 851   gcc_assert (!info->iv);
 852
 853   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 854   info->iv = alloc_iv (base, step);
 855   info->iv->ssa_name = iv;
 856 }
 857
 858 /* Finds induction variable declaration for VAR.  */
 859
 860 static struct iv *
 861 get_iv (struct ivopts_data *data, tree var)
 862 {
 863   basic_block bb;
 864
 865   if (!name_info (data, var)->iv)
 866     {
 867       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 868
 869       if (!bb
 870           || !flow_bb_inside_loop_p (data->current_loop, bb))
 871         set_iv (data, var, var, NULL_TREE);
 872     }
 873
 874   return name_info (data, var)->iv;
 875 }
 876
 877 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 878    not define a simple affine biv with nonzero step.  */
 879
 880 static tree
 881 determine_biv_step (tree phi)
 882 {
 883   struct loop *loop = bb_for_stmt (phi)->loop_father;
 884   tree name = PHI_RESULT (phi), base, step;
 885
 886   if (!is_gimple_reg (name))
 887     return NULL_TREE;
 888
 889   if (!simple_iv (loop, phi, name, &base, &step, true))
 890     return NULL_TREE;
 891
 892   if (zero_p (step))
 893     return NULL_TREE;
 894
 895   return step;
 896 }
 897
 898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 899
 900 static bool
 901 abnormal_ssa_name_p (tree exp)
 902 {
 903   if (!exp)
 904     return false;
 905
 906   if (TREE_CODE (exp) != SSA_NAME)
 907     return false;
 908
 909   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 910 }
 911
 912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 913    abnormal phi node.  Callback for for_each_index.  */
 914
 915 static bool
 916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 917                                   void *data ATTRIBUTE_UNUSED)
 918 {
 919   if (TREE_CODE (base) == ARRAY_REF)
 920     {
 921       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 922         return false;
 923       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 924         return false;
 925     }
 926
 927   return !abnormal_ssa_name_p (*index);
 928 }
 929
 930 /* Returns true if EXPR contains a ssa name that occurs in an
 931    abnormal phi node.  */
 932
 933 static bool
 934 contains_abnormal_ssa_name_p (tree expr)
 935 {
 936   enum tree_code code;
 937   enum tree_code_class class;
 938
 939   if (!expr)
 940     return false;
 941
 942   code = TREE_CODE (expr);
 943   class = TREE_CODE_CLASS (code);
 944
 945   if (code == SSA_NAME)
 946     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 947
 948   if (code == INTEGER_CST
 949       || is_gimple_min_invariant (expr))
 950     return false;
 951
 952   if (code == ADDR_EXPR)
 953     return !for_each_index (&TREE_OPERAND (expr, 0),
 954                             idx_contains_abnormal_ssa_name_p,
 955                             NULL);
 956
 957   switch (class)
 958     {
 959     case tcc_binary:
 960     case tcc_comparison:
 961       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 962         return true;
 963
 964       /* Fallthru.  */
 965     case tcc_unary:
 966       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 967         return true;
 968
 969       break;
 970
 971     default:
 972       gcc_unreachable ();
 973     }
 974
 975   return false;
 976 }
 977
 978 /* Finds basic ivs.  */
 979
 980 static bool
 981 find_bivs (struct ivopts_data *data)
 982 {
 983   tree phi, step, type, base;
 984   bool found = false;
 985   struct loop *loop = data->current_loop;
 986
 987   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 988     {
 989       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 990         continue;
 991
 992       step = determine_biv_step (phi);
 993       if (!step)
 994         continue;
 995
 996       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 997       base = expand_simple_operations (base);
 998       if (contains_abnormal_ssa_name_p (base)
 999           || contains_abnormal_ssa_name_p (step))
1000         continue;
1001
1002       type = TREE_TYPE (PHI_RESULT (phi));
1003       base = fold_convert (type, base);
1004       if (step)
1005         step = fold_convert (type, step);
1006
1007       set_iv (data, PHI_RESULT (phi), base, step);
1008       found = true;
1009     }
1010
1011   return found;
1012 }
1013
1014 /* Marks basic ivs.  */
1015
1016 static void
1017 mark_bivs (struct ivopts_data *data)
1018 {
1019   tree phi, var;
1020   struct iv *iv, *incr_iv;
1021   struct loop *loop = data->current_loop;
1022   basic_block incr_bb;
1023
1024   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025     {
1026       iv = get_iv (data, PHI_RESULT (phi));
1027       if (!iv)
1028         continue;
1029
1030       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031       incr_iv = get_iv (data, var);
1032       if (!incr_iv)
1033         continue;
1034
1035       /* If the increment is in the subloop, ignore it.  */
1036       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037       if (incr_bb->loop_father != data->current_loop
1038           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039         continue;
1040
1041       iv->biv_p = true;
1042       incr_iv->biv_p = true;
1043     }
1044 }
1045
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047    parameters to BASE and STEP.  */
1048
1049 static bool
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051                         tree *base, tree *step)
1052 {
1053   tree lhs;
1054   struct loop *loop = data->current_loop;
1055
1056   *base = NULL_TREE;
1057   *step = NULL_TREE;
1058
1059   if (TREE_CODE (stmt) != MODIFY_EXPR)
1060     return false;
1061
1062   lhs = TREE_OPERAND (stmt, 0);
1063   if (TREE_CODE (lhs) != SSA_NAME)
1064     return false;
1065
1066   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067     return false;
1068   *base = expand_simple_operations (*base);
1069
1070   if (contains_abnormal_ssa_name_p (*base)
1071       || contains_abnormal_ssa_name_p (*step))
1072     return false;
1073
1074   return true;
1075 }
1076
1077 /* Finds general ivs in statement STMT.  */
1078
1079 static void
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 {
1082   tree base, step;
1083
1084   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085     return;
1086
1087   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1088 }
1089
1090 /* Finds general ivs in basic block BB.  */
1091
1092 static void
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 {
1095   block_stmt_iterator bsi;
1096
1097   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098     find_givs_in_stmt (data, bsi_stmt (bsi));
1099 }
1100
1101 /* Finds general ivs.  */
1102
1103 static void
1104 find_givs (struct ivopts_data *data)
1105 {
1106   struct loop *loop = data->current_loop;
1107   basic_block *body = get_loop_body_in_dom_order (loop);
1108   unsigned i;
1109
1110   for (i = 0; i < loop->num_nodes; i++)
1111     find_givs_in_bb (data, body[i]);
1112   free (body);
1113 }
1114
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116    variable and if so, its initial value and step.  */
1117
1118 static bool
1119 find_induction_variables (struct ivopts_data *data)
1120 {
1121   unsigned i;
1122   bitmap_iterator bi;
1123
1124   if (!find_bivs (data))
1125     return false;
1126
1127   find_givs (data);
1128   mark_bivs (data);
1129
1130   if (dump_file && (dump_flags & TDF_DETAILS))
1131     {
1132       struct tree_niter_desc *niter;
1133
1134       niter = niter_for_single_dom_exit (data);
1135
1136       if (niter)
1137         {
1138           fprintf (dump_file, "  number of iterations ");
1139           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141
1142           fprintf (dump_file, "  may be zero if ");
1143           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144           fprintf (dump_file, "\n");
1145           fprintf (dump_file, "\n");
1146         };
1147
1148       fprintf (dump_file, "Induction variables:\n\n");
1149
1150       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151         {
1152           if (ver_info (data, i)->iv)
1153             dump_iv (dump_file, ver_info (data, i)->iv);
1154         }
1155     }
1156
1157   return true;
1158 }
1159
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1161
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164             tree stmt, enum use_type use_type)
1165 {
1166   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167
1168   use->id = n_iv_uses (data);
1169   use->type = use_type;
1170   use->iv = iv;
1171   use->stmt = stmt;
1172   use->op_p = use_p;
1173   use->related_cands = BITMAP_ALLOC (NULL);
1174
1175   /* To avoid showing ssa name in the dumps, if it was not reset by the
1176      caller.  */
1177   iv->ssa_name = NULL_TREE;
1178
1179   if (dump_file && (dump_flags & TDF_DETAILS))
1180     dump_use (dump_file, use);
1181
1182   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183
1184   return use;
1185 }
1186
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188    NONLINEAR_USE is true if the invariant is used in a way we do not
1189    handle specially.  */
1190
1191 static void
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 {
1194   basic_block bb;
1195   struct version_info *info;
1196
1197   if (TREE_CODE (op) != SSA_NAME
1198       || !is_gimple_reg (op))
1199     return;
1200
1201   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202   if (bb
1203       && flow_bb_inside_loop_p (data->current_loop, bb))
1204     return;
1205
1206   info = name_info (data, op);
1207   info->name = op;
1208   info->has_nonlin_use |= nonlinear_use;
1209   if (!info->inv_id)
1210     info->inv_id = ++data->max_inv_id;
1211   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1212 }
1213
1214 /* Checks whether the use OP is interesting and if so, records it
1215    as TYPE.  */
1216
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1219                                        enum use_type type)
1220 {
1221   struct iv *iv;
1222   struct iv *civ;
1223   tree stmt;
1224   struct iv_use *use;
1225
1226   if (TREE_CODE (op) != SSA_NAME)
1227     return NULL;
1228
1229   iv = get_iv (data, op);
1230   if (!iv)
1231     return NULL;
1232
1233   if (iv->have_use_for)
1234     {
1235       use = iv_use (data, iv->use_id);
1236
1237       gcc_assert (use->type == USE_NONLINEAR_EXPR
1238                   || use->type == USE_OUTER);
1239
1240       if (type == USE_NONLINEAR_EXPR)
1241         use->type = USE_NONLINEAR_EXPR;
1242       return use;
1243     }
1244
1245   if (zero_p (iv->step))
1246     {
1247       record_invariant (data, op, true);
1248       return NULL;
1249     }
1250   iv->have_use_for = true;
1251
1252   civ = xmalloc (sizeof (struct iv));
1253   *civ = *iv;
1254
1255   stmt = SSA_NAME_DEF_STMT (op);
1256   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257               || TREE_CODE (stmt) == MODIFY_EXPR);
1258
1259   use = record_use (data, NULL, civ, stmt, type);
1260   iv->use_id = use->id;
1261
1262   return use;
1263 }
1264
1265 /* Checks whether the use OP is interesting and if so, records it.  */
1266
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 {
1270   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1271 }
1272
1273 /* Records a definition of induction variable OP that is used outside of the
1274    loop.  */
1275
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 {
1279   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1280 }
1281
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283    and if so, records it.  */
1284
1285 static void
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 {
1288   tree *op0_p;
1289   tree *op1_p;
1290   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291   struct iv const_iv;
1292   tree zero = integer_zero_node;
1293
1294   const_iv.step = NULL_TREE;
1295
1296   if (TREE_CODE (*cond_p) != SSA_NAME
1297       && !COMPARISON_CLASS_P (*cond_p))
1298     return;
1299
1300   if (TREE_CODE (*cond_p) == SSA_NAME)
1301     {
1302       op0_p = cond_p;
1303       op1_p = &zero;
1304     }
1305   else
1306     {
1307       op0_p = &TREE_OPERAND (*cond_p, 0);
1308       op1_p = &TREE_OPERAND (*cond_p, 1);
1309     }
1310
1311   if (TREE_CODE (*op0_p) == SSA_NAME)
1312     iv0 = get_iv (data, *op0_p);
1313   else
1314     iv0 = &const_iv;
1315
1316   if (TREE_CODE (*op1_p) == SSA_NAME)
1317     iv1 = get_iv (data, *op1_p);
1318   else
1319     iv1 = &const_iv;
1320
1321   if (/* When comparing with non-invariant value, we may not do any senseful
1322          induction variable elimination.  */
1323       (!iv0 || !iv1)
1324       /* Eliminating condition based on two ivs would be nontrivial.
1325          ??? TODO -- it is not really important to handle this case.  */
1326       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327     {
1328       find_interesting_uses_op (data, *op0_p);
1329       find_interesting_uses_op (data, *op1_p);
1330       return;
1331     }
1332
1333   if (zero_p (iv0->step) && zero_p (iv1->step))
1334     {
1335       /* If both are invariants, this is a work for unswitching.  */
1336       return;
1337     }
1338
1339   civ = xmalloc (sizeof (struct iv));
1340   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1342 }
1343
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345    i.e. if all its operands are defined outside of the LOOP.  */
1346
1347 bool
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 {
1350   basic_block def_bb;
1351   unsigned i, len;
1352
1353   if (is_gimple_min_invariant (expr))
1354     return true;
1355
1356   if (TREE_CODE (expr) == SSA_NAME)
1357     {
1358       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359       if (def_bb
1360           && flow_bb_inside_loop_p (loop, def_bb))
1361         return false;
1362
1363       return true;
1364     }
1365
1366   if (!EXPR_P (expr))
1367     return false;
1368
1369   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370   for (i = 0; i < len; i++)
1371     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372       return false;
1373
1374   return true;
1375 }
1376
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378    initial ones.  Returns false when the value of the index cannot be determined.
1379    Callback for for_each_index.  */
1380
1381 struct ifs_ivopts_data
1382 {
1383   struct ivopts_data *ivopts_data;
1384   tree stmt;
1385   tree *step_p;
1386 };
1387
1388 static bool
1389 idx_find_step (tree base, tree *idx, void *data)
1390 {
1391   struct ifs_ivopts_data *dta = data;
1392   struct iv *iv;
1393   tree step, iv_step, lbound, off;
1394   struct loop *loop = dta->ivopts_data->current_loop;
1395
1396   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398     return false;
1399
1400   /* If base is a component ref, require that the offset of the reference
1401      be invariant.  */
1402   if (TREE_CODE (base) == COMPONENT_REF)
1403     {
1404       off = component_ref_field_offset (base);
1405       return expr_invariant_in_loop_p (loop, off);
1406     }
1407
1408   /* If base is array, first check whether we will be able to move the
1409      reference out of the loop (in order to take its address in strength
1410      reduction).  In order for this to work we need both lower bound
1411      and step to be loop invariants.  */
1412   if (TREE_CODE (base) == ARRAY_REF)
1413     {
1414       step = array_ref_element_size (base);
1415       lbound = array_ref_low_bound (base);
1416
1417       if (!expr_invariant_in_loop_p (loop, step)
1418           || !expr_invariant_in_loop_p (loop, lbound))
1419         return false;
1420     }
1421
1422   if (TREE_CODE (*idx) != SSA_NAME)
1423     return true;
1424
1425   iv = get_iv (dta->ivopts_data, *idx);
1426   if (!iv)
1427     return false;
1428
1429   *idx = iv->base;
1430
1431   if (!iv->step)
1432     return true;
1433
1434   if (TREE_CODE (base) == ARRAY_REF)
1435     {
1436       step = array_ref_element_size (base);
1437
1438       /* We only handle addresses whose step is an integer constant.  */
1439       if (TREE_CODE (step) != INTEGER_CST)
1440         return false;
1441     }
1442   else
1443     /* The step for pointer arithmetics already is 1 byte.  */
1444     step = build_int_cst (sizetype, 1);
1445
1446   iv_step = convert_step (dta->ivopts_data->current_loop,
1447                           sizetype, iv->base, iv->step, dta->stmt);
1448
1449   if (!iv_step)
1450     {
1451       /* The index might wrap.  */
1452       return false;
1453     }
1454
1455   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1456
1457   if (!*dta->step_p)
1458     *dta->step_p = step;
1459   else
1460     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1461
1462   return true;
1463 }
1464
1465 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1466    object is passed to it in DATA.  */
1467
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470                 void *data)
1471 {
1472   find_interesting_uses_op (data, *idx);
1473   if (TREE_CODE (base) == ARRAY_REF)
1474     {
1475       find_interesting_uses_op (data, array_ref_element_size (base));
1476       find_interesting_uses_op (data, array_ref_low_bound (base));
1477     }
1478   return true;
1479 }
1480
1481 /* Returns true if memory reference REF may be unaligned.  */
1482
1483 static bool
1484 may_be_unaligned_p (tree ref)
1485 {
1486   tree base;
1487   tree base_type;
1488   HOST_WIDE_INT bitsize;
1489   HOST_WIDE_INT bitpos;
1490   tree toffset;
1491   enum machine_mode mode;
1492   int unsignedp, volatilep;
1493   unsigned base_align;
1494
1495   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1496      thus they are not misaligned.  */
1497   if (TREE_CODE (ref) == TARGET_MEM_REF)
1498     return false;
1499
1500   /* The test below is basically copy of what expr.c:normal_inner_ref
1501      does to check whether the object must be loaded by parts when
1502      STRICT_ALIGNMENT is true.  */
1503   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1504                               &unsignedp, &volatilep, true);
1505   base_type = TREE_TYPE (base);
1506   base_align = TYPE_ALIGN (base_type);
1507
1508   if (mode != BLKmode
1509       && (base_align < GET_MODE_ALIGNMENT (mode)
1510           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1511           || bitpos % BITS_PER_UNIT != 0))
1512     return true;
1513
1514   return false;
1515 }
1516
1517 /* Finds addresses in *OP_P inside STMT.  */
1518
1519 static void
1520 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1521 {
1522   tree base = *op_p, step = NULL;
1523   struct iv *civ;
1524   struct ifs_ivopts_data ifs_ivopts_data;
1525
1526   /* Do not play with volatile memory references.  A bit too conservative,
1527      perhaps, but safe.  */
1528   if (stmt_ann (stmt)->has_volatile_ops)
1529     goto fail;
1530
1531   /* Ignore bitfields for now.  Not really something terribly complicated
1532      to handle.  TODO.  */
1533   if (TREE_CODE (base) == COMPONENT_REF
1534       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1535     goto fail;
1536
1537   if (STRICT_ALIGNMENT
1538       && may_be_unaligned_p (base))
1539     goto fail;
1540
1541   base = unshare_expr (base);
1542
1543   if (TREE_CODE (base) == TARGET_MEM_REF)
1544     {
1545       tree type = build_pointer_type (TREE_TYPE (base));
1546       tree astep;
1547
1548       if (TMR_BASE (base)
1549           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1550         {
1551           civ = get_iv (data, TMR_BASE (base));
1552           if (!civ)
1553             goto fail;
1554
1555           TMR_BASE (base) = civ->base;
1556           step = civ->step;
1557         }
1558       if (TMR_INDEX (base)
1559           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1560         {
1561           civ = get_iv (data, TMR_INDEX (base));
1562           if (!civ)
1563             goto fail;
1564
1565           TMR_INDEX (base) = civ->base;
1566           astep = civ->step;
1567
1568           if (astep)
1569             {
1570               if (TMR_STEP (base))
1571                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1572
1573               if (step)
1574                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1575               else
1576                 step = astep;
1577             }
1578         }
1579
1580       if (zero_p (step))
1581         goto fail;
1582       base = tree_mem_ref_addr (type, base);
1583     }
1584   else
1585     {
1586       ifs_ivopts_data.ivopts_data = data;
1587       ifs_ivopts_data.stmt = stmt;
1588       ifs_ivopts_data.step_p = &step;
1589       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1590           || zero_p (step))
1591         goto fail;
1592
1593       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1594       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1595
1596       base = build_fold_addr_expr (base);
1597     }
1598
1599   civ = alloc_iv (base, step);
1600   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1601   return;
1602
1603 fail:
1604   for_each_index (op_p, idx_record_use, data);
1605 }
1606
1607 /* Finds and records invariants used in STMT.  */
1608
1609 static void
1610 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1611 {
1612   ssa_op_iter iter;
1613   use_operand_p use_p;
1614   tree op;
1615
1616   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1617     {
1618       op = USE_FROM_PTR (use_p);
1619       record_invariant (data, op, false);
1620     }
1621 }
1622
1623 /* Finds interesting uses of induction variables in the statement STMT.  */
1624
1625 static void
1626 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1627 {
1628   struct iv *iv;
1629   tree op, lhs, rhs;
1630   ssa_op_iter iter;
1631   use_operand_p use_p;
1632
1633   find_invariants_stmt (data, stmt);
1634
1635   if (TREE_CODE (stmt) == COND_EXPR)
1636     {
1637       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1638       return;
1639     }
1640
1641   if (TREE_CODE (stmt) == MODIFY_EXPR)
1642     {
1643       lhs = TREE_OPERAND (stmt, 0);
1644       rhs = TREE_OPERAND (stmt, 1);
1645
1646       if (TREE_CODE (lhs) == SSA_NAME)
1647         {
1648           /* If the statement defines an induction variable, the uses are not
1649              interesting by themselves.  */
1650
1651           iv = get_iv (data, lhs);
1652
1653           if (iv && !zero_p (iv->step))
1654             return;
1655         }
1656
1657       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1658         {
1659         case tcc_comparison:
1660           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1661           return;
1662
1663         case tcc_reference:
1664           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1665           if (REFERENCE_CLASS_P (lhs))
1666             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1667           return;
1668
1669         default: ;
1670         }
1671
1672       if (REFERENCE_CLASS_P (lhs)
1673           && is_gimple_val (rhs))
1674         {
1675           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1676           find_interesting_uses_op (data, rhs);
1677           return;
1678         }
1679
1680       /* TODO -- we should also handle address uses of type
1681
1682          memory = call (whatever);
1683
1684          and
1685
1686          call (memory).  */
1687     }
1688
1689   if (TREE_CODE (stmt) == PHI_NODE
1690       && bb_for_stmt (stmt) == data->current_loop->header)
1691     {
1692       lhs = PHI_RESULT (stmt);
1693       iv = get_iv (data, lhs);
1694
1695       if (iv && !zero_p (iv->step))
1696         return;
1697     }
1698
1699   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1700     {
1701       op = USE_FROM_PTR (use_p);
1702
1703       if (TREE_CODE (op) != SSA_NAME)
1704         continue;
1705
1706       iv = get_iv (data, op);
1707       if (!iv)
1708         continue;
1709
1710       find_interesting_uses_op (data, op);
1711     }
1712 }
1713
1714 /* Finds interesting uses of induction variables outside of loops
1715    on loop exit edge EXIT.  */
1716
1717 static void
1718 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1719 {
1720   tree phi, def;
1721
1722   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1723     {
1724       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1725       find_interesting_uses_outer (data, def);
1726     }
1727 }
1728
1729 /* Finds uses of the induction variables that are interesting.  */
1730
1731 static void
1732 find_interesting_uses (struct ivopts_data *data)
1733 {
1734   basic_block bb;
1735   block_stmt_iterator bsi;
1736   tree phi;
1737   basic_block *body = get_loop_body (data->current_loop);
1738   unsigned i;
1739   struct version_info *info;
1740   edge e;
1741
1742   if (dump_file && (dump_flags & TDF_DETAILS))
1743     fprintf (dump_file, "Uses:\n\n");
1744
1745   for (i = 0; i < data->current_loop->num_nodes; i++)
1746     {
1747       edge_iterator ei;
1748       bb = body[i];
1749
1750       FOR_EACH_EDGE (e, ei, bb->succs)
1751         if (e->dest != EXIT_BLOCK_PTR
1752             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1753           find_interesting_uses_outside (data, e);
1754
1755       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1756         find_interesting_uses_stmt (data, phi);
1757       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1758         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1759     }
1760
1761   if (dump_file && (dump_flags & TDF_DETAILS))
1762     {
1763       bitmap_iterator bi;
1764
1765       fprintf (dump_file, "\n");
1766
1767       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1768         {
1769           info = ver_info (data, i);
1770           if (info->inv_id)
1771             {
1772               fprintf (dump_file, "  ");
1773               print_generic_expr (dump_file, info->name, TDF_SLIM);
1774               fprintf (dump_file, " is invariant (%d)%s\n",
1775                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1776             }
1777         }
1778
1779       fprintf (dump_file, "\n");
1780     }
1781
1782   free (body);
1783 }
1784
1785 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1786    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1787    we are at the top-level of the processed address.  */
1788
1789 static tree
1790 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1791                 unsigned HOST_WIDE_INT *offset)
1792 {
1793   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1794   enum tree_code code;
1795   tree type, orig_type = TREE_TYPE (expr);
1796   unsigned HOST_WIDE_INT off0, off1, st;
1797   tree orig_expr = expr;
1798
1799   STRIP_NOPS (expr);
1800
1801   type = TREE_TYPE (expr);
1802   code = TREE_CODE (expr);
1803   *offset = 0;
1804
1805   switch (code)
1806     {
1807     case INTEGER_CST:
1808       if (!cst_and_fits_in_hwi (expr)
1809           || zero_p (expr))
1810         return orig_expr;
1811
1812       *offset = int_cst_value (expr);
1813       return build_int_cst_type (orig_type, 0);
1814
1815     case PLUS_EXPR:
1816     case MINUS_EXPR:
1817       op0 = TREE_OPERAND (expr, 0);
1818       op1 = TREE_OPERAND (expr, 1);
1819
1820       op0 = strip_offset_1 (op0, false, false, &off0);
1821       op1 = strip_offset_1 (op1, false, false, &off1);
1822
1823       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1824       if (op0 == TREE_OPERAND (expr, 0)
1825           && op1 == TREE_OPERAND (expr, 1))
1826         return orig_expr;
1827
1828       if (zero_p (op1))
1829         expr = op0;
1830       else if (zero_p (op0))
1831         {
1832           if (code == PLUS_EXPR)
1833             expr = op1;
1834           else
1835             expr = fold_build1 (NEGATE_EXPR, type, op1);
1836         }
1837       else
1838         expr = fold_build2 (code, type, op0, op1);
1839
1840       return fold_convert (orig_type, expr);
1841
1842     case ARRAY_REF:
1843       if (!inside_addr)
1844         return orig_expr;
1845
1846       step = array_ref_element_size (expr);
1847       if (!cst_and_fits_in_hwi (step))
1848         break;
1849
1850       st = int_cst_value (step);
1851       op1 = TREE_OPERAND (expr, 1);
1852       op1 = strip_offset_1 (op1, false, false, &off1);
1853       *offset = off1 * st;
1854
1855       if (top_compref
1856           && zero_p (op1))
1857         {
1858           /* Strip the component reference completely.  */
1859           op0 = TREE_OPERAND (expr, 0);
1860           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1861           *offset += off0;
1862           return op0;
1863         }
1864       break;
1865
1866     case COMPONENT_REF:
1867       if (!inside_addr)
1868         return orig_expr;
1869
1870       tmp = component_ref_field_offset (expr);
1871       if (top_compref
1872           && cst_and_fits_in_hwi (tmp))
1873         {
1874           /* Strip the component reference completely.  */
1875           op0 = TREE_OPERAND (expr, 0);
1876           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1877           *offset = off0 + int_cst_value (tmp);
1878           return op0;
1879         }
1880       break;
1881
1882     case ADDR_EXPR:
1883       op0 = TREE_OPERAND (expr, 0);
1884       op0 = strip_offset_1 (op0, true, true, &off0);
1885       *offset += off0;
1886
1887       if (op0 == TREE_OPERAND (expr, 0))
1888         return orig_expr;
1889
1890       expr = build_fold_addr_expr (op0);
1891       return fold_convert (orig_type, expr);
1892
1893     case INDIRECT_REF:
1894       inside_addr = false;
1895       break;
1896
1897     default:
1898       return orig_expr;
1899     }
1900
1901   /* Default handling of expressions for that we want to recurse into
1902      the first operand.  */
1903   op0 = TREE_OPERAND (expr, 0);
1904   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1905   *offset += off0;
1906
1907   if (op0 == TREE_OPERAND (expr, 0)
1908       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1909     return orig_expr;
1910
1911   expr = copy_node (expr);
1912   TREE_OPERAND (expr, 0) = op0;
1913   if (op1)
1914     TREE_OPERAND (expr, 1) = op1;
1915
1916   /* Inside address, we might strip the top level component references,
1917      thus changing type of the expression.  Handling of ADDR_EXPR
1918      will fix that.  */
1919   expr = fold_convert (orig_type, expr);
1920
1921   return expr;
1922 }
1923
1924 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1925
1926 static tree
1927 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1928 {
1929   return strip_offset_1 (expr, false, false, offset);
1930 }
1931
1932 /* Returns variant of TYPE that can be used as base for different uses.
1933    For integer types, we return unsigned variant of the type, which
1934    avoids problems with overflows.  For pointer types, we return void *.  */
1935
1936 static tree
1937 generic_type_for (tree type)
1938 {
1939   if (POINTER_TYPE_P (type))
1940     return ptr_type_node;
1941
1942   if (TYPE_UNSIGNED (type))
1943     return type;
1944
1945   return unsigned_type_for (type);
1946 }
1947
1948 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1949    the bitmap to that we should store it.  */
1950
1951 static struct ivopts_data *fd_ivopts_data;
1952 static tree
1953 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1954 {
1955   bitmap *depends_on = data;
1956   struct version_info *info;
1957
1958   if (TREE_CODE (*expr_p) != SSA_NAME)
1959     return NULL_TREE;
1960   info = name_info (fd_ivopts_data, *expr_p);
1961
1962   if (!info->inv_id || info->has_nonlin_use)
1963     return NULL_TREE;
1964
1965   if (!*depends_on)
1966     *depends_on = BITMAP_ALLOC (NULL);
1967   bitmap_set_bit (*depends_on, info->inv_id);
1968
1969   return NULL_TREE;
1970 }
1971
1972 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1973    position to POS.  If USE is not NULL, the candidate is set as related to
1974    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1975    replacement of the final value of the iv by a direct computation.  */
1976
1977 static struct iv_cand *
1978 add_candidate_1 (struct ivopts_data *data,
1979                  tree base, tree step, bool important, enum iv_position pos,
1980                  struct iv_use *use, tree incremented_at)
1981 {
1982   unsigned i;
1983   struct iv_cand *cand = NULL;
1984   tree type, orig_type;
1985
1986   if (base)
1987     {
1988       orig_type = TREE_TYPE (base);
1989       type = generic_type_for (orig_type);
1990       if (type != orig_type)
1991         {
1992           base = fold_convert (type, base);
1993           if (step)
1994             step = fold_convert (type, step);
1995         }
1996     }
1997
1998   for (i = 0; i < n_iv_cands (data); i++)
1999     {
2000       cand = iv_cand (data, i);
2001
2002       if (cand->pos != pos)
2003         continue;
2004
2005       if (cand->incremented_at != incremented_at)
2006         continue;
2007
2008       if (!cand->iv)
2009         {
2010           if (!base && !step)
2011             break;
2012
2013           continue;
2014         }
2015
2016       if (!base && !step)
2017         continue;
2018
2019       if (!operand_equal_p (base, cand->iv->base, 0))
2020         continue;
2021
2022       if (zero_p (cand->iv->step))
2023         {
2024           if (zero_p (step))
2025             break;
2026         }
2027       else
2028         {
2029           if (step && operand_equal_p (step, cand->iv->step, 0))
2030             break;
2031         }
2032     }
2033
2034   if (i == n_iv_cands (data))
2035     {
2036       cand = xcalloc (1, sizeof (struct iv_cand));
2037       cand->id = i;
2038
2039       if (!base && !step)
2040         cand->iv = NULL;
2041       else
2042         cand->iv = alloc_iv (base, step);
2043
2044       cand->pos = pos;
2045       if (pos != IP_ORIGINAL && cand->iv)
2046         {
2047           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2048           cand->var_after = cand->var_before;
2049         }
2050       cand->important = important;
2051       cand->incremented_at = incremented_at;
2052       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2053
2054       if (step
2055           && TREE_CODE (step) != INTEGER_CST)
2056         {
2057           fd_ivopts_data = data;
2058           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2059         }
2060
2061       if (dump_file && (dump_flags & TDF_DETAILS))
2062         dump_cand (dump_file, cand);
2063     }
2064
2065   if (important && !cand->important)
2066     {
2067       cand->important = true;
2068       if (dump_file && (dump_flags & TDF_DETAILS))
2069         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2070     }
2071
2072   if (use)
2073     {
2074       bitmap_set_bit (use->related_cands, i);
2075       if (dump_file && (dump_flags & TDF_DETAILS))
2076         fprintf (dump_file, "Candidate %d is related to use %d\n",
2077                  cand->id, use->id);
2078     }
2079
2080   return cand;
2081 }
2082
2083 /* Returns true if incrementing the induction variable at the end of the LOOP
2084    is allowed.
2085
2086    The purpose is to avoid splitting latch edge with a biv increment, thus
2087    creating a jump, possibly confusing other optimization passes and leaving
2088    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2089    is not available (so we do not have a better alternative), or if the latch
2090    edge is already nonempty.  */
2091
2092 static bool
2093 allow_ip_end_pos_p (struct loop *loop)
2094 {
2095   if (!ip_normal_pos (loop))
2096     return true;
2097
2098   if (!empty_block_p (ip_end_pos (loop)))
2099     return true;
2100
2101   return false;
2102 }
2103
2104 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2105    position to POS.  If USE is not NULL, the candidate is set as related to
2106    it.  The candidate computation is scheduled on all available positions.  */
2107
2108 static void
2109 add_candidate (struct ivopts_data *data,
2110                tree base, tree step, bool important, struct iv_use *use)
2111 {
2112   if (ip_normal_pos (data->current_loop))
2113     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2114   if (ip_end_pos (data->current_loop)
2115       && allow_ip_end_pos_p (data->current_loop))
2116     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2117 }
2118
2119 /* Add a standard "0 + 1 * iteration" iv candidate for a
2120    type with SIZE bits.  */
2121
2122 static void
2123 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2124                                      unsigned int size)
2125 {
2126   tree type = lang_hooks.types.type_for_size (size, true);
2127   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2128                  true, NULL);
2129 }
2130
2131 /* Adds standard iv candidates.  */
2132
2133 static void
2134 add_standard_iv_candidates (struct ivopts_data *data)
2135 {
2136   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2137
2138   /* The same for a double-integer type if it is still fast enough.  */
2139   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2140     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2141 }
2142
2143
2144 /* Adds candidates bases on the old induction variable IV.  */
2145
2146 static void
2147 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2148 {
2149   tree phi, def;
2150   struct iv_cand *cand;
2151
2152   add_candidate (data, iv->base, iv->step, true, NULL);
2153
2154   /* The same, but with initial value zero.  */
2155   add_candidate (data,
2156                  build_int_cst (TREE_TYPE (iv->base), 0),
2157                  iv->step, true, NULL);
2158
2159   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2160   if (TREE_CODE (phi) == PHI_NODE)
2161     {
2162       /* Additionally record the possibility of leaving the original iv
2163          untouched.  */
2164       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2165       cand = add_candidate_1 (data,
2166                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2167                               SSA_NAME_DEF_STMT (def));
2168       cand->var_before = iv->ssa_name;
2169       cand->var_after = def;
2170     }
2171 }
2172
2173 /* Adds candidates based on the old induction variables.  */
2174
2175 static void
2176 add_old_ivs_candidates (struct ivopts_data *data)
2177 {
2178   unsigned i;
2179   struct iv *iv;
2180   bitmap_iterator bi;
2181
2182   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2183     {
2184       iv = ver_info (data, i)->iv;
2185       if (iv && iv->biv_p && !zero_p (iv->step))
2186         add_old_iv_candidates (data, iv);
2187     }
2188 }
2189
2190 /* Adds candidates based on the value of the induction variable IV and USE.  */
2191
2192 static void
2193 add_iv_value_candidates (struct ivopts_data *data,
2194                          struct iv *iv, struct iv_use *use)
2195 {
2196   unsigned HOST_WIDE_INT offset;
2197   tree base;
2198
2199   add_candidate (data, iv->base, iv->step, false, use);
2200
2201   /* The same, but with initial value zero.  Make such variable important,
2202      since it is generic enough so that possibly many uses may be based
2203      on it.  */
2204   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2205                  iv->step, true, use);
2206
2207   /* Third, try removing the constant offset.  */
2208   base = strip_offset (iv->base, &offset);
2209   if (offset)
2210     add_candidate (data, base, iv->step, false, use);
2211 }
2212
2213 /* Possibly adds pseudocandidate for replacing the final value of USE by
2214    a direct computation.  */
2215
2216 static void
2217 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2218 {
2219   struct tree_niter_desc *niter;
2220
2221   /* We must know where we exit the loop and how many times does it roll.  */
2222   niter = niter_for_single_dom_exit (data);
2223   if (!niter
2224       || !zero_p (niter->may_be_zero))
2225     return;
2226
2227   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2228 }
2229
2230 /* Adds candidates based on the uses.  */
2231
2232 static void
2233 add_derived_ivs_candidates (struct ivopts_data *data)
2234 {
2235   unsigned i;
2236
2237   for (i = 0; i < n_iv_uses (data); i++)
2238     {
2239       struct iv_use *use = iv_use (data, i);
2240
2241       if (!use)
2242         continue;
2243
2244       switch (use->type)
2245         {
2246         case USE_NONLINEAR_EXPR:
2247         case USE_COMPARE:
2248         case USE_ADDRESS:
2249           /* Just add the ivs based on the value of the iv used here.  */
2250           add_iv_value_candidates (data, use->iv, use);
2251           break;
2252
2253         case USE_OUTER:
2254           add_iv_value_candidates (data, use->iv, use);
2255
2256           /* Additionally, add the pseudocandidate for the possibility to
2257              replace the final value by a direct computation.  */
2258           add_iv_outer_candidates (data, use);
2259           break;
2260
2261         default:
2262           gcc_unreachable ();
2263         }
2264     }
2265 }
2266
2267 /* Record important candidates and add them to related_cands bitmaps
2268    if needed.  */
2269
2270 static void
2271 record_important_candidates (struct ivopts_data *data)
2272 {
2273   unsigned i;
2274   struct iv_use *use;
2275
2276   for (i = 0; i < n_iv_cands (data); i++)
2277     {
2278       struct iv_cand *cand = iv_cand (data, i);
2279
2280       if (cand->important)
2281         bitmap_set_bit (data->important_candidates, i);
2282     }
2283
2284   data->consider_all_candidates = (n_iv_cands (data)
2285                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2286
2287   if (data->consider_all_candidates)
2288     {
2289       /* We will not need "related_cands" bitmaps in this case,
2290          so release them to decrease peak memory consumption.  */
2291       for (i = 0; i < n_iv_uses (data); i++)
2292         {
2293           use = iv_use (data, i);
2294           BITMAP_FREE (use->related_cands);
2295         }
2296     }
2297   else
2298     {
2299       /* Add important candidates to the related_cands bitmaps.  */
2300       for (i = 0; i < n_iv_uses (data); i++)
2301         bitmap_ior_into (iv_use (data, i)->related_cands,
2302                          data->important_candidates);
2303     }
2304 }
2305
2306 /* Finds the candidates for the induction variables.  */
2307
2308 static void
2309 find_iv_candidates (struct ivopts_data *data)
2310 {
2311   /* Add commonly used ivs.  */
2312   add_standard_iv_candidates (data);
2313
2314   /* Add old induction variables.  */
2315   add_old_ivs_candidates (data);
2316
2317   /* Add induction variables derived from uses.  */
2318   add_derived_ivs_candidates (data);
2319
2320   /* Record the important candidates.  */
2321   record_important_candidates (data);
2322 }
2323
2324 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2325    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2326    we allocate a simple list to every use.  */
2327
2328 static void
2329 alloc_use_cost_map (struct ivopts_data *data)
2330 {
2331   unsigned i, size, s, j;
2332
2333   for (i = 0; i < n_iv_uses (data); i++)
2334     {
2335       struct iv_use *use = iv_use (data, i);
2336       bitmap_iterator bi;
2337
2338       if (data->consider_all_candidates)
2339         size = n_iv_cands (data);
2340       else
2341         {
2342           s = 0;
2343           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2344             {
2345               s++;
2346             }
2347
2348           /* Round up to the power of two, so that moduling by it is fast.  */
2349           for (size = 1; size < s; size <<= 1)
2350             continue;
2351         }
2352
2353       use->n_map_members = size;
2354       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2355     }
2356 }
2357
2358 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2359    on invariants DEPENDS_ON and that the value used in expressing it
2360    is VALUE.*/
2361
2362 static void
2363 set_use_iv_cost (struct ivopts_data *data,
2364                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2365                  bitmap depends_on, tree value)
2366 {
2367   unsigned i, s;
2368
2369   if (cost == INFTY)
2370     {
2371       BITMAP_FREE (depends_on);
2372       return;
2373     }
2374
2375   if (data->consider_all_candidates)
2376     {
2377       use->cost_map[cand->id].cand = cand;
2378       use->cost_map[cand->id].cost = cost;
2379       use->cost_map[cand->id].depends_on = depends_on;
2380       use->cost_map[cand->id].value = value;
2381       return;
2382     }
2383
2384   /* n_map_members is a power of two, so this computes modulo.  */
2385   s = cand->id & (use->n_map_members - 1);
2386   for (i = s; i < use->n_map_members; i++)
2387     if (!use->cost_map[i].cand)
2388       goto found;
2389   for (i = 0; i < s; i++)
2390     if (!use->cost_map[i].cand)
2391       goto found;
2392
2393   gcc_unreachable ();
2394
2395 found:
2396   use->cost_map[i].cand = cand;
2397   use->cost_map[i].cost = cost;
2398   use->cost_map[i].depends_on = depends_on;
2399   use->cost_map[i].value = value;
2400 }
2401
2402 /* Gets cost of (USE, CANDIDATE) pair.  */
2403
2404 static struct cost_pair *
2405 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2406                  struct iv_cand *cand)
2407 {
2408   unsigned i, s;
2409   struct cost_pair *ret;
2410
2411   if (!cand)
2412     return NULL;
2413
2414   if (data->consider_all_candidates)
2415     {
2416       ret = use->cost_map + cand->id;
2417       if (!ret->cand)
2418         return NULL;
2419
2420       return ret;
2421     }
2422
2423   /* n_map_members is a power of two, so this computes modulo.  */
2424   s = cand->id & (use->n_map_members - 1);
2425   for (i = s; i < use->n_map_members; i++)
2426     if (use->cost_map[i].cand == cand)
2427       return use->cost_map + i;
2428
2429   for (i = 0; i < s; i++)
2430     if (use->cost_map[i].cand == cand)
2431       return use->cost_map + i;
2432
2433   return NULL;
2434 }
2435
2436 /* Returns estimate on cost of computing SEQ.  */
2437
2438 static unsigned
2439 seq_cost (rtx seq)
2440 {
2441   unsigned cost = 0;
2442   rtx set;
2443
2444   for (; seq; seq = NEXT_INSN (seq))
2445     {
2446       set = single_set (seq);
2447       if (set)
2448         cost += rtx_cost (set, SET);
2449       else
2450         cost++;
2451     }
2452
2453   return cost;
2454 }
2455
2456 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2457 static rtx
2458 produce_memory_decl_rtl (tree obj, int *regno)
2459 {
2460   rtx x;
2461
2462   gcc_assert (obj);
2463   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2464     {
2465       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2466       x = gen_rtx_SYMBOL_REF (Pmode, name);
2467     }
2468   else
2469     x = gen_raw_REG (Pmode, (*regno)++);
2470
2471   return gen_rtx_MEM (DECL_MODE (obj), x);
2472 }
2473
2474 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2475    walk_tree.  DATA contains the actual fake register number.  */
2476
2477 static tree
2478 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2479 {
2480   tree obj = NULL_TREE;
2481   rtx x = NULL_RTX;
2482   int *regno = data;
2483
2484   switch (TREE_CODE (*expr_p))
2485     {
2486     case ADDR_EXPR:
2487       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2488            handled_component_p (*expr_p);
2489            expr_p = &TREE_OPERAND (*expr_p, 0))
2490         continue;
2491       obj = *expr_p;
2492       if (DECL_P (obj))
2493         x = produce_memory_decl_rtl (obj, regno);
2494       break;
2495
2496     case SSA_NAME:
2497       *ws = 0;
2498       obj = SSA_NAME_VAR (*expr_p);
2499       if (!DECL_RTL_SET_P (obj))
2500         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2501       break;
2502
2503     case VAR_DECL:
2504     case PARM_DECL:
2505     case RESULT_DECL:
2506       *ws = 0;
2507       obj = *expr_p;
2508
2509       if (DECL_RTL_SET_P (obj))
2510         break;
2511
2512       if (DECL_MODE (obj) == BLKmode)
2513         x = produce_memory_decl_rtl (obj, regno);
2514       else
2515         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2516
2517       break;
2518
2519     default:
2520       break;
2521     }
2522
2523   if (x)
2524     {
2525       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2526       SET_DECL_RTL (obj, x);
2527     }
2528
2529   return NULL_TREE;
2530 }
2531
2532 /* Determines cost of the computation of EXPR.  */
2533
2534 static unsigned
2535 computation_cost (tree expr)
2536 {
2537   rtx seq, rslt;
2538   tree type = TREE_TYPE (expr);
2539   unsigned cost;
2540   /* Avoid using hard regs in ways which may be unsupported.  */
2541   int regno = LAST_VIRTUAL_REGISTER + 1;
2542
2543   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2544   start_sequence ();
2545   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2546   seq = get_insns ();
2547   end_sequence ();
2548
2549   cost = seq_cost (seq);
2550   if (MEM_P (rslt))
2551     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2552
2553   return cost;
2554 }
2555
2556 /* Returns variable containing the value of candidate CAND at statement AT.  */
2557
2558 static tree
2559 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2560 {
2561   if (stmt_after_increment (loop, cand, stmt))
2562     return cand->var_after;
2563   else
2564     return cand->var_before;
2565 }
2566
2567 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2568    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2569
2570 int
2571 tree_int_cst_sign_bit (tree t)
2572 {
2573   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2574   unsigned HOST_WIDE_INT w;
2575
2576   if (bitno < HOST_BITS_PER_WIDE_INT)
2577     w = TREE_INT_CST_LOW (t);
2578   else
2579     {
2580       w = TREE_INT_CST_HIGH (t);
2581       bitno -= HOST_BITS_PER_WIDE_INT;
2582     }
2583
2584   return (w >> bitno) & 1;
2585 }
2586
2587 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2588    return cst.  Otherwise return NULL_TREE.  */
2589
2590 static tree
2591 constant_multiple_of (tree type, tree top, tree bot)
2592 {
2593   tree res, mby, p0, p1;
2594   enum tree_code code;
2595   bool negate;
2596
2597   STRIP_NOPS (top);
2598   STRIP_NOPS (bot);
2599
2600   if (operand_equal_p (top, bot, 0))
2601     return build_int_cst (type, 1);
2602
2603   code = TREE_CODE (top);
2604   switch (code)
2605     {
2606     case MULT_EXPR:
2607       mby = TREE_OPERAND (top, 1);
2608       if (TREE_CODE (mby) != INTEGER_CST)
2609         return NULL_TREE;
2610
2611       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2612       if (!res)
2613         return NULL_TREE;
2614
2615       return fold_binary_to_constant (MULT_EXPR, type, res,
2616                                       fold_convert (type, mby));
2617
2618     case PLUS_EXPR:
2619     case MINUS_EXPR:
2620       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2621       if (!p0)
2622         return NULL_TREE;
2623       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2624       if (!p1)
2625         return NULL_TREE;
2626
2627       return fold_binary_to_constant (code, type, p0, p1);
2628
2629     case INTEGER_CST:
2630       if (TREE_CODE (bot) != INTEGER_CST)
2631         return NULL_TREE;
2632
2633       bot = fold_convert (type, bot);
2634       top = fold_convert (type, top);
2635
2636       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2637          the result afterwards.  */
2638       if (tree_int_cst_sign_bit (bot))
2639         {
2640           negate = true;
2641           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2642         }
2643       else
2644         negate = false;
2645
2646       /* Ditto for TOP.  */
2647       if (tree_int_cst_sign_bit (top))
2648         {
2649           negate = !negate;
2650           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2651         }
2652
2653       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2654         return NULL_TREE;
2655
2656       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2657       if (negate)
2658         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2659       return res;
2660
2661     default:
2662       return NULL_TREE;
2663     }
2664 }
2665
2666 /* Sets COMB to CST.  */
2667
2668 static void
2669 aff_combination_const (struct affine_tree_combination *comb, tree type,
2670                        unsigned HOST_WIDE_INT cst)
2671 {
2672   unsigned prec = TYPE_PRECISION (type);
2673
2674   comb->type = type;
2675   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2676
2677   comb->n = 0;
2678   comb->rest = NULL_TREE;
2679   comb->offset = cst & comb->mask;
2680 }
2681
2682 /* Sets COMB to single element ELT.  */
2683
2684 static void
2685 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2686 {
2687   unsigned prec = TYPE_PRECISION (type);
2688
2689   comb->type = type;
2690   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2691
2692   comb->n = 1;
2693   comb->elts[0] = elt;
2694   comb->coefs[0] = 1;
2695   comb->rest = NULL_TREE;
2696   comb->offset = 0;
2697 }
2698
2699 /* Scales COMB by SCALE.  */
2700
2701 static void
2702 aff_combination_scale (struct affine_tree_combination *comb,
2703                        unsigned HOST_WIDE_INT scale)
2704 {
2705   unsigned i, j;
2706
2707   if (scale == 1)
2708     return;
2709
2710   if (scale == 0)
2711     {
2712       aff_combination_const (comb, comb->type, 0);
2713       return;
2714     }
2715
2716   comb->offset = (scale * comb->offset) & comb->mask;
2717   for (i = 0, j = 0; i < comb->n; i++)
2718     {
2719       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2720       comb->elts[j] = comb->elts[i];
2721       if (comb->coefs[j] != 0)
2722         j++;
2723     }
2724   comb->n = j;
2725
2726   if (comb->rest)
2727     {
2728       if (comb->n < MAX_AFF_ELTS)
2729         {
2730           comb->coefs[comb->n] = scale;
2731           comb->elts[comb->n] = comb->rest;
2732           comb->rest = NULL_TREE;
2733           comb->n++;
2734         }
2735       else
2736         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2737                                   build_int_cst_type (comb->type, scale));
2738     }
2739 }
2740
2741 /* Adds ELT * SCALE to COMB.  */
2742
2743 static void
2744 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2745                          unsigned HOST_WIDE_INT scale)
2746 {
2747   unsigned i;
2748
2749   if (scale == 0)
2750     return;
2751
2752   for (i = 0; i < comb->n; i++)
2753     if (operand_equal_p (comb->elts[i], elt, 0))
2754       {
2755         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2756         if (comb->coefs[i])
2757           return;
2758
2759         comb->n--;
2760         comb->coefs[i] = comb->coefs[comb->n];
2761         comb->elts[i] = comb->elts[comb->n];
2762         return;
2763       }
2764   if (comb->n < MAX_AFF_ELTS)
2765     {
2766       comb->coefs[comb->n] = scale;
2767       comb->elts[comb->n] = elt;
2768       comb->n++;
2769       return;
2770     }
2771
2772   if (scale == 1)
2773     elt = fold_convert (comb->type, elt);
2774   else
2775     elt = fold_build2 (MULT_EXPR, comb->type,
2776                        fold_convert (comb->type, elt),
2777                        build_int_cst_type (comb->type, scale));
2778
2779   if (comb->rest)
2780     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2781   else
2782     comb->rest = elt;
2783 }
2784
2785 /* Adds COMB2 to COMB1.  */
2786
2787 static void
2788 aff_combination_add (struct affine_tree_combination *comb1,
2789                      struct affine_tree_combination *comb2)
2790 {
2791   unsigned i;
2792
2793   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2794   for (i = 0; i < comb2-> n; i++)
2795     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2796   if (comb2->rest)
2797     aff_combination_add_elt (comb1, comb2->rest, 1);
2798 }
2799
2800 /* Splits EXPR into an affine combination of parts.  */
2801
2802 static void
2803 tree_to_aff_combination (tree expr, tree type,
2804                          struct affine_tree_combination *comb)
2805 {
2806   struct affine_tree_combination tmp;
2807   enum tree_code code;
2808   tree cst, core, toffset;
2809   HOST_WIDE_INT bitpos, bitsize;
2810   enum machine_mode mode;
2811   int unsignedp, volatilep;
2812
2813   STRIP_NOPS (expr);
2814
2815   code = TREE_CODE (expr);
2816   switch (code)
2817     {
2818     case INTEGER_CST:
2819       aff_combination_const (comb, type, int_cst_value (expr));
2820       return;
2821
2822     case PLUS_EXPR:
2823     case MINUS_EXPR:
2824       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2825       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2826       if (code == MINUS_EXPR)
2827         aff_combination_scale (&tmp, -1);
2828       aff_combination_add (comb, &tmp);
2829       return;
2830
2831     case MULT_EXPR:
2832       cst = TREE_OPERAND (expr, 1);
2833       if (TREE_CODE (cst) != INTEGER_CST)
2834         break;
2835       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836       aff_combination_scale (comb, int_cst_value (cst));
2837       return;
2838
2839     case NEGATE_EXPR:
2840       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2841       aff_combination_scale (comb, -1);
2842       return;
2843
2844     case ADDR_EXPR:
2845       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2846                                   &toffset, &mode, &unsignedp, &volatilep,
2847                                   false);
2848       if (bitpos % BITS_PER_UNIT != 0)
2849         break;
2850       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2851       core = build_fold_addr_expr (core);
2852       if (TREE_CODE (core) == ADDR_EXPR)
2853         aff_combination_add_elt (comb, core, 1);
2854       else
2855         {
2856           tree_to_aff_combination (core, type, &tmp);
2857           aff_combination_add (comb, &tmp);
2858         }
2859       if (toffset)
2860         {
2861           tree_to_aff_combination (toffset, type, &tmp);
2862           aff_combination_add (comb, &tmp);
2863         }
2864       return;
2865
2866     default:
2867       break;
2868     }
2869
2870   aff_combination_elt (comb, type, expr);
2871 }
2872
2873 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2874
2875 static tree
2876 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2877                  unsigned HOST_WIDE_INT mask)
2878 {
2879   enum tree_code code;
2880
2881   scale &= mask;
2882   elt = fold_convert (type, elt);
2883
2884   if (scale == 1)
2885     {
2886       if (!expr)
2887         return elt;
2888
2889       return fold_build2 (PLUS_EXPR, type, expr, elt);
2890     }
2891
2892   if (scale == mask)
2893     {
2894       if (!expr)
2895         return fold_build1 (NEGATE_EXPR, type, elt);
2896
2897       return fold_build2 (MINUS_EXPR, type, expr, elt);
2898     }
2899
2900   if (!expr)
2901     return fold_build2 (MULT_EXPR, type, elt,
2902                         build_int_cst_type (type, scale));
2903
2904   if ((scale | (mask >> 1)) == mask)
2905     {
2906       /* Scale is negative.  */
2907       code = MINUS_EXPR;
2908       scale = (-scale) & mask;
2909     }
2910   else
2911     code = PLUS_EXPR;
2912
2913   elt = fold_build2 (MULT_EXPR, type, elt,
2914                      build_int_cst_type (type, scale));
2915   return fold_build2 (code, type, expr, elt);
2916 }
2917
2918 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2919
2920 static void
2921 unshare_aff_combination (struct affine_tree_combination *comb)
2922 {
2923   unsigned i;
2924
2925   for (i = 0; i < comb->n; i++)
2926     comb->elts[i] = unshare_expr (comb->elts[i]);
2927   if (comb->rest)
2928     comb->rest = unshare_expr (comb->rest);
2929 }
2930
2931 /* Makes tree from the affine combination COMB.  */
2932
2933 static tree
2934 aff_combination_to_tree (struct affine_tree_combination *comb)
2935 {
2936   tree type = comb->type;
2937   tree expr = comb->rest;
2938   unsigned i;
2939   unsigned HOST_WIDE_INT off, sgn;
2940
2941   /* Handle the special case produced by get_computation_aff when
2942      the type does not fit in HOST_WIDE_INT.  */
2943   if (comb->n == 0 && comb->offset == 0)
2944     return fold_convert (type, expr);
2945
2946   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2947
2948   for (i = 0; i < comb->n; i++)
2949     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2950                             comb->mask);
2951
2952   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2953     {
2954       /* Offset is negative.  */
2955       off = (-comb->offset) & comb->mask;
2956       sgn = comb->mask;
2957     }
2958   else
2959     {
2960       off = comb->offset;
2961       sgn = 1;
2962     }
2963   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2964                           comb->mask);
2965 }
2966
2967 /* Determines the expression by that USE is expressed from induction variable
2968    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2969    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2970
2971 static bool
2972 get_computation_aff (struct loop *loop,
2973                      struct iv_use *use, struct iv_cand *cand, tree at,
2974                      struct affine_tree_combination *aff)
2975 {
2976   tree ubase = use->iv->base;
2977   tree ustep = use->iv->step;
2978   tree cbase = cand->iv->base;
2979   tree cstep = cand->iv->step;
2980   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2981   tree uutype;
2982   tree expr, delta;
2983   tree ratio;
2984   unsigned HOST_WIDE_INT ustepi, cstepi;
2985   HOST_WIDE_INT ratioi;
2986   struct affine_tree_combination cbase_aff, expr_aff;
2987   tree cstep_orig = cstep, ustep_orig = ustep;
2988
2989   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2990     {
2991       /* We do not have a precision to express the values of use.  */
2992       return false;
2993     }
2994
2995   expr = var_at_stmt (loop, cand, at);
2996
2997   if (TREE_TYPE (expr) != ctype)
2998     {
2999       /* This may happen with the original ivs.  */
3000       expr = fold_convert (ctype, expr);
3001     }
3002
3003   if (TYPE_UNSIGNED (utype))
3004     uutype = utype;
3005   else
3006     {
3007       uutype = unsigned_type_for (utype);
3008       ubase = fold_convert (uutype, ubase);
3009       ustep = fold_convert (uutype, ustep);
3010     }
3011
3012   if (uutype != ctype)
3013     {
3014       expr = fold_convert (uutype, expr);
3015       cbase = fold_convert (uutype, cbase);
3016       cstep = fold_convert (uutype, cstep);
3017
3018       /* If the conversion is not noop, we must take it into account when
3019          considering the value of the step.  */
3020       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3021         cstep_orig = cstep;
3022     }
3023
3024   if (cst_and_fits_in_hwi (cstep_orig)
3025       && cst_and_fits_in_hwi (ustep_orig))
3026     {
3027       ustepi = int_cst_value (ustep_orig);
3028       cstepi = int_cst_value (cstep_orig);
3029
3030       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3031         {
3032           /* TODO maybe consider case when ustep divides cstep and the ratio is
3033              a power of 2 (so that the division is fast to execute)?  We would
3034              need to be much more careful with overflows etc. then.  */
3035           return false;
3036         }
3037
3038       ratio = build_int_cst_type (uutype, ratioi);
3039     }
3040   else
3041     {
3042       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3043       if (!ratio)
3044         return false;
3045
3046       /* Ratioi is only used to detect special cases when the multiplicative
3047          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3048          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3049          to integer_onep/integer_all_onesp, since the former ignores
3050          TREE_OVERFLOW.  */
3051       if (cst_and_fits_in_hwi (ratio))
3052         ratioi = int_cst_value (ratio);
3053       else if (integer_onep (ratio))
3054         ratioi = 1;
3055       else if (integer_all_onesp (ratio))
3056         ratioi = -1;
3057       else
3058         ratioi = 0;
3059     }
3060
3061   /* We may need to shift the value if we are after the increment.  */
3062   if (stmt_after_increment (loop, cand, at))
3063     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3064
3065   /* use = ubase - ratio * cbase + ratio * var.
3066
3067      In general case ubase + ratio * (var - cbase) could be better (one less
3068      multiplication), but often it is possible to eliminate redundant parts
3069      of computations from (ubase - ratio * cbase) term, and if it does not
3070      happen, fold is able to apply the distributive law to obtain this form
3071      anyway.  */
3072
3073   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3074     {
3075       /* Let's compute in trees and just return the result in AFF.  This case
3076          should not be very common, and fold itself is not that bad either,
3077          so making the aff. functions more complicated to handle this case
3078          is not that urgent.  */
3079       if (ratioi == 1)
3080         {
3081           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3082           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3083         }
3084       else if (ratioi == -1)
3085         {
3086           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3087           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3088         }
3089       else
3090         {
3091           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3092           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3093           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3094           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3095         }
3096
3097       aff->type = uutype;
3098       aff->n = 0;
3099       aff->offset = 0;
3100       aff->mask = 0;
3101       aff->rest = expr;
3102       return true;
3103     }
3104
3105   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3106      possible to compute ratioi.  */
3107   gcc_assert (ratioi);
3108
3109   tree_to_aff_combination (ubase, uutype, aff);
3110   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3111   tree_to_aff_combination (expr, uutype, &expr_aff);
3112   aff_combination_scale (&cbase_aff, -ratioi);
3113   aff_combination_scale (&expr_aff, ratioi);
3114   aff_combination_add (aff, &cbase_aff);
3115   aff_combination_add (aff, &expr_aff);
3116
3117   return true;
3118 }
3119
3120 /* Determines the expression by that USE is expressed from induction variable
3121    CAND at statement AT in LOOP.  The computation is unshared.  */
3122
3123 static tree
3124 get_computation_at (struct loop *loop,
3125                     struct iv_use *use, struct iv_cand *cand, tree at)
3126 {
3127   struct affine_tree_combination aff;
3128   tree type = TREE_TYPE (use->iv->base);
3129
3130   if (!get_computation_aff (loop, use, cand, at, &aff))
3131     return NULL_TREE;
3132   unshare_aff_combination (&aff);
3133   return fold_convert (type, aff_combination_to_tree (&aff));
3134 }
3135
3136 /* Determines the expression by that USE is expressed from induction variable
3137    CAND in LOOP.  The computation is unshared.  */
3138
3139 static tree
3140 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3141 {
3142   return get_computation_at (loop, use, cand, use->stmt);
3143 }
3144
3145 /* Returns cost of addition in MODE.  */
3146
3147 static unsigned
3148 add_cost (enum machine_mode mode)
3149 {
3150   static unsigned costs[NUM_MACHINE_MODES];
3151   rtx seq;
3152   unsigned cost;
3153
3154   if (costs[mode])
3155     return costs[mode];
3156
3157   start_sequence ();
3158   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3159                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3160                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3161                  NULL_RTX);
3162   seq = get_insns ();
3163   end_sequence ();
3164
3165   cost = seq_cost (seq);
3166   if (!cost)
3167     cost = 1;
3168
3169   costs[mode] = cost;
3170
3171   if (dump_file && (dump_flags & TDF_DETAILS))
3172     fprintf (dump_file, "Addition in %s costs %d\n",
3173              GET_MODE_NAME (mode), cost);
3174   return cost;
3175 }
3176
3177 /* Entry in a hashtable of already known costs for multiplication.  */
3178 struct mbc_entry
3179 {
3180   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3181   enum machine_mode mode;       /* In mode.  */
3182   unsigned cost;                /* The cost.  */
3183 };
3184
3185 /* Counts hash value for the ENTRY.  */
3186
3187 static hashval_t
3188 mbc_entry_hash (const void *entry)
3189 {
3190   const struct mbc_entry *e = entry;
3191
3192   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3193 }
3194
3195 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3196
3197 static int
3198 mbc_entry_eq (const void *entry1, const void *entry2)
3199 {
3200   const struct mbc_entry *e1 = entry1;
3201   const struct mbc_entry *e2 = entry2;
3202
3203   return (e1->mode == e2->mode
3204           && e1->cst == e2->cst);
3205 }
3206
3207 /* Returns cost of multiplication by constant CST in MODE.  */
3208
3209 unsigned
3210 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3211 {
3212   static htab_t costs;
3213   struct mbc_entry **cached, act;
3214   rtx seq;
3215   unsigned cost;
3216
3217   if (!costs)
3218     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3219
3220   act.mode = mode;
3221   act.cst = cst;
3222   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3223   if (*cached)
3224     return (*cached)->cost;
3225
3226   *cached = xmalloc (sizeof (struct mbc_entry));
3227   (*cached)->mode = mode;
3228   (*cached)->cst = cst;
3229
3230   start_sequence ();
3231   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3232                gen_int_mode (cst, mode), NULL_RTX, 0);
3233   seq = get_insns ();
3234   end_sequence ();
3235
3236   cost = seq_cost (seq);
3237
3238   if (dump_file && (dump_flags & TDF_DETAILS))
3239     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3240              (int) cst, GET_MODE_NAME (mode), cost);
3241
3242   (*cached)->cost = cost;
3243
3244   return cost;
3245 }
3246
3247 /* Returns true if multiplying by RATIO is allowed in address.  */
3248
3249 bool
3250 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3251 {
3252 #define MAX_RATIO 128
3253   static sbitmap valid_mult;
3254
3255   if (!valid_mult)
3256     {
3257       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3258       rtx addr;
3259       HOST_WIDE_INT i;
3260
3261       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3262       sbitmap_zero (valid_mult);
3263       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3264       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3265         {
3266           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3267           if (memory_address_p (Pmode, addr))
3268             SET_BIT (valid_mult, i + MAX_RATIO);
3269         }
3270
3271       if (dump_file && (dump_flags & TDF_DETAILS))
3272         {
3273           fprintf (dump_file, "  allowed multipliers:");
3274           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3276               fprintf (dump_file, " %d", (int) i);
3277           fprintf (dump_file, "\n");
3278           fprintf (dump_file, "\n");
3279         }
3280     }
3281
3282   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3283     return false;
3284
3285   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3286 }
3287
3288 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3289    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3290    variable is omitted.  The created memory accesses MODE.
3291
3292    TODO -- there must be some better way.  This all is quite crude.  */
3293
3294 static unsigned
3295 get_address_cost (bool symbol_present, bool var_present,
3296                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3297 {
3298   static bool initialized = false;
3299   static HOST_WIDE_INT rat, off;
3300   static HOST_WIDE_INT min_offset, max_offset;
3301   static unsigned costs[2][2][2][2];
3302   unsigned cost, acost;
3303   rtx seq, addr, base;
3304   bool offset_p, ratio_p;
3305   rtx reg1;
3306   HOST_WIDE_INT s_offset;
3307   unsigned HOST_WIDE_INT mask;
3308   unsigned bits;
3309
3310   if (!initialized)
3311     {
3312       HOST_WIDE_INT i;
3313       initialized = true;
3314
3315       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3316
3317       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3318       for (i = 1; i <= 1 << 20; i <<= 1)
3319         {
3320           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3321           if (!memory_address_p (Pmode, addr))
3322             break;
3323         }
3324       max_offset = i >> 1;
3325       off = max_offset;
3326
3327       for (i = 1; i <= 1 << 20; i <<= 1)
3328         {
3329           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3330           if (!memory_address_p (Pmode, addr))
3331             break;
3332         }
3333       min_offset = -(i >> 1);
3334
3335       if (dump_file && (dump_flags & TDF_DETAILS))
3336         {
3337           fprintf (dump_file, "get_address_cost:\n");
3338           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3339           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3340         }
3341
3342       rat = 1;
3343       for (i = 2; i <= MAX_RATIO; i++)
3344         if (multiplier_allowed_in_address_p (i))
3345           {
3346             rat = i;
3347             break;
3348           }
3349     }
3350
3351   bits = GET_MODE_BITSIZE (Pmode);
3352   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3353   offset &= mask;
3354   if ((offset >> (bits - 1) & 1))
3355     offset |= ~mask;
3356   s_offset = offset;
3357
3358   cost = 0;
3359   offset_p = (s_offset != 0
3360               && min_offset <= s_offset && s_offset <= max_offset);
3361   ratio_p = (ratio != 1
3362              && multiplier_allowed_in_address_p (ratio));
3363
3364   if (ratio != 1 && !ratio_p)
3365     cost += multiply_by_cost (ratio, Pmode);
3366
3367   if (s_offset && !offset_p && !symbol_present)
3368     {
3369       cost += add_cost (Pmode);
3370       var_present = true;
3371     }
3372
3373   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3374   if (!acost)
3375     {
3376       acost = 0;
3377
3378       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3379       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3380       if (ratio_p)
3381         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3382
3383       if (var_present)
3384         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3385
3386       if (symbol_present)
3387         {
3388           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3389           if (offset_p)
3390             base = gen_rtx_fmt_e (CONST, Pmode,
3391                                   gen_rtx_fmt_ee (PLUS, Pmode,
3392                                                   base,
3393                                                   gen_int_mode (off, Pmode)));
3394         }
3395       else if (offset_p)
3396         base = gen_int_mode (off, Pmode);
3397       else
3398         base = NULL_RTX;
3399
3400       if (base)
3401         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3402
3403       start_sequence ();
3404       addr = memory_address (Pmode, addr);
3405       seq = get_insns ();
3406       end_sequence ();
3407
3408       acost = seq_cost (seq);
3409       acost += address_cost (addr, Pmode);
3410
3411       if (!acost)
3412         acost = 1;
3413       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3414     }
3415
3416   return cost + acost;
3417 }
3418
3419 /* Estimates cost of forcing expression EXPR into a variable.  */
3420
3421 unsigned
3422 force_expr_to_var_cost (tree expr)
3423 {
3424   static bool costs_initialized = false;
3425   static unsigned integer_cost;
3426   static unsigned symbol_cost;
3427   static unsigned address_cost;
3428   tree op0, op1;
3429   unsigned cost0, cost1, cost;
3430   enum machine_mode mode;
3431
3432   if (!costs_initialized)
3433     {
3434       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3435       rtx x = gen_rtx_MEM (DECL_MODE (var),
3436                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3437       tree addr;
3438       tree type = build_pointer_type (integer_type_node);
3439
3440       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3441                                                            2000));
3442
3443       SET_DECL_RTL (var, x);
3444       TREE_STATIC (var) = 1;
3445       addr = build1 (ADDR_EXPR, type, var);
3446       symbol_cost = computation_cost (addr) + 1;
3447
3448       address_cost
3449         = computation_cost (build2 (PLUS_EXPR, type,
3450                                     addr,
3451                                     build_int_cst_type (type, 2000))) + 1;
3452       if (dump_file && (dump_flags & TDF_DETAILS))
3453         {
3454           fprintf (dump_file, "force_expr_to_var_cost:\n");
3455           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3456           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3457           fprintf (dump_file, "  address %d\n", (int) address_cost);
3458           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3459           fprintf (dump_file, "\n");
3460         }
3461
3462       costs_initialized = true;
3463     }
3464
3465   STRIP_NOPS (expr);
3466
3467   if (SSA_VAR_P (expr))
3468     return 0;
3469
3470   if (TREE_INVARIANT (expr))
3471     {
3472       if (TREE_CODE (expr) == INTEGER_CST)
3473         return integer_cost;
3474
3475       if (TREE_CODE (expr) == ADDR_EXPR)
3476         {
3477           tree obj = TREE_OPERAND (expr, 0);
3478
3479           if (TREE_CODE (obj) == VAR_DECL
3480               || TREE_CODE (obj) == PARM_DECL
3481               || TREE_CODE (obj) == RESULT_DECL)
3482             return symbol_cost;
3483         }
3484
3485       return address_cost;
3486     }
3487
3488   switch (TREE_CODE (expr))
3489     {
3490     case PLUS_EXPR:
3491     case MINUS_EXPR:
3492     case MULT_EXPR:
3493       op0 = TREE_OPERAND (expr, 0);
3494       op1 = TREE_OPERAND (expr, 1);
3495       STRIP_NOPS (op0);
3496       STRIP_NOPS (op1);
3497
3498       if (is_gimple_val (op0))
3499         cost0 = 0;
3500       else
3501         cost0 = force_expr_to_var_cost (op0);
3502
3503       if (is_gimple_val (op1))
3504         cost1 = 0;
3505       else
3506         cost1 = force_expr_to_var_cost (op1);
3507
3508       break;
3509
3510     default:
3511       /* Just an arbitrary value, FIXME.  */
3512       return target_spill_cost;
3513     }
3514
3515   mode = TYPE_MODE (TREE_TYPE (expr));
3516   switch (TREE_CODE (expr))
3517     {
3518     case PLUS_EXPR:
3519     case MINUS_EXPR:
3520       cost = add_cost (mode);
3521       break;
3522
3523     case MULT_EXPR:
3524       if (cst_and_fits_in_hwi (op0))
3525         cost = multiply_by_cost (int_cst_value (op0), mode);
3526       else if (cst_and_fits_in_hwi (op1))
3527         cost = multiply_by_cost (int_cst_value (op1), mode);
3528       else
3529         return target_spill_cost;
3530       break;
3531
3532     default:
3533       gcc_unreachable ();
3534     }
3535
3536   cost += cost0;
3537   cost += cost1;
3538
3539   /* Bound the cost by target_spill_cost.  The parts of complicated
3540      computations often are either loop invariant or at least can
3541      be shared between several iv uses, so letting this grow without
3542      limits would not give reasonable results.  */
3543   return cost < target_spill_cost ? cost : target_spill_cost;
3544 }
3545
3546 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3547    invariants the computation depends on.  */
3548
3549 static unsigned
3550 force_var_cost (struct ivopts_data *data,
3551                 tree expr, bitmap *depends_on)
3552 {
3553   if (depends_on)
3554     {
3555       fd_ivopts_data = data;
3556       walk_tree (&expr, find_depends, depends_on, NULL);
3557     }
3558
3559   return force_expr_to_var_cost (expr);
3560 }
3561
3562 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3563    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3564    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3565    invariants the computation depends on.  */
3566
3567 static unsigned
3568 split_address_cost (struct ivopts_data *data,
3569                     tree addr, bool *symbol_present, bool *var_present,
3570                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3571 {
3572   tree core;
3573   HOST_WIDE_INT bitsize;
3574   HOST_WIDE_INT bitpos;
3575   tree toffset;
3576   enum machine_mode mode;
3577   int unsignedp, volatilep;
3578
3579   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3580                               &unsignedp, &volatilep, false);
3581
3582   if (toffset != 0
3583       || bitpos % BITS_PER_UNIT != 0
3584       || TREE_CODE (core) != VAR_DECL)
3585     {
3586       *symbol_present = false;
3587       *var_present = true;
3588       fd_ivopts_data = data;
3589       walk_tree (&addr, find_depends, depends_on, NULL);
3590       return target_spill_cost;
3591     }
3592
3593   *offset += bitpos / BITS_PER_UNIT;
3594   if (TREE_STATIC (core)
3595       || DECL_EXTERNAL (core))
3596     {
3597       *symbol_present = true;
3598       *var_present = false;
3599       return 0;
3600     }
3601
3602   *symbol_present = false;
3603   *var_present = true;
3604   return 0;
3605 }
3606
3607 /* Estimates cost of expressing difference of addresses E1 - E2 as
3608    var + symbol + offset.  The value of offset is added to OFFSET,
3609    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3610    part is missing.  DEPENDS_ON is a set of the invariants the computation
3611    depends on.  */
3612
3613 static unsigned
3614 ptr_difference_cost (struct ivopts_data *data,
3615                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3616                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3617 {
3618   HOST_WIDE_INT diff = 0;
3619   unsigned cost;
3620
3621   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3622
3623   if (ptr_difference_const (e1, e2, &diff))
3624     {
3625       *offset += diff;
3626       *symbol_present = false;
3627       *var_present = false;
3628       return 0;
3629     }
3630
3631   if (e2 == integer_zero_node)
3632     return split_address_cost (data, TREE_OPERAND (e1, 0),
3633                                symbol_present, var_present, offset, depends_on);
3634
3635   *symbol_present = false;
3636   *var_present = true;
3637
3638   cost = force_var_cost (data, e1, depends_on);
3639   cost += force_var_cost (data, e2, depends_on);
3640   cost += add_cost (Pmode);
3641
3642   return cost;
3643 }
3644
3645 /* Estimates cost of expressing difference E1 - E2 as
3646    var + symbol + offset.  The value of offset is added to OFFSET,
3647    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3648    part is missing.  DEPENDS_ON is a set of the invariants the computation
3649    depends on.  */
3650
3651 static unsigned
3652 difference_cost (struct ivopts_data *data,
3653                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3654                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3655 {
3656   unsigned cost;
3657   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3658   unsigned HOST_WIDE_INT off1, off2;
3659
3660   e1 = strip_offset (e1, &off1);
3661   e2 = strip_offset (e2, &off2);
3662   *offset += off1 - off2;
3663
3664   STRIP_NOPS (e1);
3665   STRIP_NOPS (e2);
3666
3667   if (TREE_CODE (e1) == ADDR_EXPR)
3668     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3669                                 depends_on);
3670   *symbol_present = false;
3671
3672   if (operand_equal_p (e1, e2, 0))
3673     {
3674       *var_present = false;
3675       return 0;
3676     }
3677   *var_present = true;
3678   if (zero_p (e2))
3679     return force_var_cost (data, e1, depends_on);
3680
3681   if (zero_p (e1))
3682     {
3683       cost = force_var_cost (data, e2, depends_on);
3684       cost += multiply_by_cost (-1, mode);
3685
3686       return cost;
3687     }
3688
3689   cost = force_var_cost (data, e1, depends_on);
3690   cost += force_var_cost (data, e2, depends_on);
3691   cost += add_cost (mode);
3692
3693   return cost;
3694 }
3695
3696 /* Determines the cost of the computation by that USE is expressed
3697    from induction variable CAND.  If ADDRESS_P is true, we just need
3698    to create an address from it, otherwise we want to get it into
3699    register.  A set of invariants we depend on is stored in
3700    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3701
3702 static unsigned
3703 get_computation_cost_at (struct ivopts_data *data,
3704                          struct iv_use *use, struct iv_cand *cand,
3705                          bool address_p, bitmap *depends_on, tree at)
3706 {
3707   tree ubase = use->iv->base, ustep = use->iv->step;
3708   tree cbase, cstep;
3709   tree utype = TREE_TYPE (ubase), ctype;
3710   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3711   HOST_WIDE_INT ratio, aratio;
3712   bool var_present, symbol_present;
3713   unsigned cost = 0, n_sums;
3714
3715   *depends_on = NULL;
3716
3717   /* Only consider real candidates.  */
3718   if (!cand->iv)
3719     return INFTY;
3720
3721   cbase = cand->iv->base;
3722   cstep = cand->iv->step;
3723   ctype = TREE_TYPE (cbase);
3724
3725   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3726     {
3727       /* We do not have a precision to express the values of use.  */
3728       return INFTY;
3729     }
3730
3731   if (address_p)
3732     {
3733       /* Do not try to express address of an object with computation based
3734          on address of a different object.  This may cause problems in rtl
3735          level alias analysis (that does not expect this to be happening,
3736          as this is illegal in C), and would be unlikely to be useful
3737          anyway.  */
3738       if (use->iv->base_object
3739           && cand->iv->base_object
3740           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3741         return INFTY;
3742     }
3743
3744   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3745     {
3746       /* TODO -- add direct handling of this case.  */
3747       goto fallback;
3748     }
3749
3750   /* CSTEPI is removed from the offset in case statement is after the
3751      increment.  If the step is not constant, we use zero instead.
3752      This is a bit imprecise (there is the extra addition), but
3753      redundancy elimination is likely to transform the code so that
3754      it uses value of the variable before increment anyway,
3755      so it is not that much unrealistic.  */
3756   if (cst_and_fits_in_hwi (cstep))
3757     cstepi = int_cst_value (cstep);
3758   else
3759     cstepi = 0;
3760
3761   if (cst_and_fits_in_hwi (ustep)
3762       && cst_and_fits_in_hwi (cstep))
3763     {
3764       ustepi = int_cst_value (ustep);
3765
3766       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3767         return INFTY;
3768     }
3769   else
3770     {
3771       tree rat;
3772
3773       rat = constant_multiple_of (utype, ustep, cstep);
3774
3775       if (!rat)
3776         return INFTY;
3777
3778       if (cst_and_fits_in_hwi (rat))
3779         ratio = int_cst_value (rat);
3780       else if (integer_onep (rat))
3781         ratio = 1;
3782       else if (integer_all_onesp (rat))
3783         ratio = -1;
3784       else
3785         return INFTY;
3786     }
3787
3788   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3789      or ratio == 1, it is better to handle this like
3790
3791      ubase - ratio * cbase + ratio * var
3792
3793      (also holds in the case ratio == -1, TODO.  */
3794
3795   if (cst_and_fits_in_hwi (cbase))
3796     {
3797       offset = - ratio * int_cst_value (cbase);
3798       cost += difference_cost (data,
3799                                ubase, integer_zero_node,
3800                                &symbol_present, &var_present, &offset,
3801                                depends_on);
3802     }
3803   else if (ratio == 1)
3804     {
3805       cost += difference_cost (data,
3806                                ubase, cbase,
3807                                &symbol_present, &var_present, &offset,
3808                                depends_on);
3809     }
3810   else
3811     {
3812       cost += force_var_cost (data, cbase, depends_on);
3813       cost += add_cost (TYPE_MODE (ctype));
3814       cost += difference_cost (data,
3815                                ubase, integer_zero_node,
3816                                &symbol_present, &var_present, &offset,
3817                                depends_on);
3818     }
3819
3820   /* If we are after the increment, the value of the candidate is higher by
3821      one iteration.  */
3822   if (stmt_after_increment (data->current_loop, cand, at))
3823     offset -= ratio * cstepi;
3824
3825   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3826      (symbol/var/const parts may be omitted).  If we are looking for an address,
3827      find the cost of addressing this.  */
3828   if (address_p)
3829     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3830
3831   /* Otherwise estimate the costs for computing the expression.  */
3832   aratio = ratio > 0 ? ratio : -ratio;
3833   if (!symbol_present && !var_present && !offset)
3834     {
3835       if (ratio != 1)
3836         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3837
3838       return cost;
3839     }
3840
3841   if (aratio != 1)
3842     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3843
3844   n_sums = 1;
3845   if (var_present
3846       /* Symbol + offset should be compile-time computable.  */
3847       && (symbol_present || offset))
3848     n_sums++;
3849
3850   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3851
3852 fallback:
3853   {
3854     /* Just get the expression, expand it and measure the cost.  */
3855     tree comp = get_computation_at (data->current_loop, use, cand, at);
3856
3857     if (!comp)
3858       return INFTY;
3859
3860     if (address_p)
3861       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3862
3863     return computation_cost (comp);
3864   }
3865 }
3866
3867 /* Determines the cost of the computation by that USE is expressed
3868    from induction variable CAND.  If ADDRESS_P is true, we just need
3869    to create an address from it, otherwise we want to get it into
3870    register.  A set of invariants we depend on is stored in
3871    DEPENDS_ON.  */
3872
3873 static unsigned
3874 get_computation_cost (struct ivopts_data *data,
3875                       struct iv_use *use, struct iv_cand *cand,
3876                       bool address_p, bitmap *depends_on)
3877 {
3878   return get_computation_cost_at (data,
3879                                   use, cand, address_p, depends_on, use->stmt);
3880 }
3881
3882 /* Determines cost of basing replacement of USE on CAND in a generic
3883    expression.  */
3884
3885 static bool
3886 determine_use_iv_cost_generic (struct ivopts_data *data,
3887                                struct iv_use *use, struct iv_cand *cand)
3888 {
3889   bitmap depends_on;
3890   unsigned cost;
3891
3892   /* The simple case first -- if we need to express value of the preserved
3893      original biv, the cost is 0.  This also prevents us from counting the
3894      cost of increment twice -- once at this use and once in the cost of
3895      the candidate.  */
3896   if (cand->pos == IP_ORIGINAL
3897       && cand->incremented_at == use->stmt)
3898     {
3899       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3900       return true;
3901     }
3902
3903   cost = get_computation_cost (data, use, cand, false, &depends_on);
3904   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3905
3906   return cost != INFTY;
3907 }
3908
3909 /* Determines cost of basing replacement of USE on CAND in an address.  */
3910
3911 static bool
3912 determine_use_iv_cost_address (struct ivopts_data *data,
3913                                struct iv_use *use, struct iv_cand *cand)
3914 {
3915   bitmap depends_on;
3916   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3917
3918   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3919
3920   return cost != INFTY;
3921 }
3922
3923 /* Computes value of induction variable IV in iteration NITER.  */
3924
3925 static tree
3926 iv_value (struct iv *iv, tree niter)
3927 {
3928   tree val;
3929   tree type = TREE_TYPE (iv->base);
3930
3931   niter = fold_convert (type, niter);
3932   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3933
3934   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3935 }
3936
3937 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3938
3939 static tree
3940 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3941 {
3942   tree val = iv_value (cand->iv, niter);
3943   tree type = TREE_TYPE (cand->iv->base);
3944
3945   if (stmt_after_increment (loop, cand, at))
3946     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3947
3948   return val;
3949 }
3950
3951 /* Returns period of induction variable iv.  */
3952
3953 static tree
3954 iv_period (struct iv *iv)
3955 {
3956   tree step = iv->step, period, type;
3957   tree pow2div;
3958
3959   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3960
3961   /* Period of the iv is gcd (step, type range).  Since type range is power
3962      of two, it suffices to determine the maximum power of two that divides
3963      step.  */
3964   pow2div = num_ending_zeros (step);
3965   type = unsigned_type_for (TREE_TYPE (step));
3966
3967   period = build_low_bits_mask (type,
3968                                 (TYPE_PRECISION (type)
3969                                  - tree_low_cst (pow2div, 1)));
3970
3971   return period;
3972 }
3973
3974 /* Returns the comparison operator used when eliminating the iv USE.  */
3975
3976 static enum tree_code
3977 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3978 {
3979   struct loop *loop = data->current_loop;
3980   basic_block ex_bb;
3981   edge exit;
3982
3983   ex_bb = bb_for_stmt (use->stmt);
3984   exit = EDGE_SUCC (ex_bb, 0);
3985   if (flow_bb_inside_loop_p (loop, exit->dest))
3986     exit = EDGE_SUCC (ex_bb, 1);
3987
3988   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3989 }
3990
3991 /* Check whether it is possible to express the condition in USE by comparison
3992    of candidate CAND.  If so, store the value compared with to BOUND.  */
3993
3994 static bool
3995 may_eliminate_iv (struct ivopts_data *data,
3996                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3997 {
3998   basic_block ex_bb;
3999   edge exit;
4000   struct tree_niter_desc *niter;
4001   tree nit, nit_type;
4002   tree wider_type, period, per_type;
4003   struct loop *loop = data->current_loop;
4004
4005   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4006     return false;
4007
4008   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4009      for other conditions inside loop body.  */
4010   ex_bb = bb_for_stmt (use->stmt);
4011   if (use->stmt != last_stmt (ex_bb)
4012       || TREE_CODE (use->stmt) != COND_EXPR)
4013     return false;
4014   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4015     return false;
4016
4017   exit = EDGE_SUCC (ex_bb, 0);
4018   if (flow_bb_inside_loop_p (loop, exit->dest))
4019     exit = EDGE_SUCC (ex_bb, 1);
4020   if (flow_bb_inside_loop_p (loop, exit->dest))
4021     return false;
4022
4023   niter = niter_for_exit (data, exit);
4024   if (!niter
4025       || !zero_p (niter->may_be_zero))
4026     return false;
4027
4028   nit = niter->niter;
4029   nit_type = TREE_TYPE (nit);
4030
4031   /* Determine whether we may use the variable to test whether niter iterations
4032      elapsed.  This is the case iff the period of the induction variable is
4033      greater than the number of iterations.  */
4034   period = iv_period (cand->iv);
4035   if (!period)
4036     return false;
4037   per_type = TREE_TYPE (period);
4038
4039   wider_type = TREE_TYPE (period);
4040   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4041     wider_type = per_type;
4042   else
4043     wider_type = nit_type;
4044
4045   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4046                                       fold_convert (wider_type, period),
4047                                       fold_convert (wider_type, nit))))
4048     return false;
4049
4050   *bound = cand_value_at (loop, cand, use->stmt, nit);
4051   return true;
4052 }
4053
4054 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4055
4056 static bool
4057 determine_use_iv_cost_condition (struct ivopts_data *data,
4058                                  struct iv_use *use, struct iv_cand *cand)
4059 {
4060   tree bound = NULL_TREE, op, cond;
4061   bitmap depends_on = NULL;
4062   unsigned cost;
4063
4064   /* Only consider real candidates.  */
4065   if (!cand->iv)
4066     {
4067       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4068       return false;
4069     }
4070
4071   if (may_eliminate_iv (data, use, cand, &bound))
4072     {
4073       cost = force_var_cost (data, bound, &depends_on);
4074
4075       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4076       return cost != INFTY;
4077     }
4078
4079   /* The induction variable elimination failed; just express the original
4080      giv.  If it is compared with an invariant, note that we cannot get
4081      rid of it.  */
4082   cost = get_computation_cost (data, use, cand, false, &depends_on);
4083
4084   cond = *use->op_p;
4085   if (TREE_CODE (cond) != SSA_NAME)
4086     {
4087       op = TREE_OPERAND (cond, 0);
4088       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4089         op = TREE_OPERAND (cond, 1);
4090       if (TREE_CODE (op) == SSA_NAME)
4091         {
4092           op = get_iv (data, op)->base;
4093           fd_ivopts_data = data;
4094           walk_tree (&op, find_depends, &depends_on, NULL);
4095         }
4096     }
4097
4098   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4099   return cost != INFTY;
4100 }
4101
4102 /* Checks whether it is possible to replace the final value of USE by
4103    a direct computation.  If so, the formula is stored to *VALUE.  */
4104
4105 static bool
4106 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4107                          tree *value)
4108 {
4109   struct loop *loop = data->current_loop;
4110   edge exit;
4111   struct tree_niter_desc *niter;
4112
4113   exit = single_dom_exit (loop);
4114   if (!exit)
4115     return false;
4116
4117   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4118                               bb_for_stmt (use->stmt)));
4119
4120   niter = niter_for_single_dom_exit (data);
4121   if (!niter
4122       || !zero_p (niter->may_be_zero))
4123     return false;
4124
4125   *value = iv_value (use->iv, niter->niter);
4126
4127   return true;
4128 }
4129
4130 /* Determines cost of replacing final value of USE using CAND.  */
4131
4132 static bool
4133 determine_use_iv_cost_outer (struct ivopts_data *data,
4134                              struct iv_use *use, struct iv_cand *cand)
4135 {
4136   bitmap depends_on;
4137   unsigned cost;
4138   edge exit;
4139   tree value = NULL_TREE;
4140   struct loop *loop = data->current_loop;
4141
4142   /* The simple case first -- if we need to express value of the preserved
4143      original biv, the cost is 0.  This also prevents us from counting the
4144      cost of increment twice -- once at this use and once in the cost of
4145      the candidate.  */
4146   if (cand->pos == IP_ORIGINAL
4147       && cand->incremented_at == use->stmt)
4148     {
4149       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4150       return true;
4151     }
4152
4153   if (!cand->iv)
4154     {
4155       if (!may_replace_final_value (data, use, &value))
4156         {
4157           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4158           return false;
4159         }
4160
4161       depends_on = NULL;
4162       cost = force_var_cost (data, value, &depends_on);
4163
4164       cost /= AVG_LOOP_NITER (loop);
4165
4166       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4167       return cost != INFTY;
4168     }
4169
4170   exit = single_dom_exit (loop);
4171   if (exit)
4172     {
4173       /* If there is just a single exit, we may use value of the candidate
4174          after we take it to determine the value of use.  */
4175       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4176                                       last_stmt (exit->src));
4177       if (cost != INFTY)
4178         cost /= AVG_LOOP_NITER (loop);
4179     }
4180   else
4181     {
4182       /* Otherwise we just need to compute the iv.  */
4183       cost = get_computation_cost (data, use, cand, false, &depends_on);
4184     }
4185
4186   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4187
4188   return cost != INFTY;
4189 }
4190
4191 /* Determines cost of basing replacement of USE on CAND.  Returns false
4192    if USE cannot be based on CAND.  */
4193
4194 static bool
4195 determine_use_iv_cost (struct ivopts_data *data,
4196                        struct iv_use *use, struct iv_cand *cand)
4197 {
4198   switch (use->type)
4199     {
4200     case USE_NONLINEAR_EXPR:
4201       return determine_use_iv_cost_generic (data, use, cand);
4202
4203     case USE_OUTER:
4204       return determine_use_iv_cost_outer (data, use, cand);
4205
4206     case USE_ADDRESS:
4207       return determine_use_iv_cost_address (data, use, cand);
4208
4209     case USE_COMPARE:
4210       return determine_use_iv_cost_condition (data, use, cand);
4211
4212     default:
4213       gcc_unreachable ();
4214     }
4215 }
4216
4217 /* Determines costs of basing the use of the iv on an iv candidate.  */
4218
4219 static void
4220 determine_use_iv_costs (struct ivopts_data *data)
4221 {
4222   unsigned i, j;
4223   struct iv_use *use;
4224   struct iv_cand *cand;
4225   bitmap to_clear = BITMAP_ALLOC (NULL);
4226
4227   alloc_use_cost_map (data);
4228
4229   for (i = 0; i < n_iv_uses (data); i++)
4230     {
4231       use = iv_use (data, i);
4232
4233       if (data->consider_all_candidates)
4234         {
4235           for (j = 0; j < n_iv_cands (data); j++)
4236             {
4237               cand = iv_cand (data, j);
4238               determine_use_iv_cost (data, use, cand);
4239             }
4240         }
4241       else
4242         {
4243           bitmap_iterator bi;
4244
4245           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4246             {
4247               cand = iv_cand (data, j);
4248               if (!determine_use_iv_cost (data, use, cand))
4249                 bitmap_set_bit (to_clear, j);
4250             }
4251
4252           /* Remove the candidates for that the cost is infinite from
4253              the list of related candidates.  */
4254           bitmap_and_compl_into (use->related_cands, to_clear);
4255           bitmap_clear (to_clear);
4256         }
4257     }
4258
4259   BITMAP_FREE (to_clear);
4260
4261   if (dump_file && (dump_flags & TDF_DETAILS))
4262     {
4263       fprintf (dump_file, "Use-candidate costs:\n");
4264
4265       for (i = 0; i < n_iv_uses (data); i++)
4266         {
4267           use = iv_use (data, i);
4268
4269           fprintf (dump_file, "Use %d:\n", i);
4270           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4271           for (j = 0; j < use->n_map_members; j++)
4272             {
4273               if (!use->cost_map[j].cand
4274                   || use->cost_map[j].cost == INFTY)
4275                 continue;
4276
4277               fprintf (dump_file, "  %d\t%d\t",
4278                        use->cost_map[j].cand->id,
4279                        use->cost_map[j].cost);
4280               if (use->cost_map[j].depends_on)
4281                 bitmap_print (dump_file,
4282                               use->cost_map[j].depends_on, "","");
4283               fprintf (dump_file, "\n");
4284             }
4285
4286           fprintf (dump_file, "\n");
4287         }
4288       fprintf (dump_file, "\n");
4289     }
4290 }
4291
4292 /* Determines cost of the candidate CAND.  */
4293
4294 static void
4295 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4296 {
4297   unsigned cost_base, cost_step;
4298   tree base;
4299
4300   if (!cand->iv)
4301     {
4302       cand->cost = 0;
4303       return;
4304     }
4305
4306   /* There are two costs associated with the candidate -- its increment
4307      and its initialization.  The second is almost negligible for any loop
4308      that rolls enough, so we take it just very little into account.  */
4309
4310   base = cand->iv->base;
4311   cost_base = force_var_cost (data, base, NULL);
4312   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4313
4314   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4315
4316   /* Prefer the original iv unless we may gain something by replacing it;
4317      this is not really relevant for artificial ivs created by other
4318      passes.  */
4319   if (cand->pos == IP_ORIGINAL
4320       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4321     cand->cost--;
4322
4323   /* Prefer not to insert statements into latch unless there are some
4324      already (so that we do not create unnecessary jumps).  */
4325   if (cand->pos == IP_END
4326       && empty_block_p (ip_end_pos (data->current_loop)))
4327     cand->cost++;
4328 }
4329
4330 /* Determines costs of computation of the candidates.  */
4331
4332 static void
4333 determine_iv_costs (struct ivopts_data *data)
4334 {
4335   unsigned i;
4336
4337   if (dump_file && (dump_flags & TDF_DETAILS))
4338     {
4339       fprintf (dump_file, "Candidate costs:\n");
4340       fprintf (dump_file, "  cand\tcost\n");
4341     }
4342
4343   for (i = 0; i < n_iv_cands (data); i++)
4344     {
4345       struct iv_cand *cand = iv_cand (data, i);
4346
4347       determine_iv_cost (data, cand);
4348
4349       if (dump_file && (dump_flags & TDF_DETAILS))
4350         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4351     }
4352
4353 if (dump_file && (dump_flags & TDF_DETAILS))
4354       fprintf (dump_file, "\n");
4355 }
4356
4357 /* Calculates cost for having SIZE induction variables.  */
4358
4359 static unsigned
4360 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4361 {
4362   return global_cost_for_size (size,
4363                                loop_data (data->current_loop)->regs_used,
4364                                n_iv_uses (data));
4365 }
4366
4367 /* For each size of the induction variable set determine the penalty.  */
4368
4369 static void
4370 determine_set_costs (struct ivopts_data *data)
4371 {
4372   unsigned j, n;
4373   tree phi, op;
4374   struct loop *loop = data->current_loop;
4375   bitmap_iterator bi;
4376
4377   /* We use the following model (definitely improvable, especially the
4378      cost function -- TODO):
4379
4380      We estimate the number of registers available (using MD data), name it A.
4381
4382      We estimate the number of registers used by the loop, name it U.  This
4383      number is obtained as the number of loop phi nodes (not counting virtual
4384      registers and bivs) + the number of variables from outside of the loop.
4385
4386      We set a reserve R (free regs that are used for temporary computations,
4387      etc.).  For now the reserve is a constant 3.
4388
4389      Let I be the number of induction variables.
4390
4391      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4392         make a lot of ivs without a reason).
4393      -- if A - R < U + I <= A, the cost is I * PRES_COST
4394      -- if U + I > A, the cost is I * PRES_COST and
4395         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4396
4397   if (dump_file && (dump_flags & TDF_DETAILS))
4398     {
4399       fprintf (dump_file, "Global costs:\n");
4400       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4401       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4402       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4403       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4404     }
4405
4406   n = 0;
4407   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4408     {
4409       op = PHI_RESULT (phi);
4410
4411       if (!is_gimple_reg (op))
4412         continue;
4413
4414       if (get_iv (data, op))
4415         continue;
4416
4417       n++;
4418     }
4419
4420   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4421     {
4422       struct version_info *info = ver_info (data, j);
4423
4424       if (info->inv_id && info->has_nonlin_use)
4425         n++;
4426     }
4427
4428   loop_data (loop)->regs_used = n;
4429   if (dump_file && (dump_flags & TDF_DETAILS))
4430     fprintf (dump_file, "  regs_used %d\n", n);
4431
4432   if (dump_file && (dump_flags & TDF_DETAILS))
4433     {
4434       fprintf (dump_file, "  cost for size:\n");
4435       fprintf (dump_file, "  ivs\tcost\n");
4436       for (j = 0; j <= 2 * target_avail_regs; j++)
4437         fprintf (dump_file, "  %d\t%d\n", j,
4438                  ivopts_global_cost_for_size (data, j));
4439       fprintf (dump_file, "\n");
4440     }
4441 }
4442
4443 /* Returns true if A is a cheaper cost pair than B.  */
4444
4445 static bool
4446 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4447 {
4448   if (!a)
4449     return false;
4450
4451   if (!b)
4452     return true;
4453
4454   if (a->cost < b->cost)
4455     return true;
4456
4457   if (a->cost > b->cost)
4458     return false;
4459
4460   /* In case the costs are the same, prefer the cheaper candidate.  */
4461   if (a->cand->cost < b->cand->cost)
4462     return true;
4463
4464   return false;
4465 }
4466
4467 /* Computes the cost field of IVS structure.  */
4468
4469 static void
4470 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4471 {
4472   unsigned cost = 0;
4473
4474   cost += ivs->cand_use_cost;
4475   cost += ivs->cand_cost;
4476   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4477
4478   ivs->cost = cost;
4479 }
4480
4481 /* Remove invariants in set INVS to set IVS.  */
4482
4483 static void
4484 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4485 {
4486   bitmap_iterator bi;
4487   unsigned iid;
4488
4489   if (!invs)
4490     return;
4491
4492   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4493     {
4494       ivs->n_invariant_uses[iid]--;
4495       if (ivs->n_invariant_uses[iid] == 0)
4496         ivs->n_regs--;
4497     }
4498 }
4499
4500 /* Set USE not to be expressed by any candidate in IVS.  */
4501
4502 static void
4503 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4504                  struct iv_use *use)
4505 {
4506   unsigned uid = use->id, cid;
4507   struct cost_pair *cp;
4508
4509   cp = ivs->cand_for_use[uid];
4510   if (!cp)
4511     return;
4512   cid = cp->cand->id;
4513
4514   ivs->bad_uses++;
4515   ivs->cand_for_use[uid] = NULL;
4516   ivs->n_cand_uses[cid]--;
4517
4518   if (ivs->n_cand_uses[cid] == 0)
4519     {
4520       bitmap_clear_bit (ivs->cands, cid);
4521       /* Do not count the pseudocandidates.  */
4522       if (cp->cand->iv)
4523         ivs->n_regs--;
4524       ivs->n_cands--;
4525       ivs->cand_cost -= cp->cand->cost;
4526
4527       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4528     }
4529
4530   ivs->cand_use_cost -= cp->cost;
4531
4532   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4533   iv_ca_recount_cost (data, ivs);
4534 }
4535
4536 /* Add invariants in set INVS to set IVS.  */
4537
4538 static void
4539 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4540 {
4541   bitmap_iterator bi;
4542   unsigned iid;
4543
4544   if (!invs)
4545     return;
4546
4547   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4548     {
4549       ivs->n_invariant_uses[iid]++;
4550       if (ivs->n_invariant_uses[iid] == 1)
4551         ivs->n_regs++;
4552     }
4553 }
4554
4555 /* Set cost pair for USE in set IVS to CP.  */
4556
4557 static void
4558 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4559               struct iv_use *use, struct cost_pair *cp)
4560 {
4561   unsigned uid = use->id, cid;
4562
4563   if (ivs->cand_for_use[uid] == cp)
4564     return;
4565
4566   if (ivs->cand_for_use[uid])
4567     iv_ca_set_no_cp (data, ivs, use);
4568
4569   if (cp)
4570     {
4571       cid = cp->cand->id;
4572
4573       ivs->bad_uses--;
4574       ivs->cand_for_use[uid] = cp;
4575       ivs->n_cand_uses[cid]++;
4576       if (ivs->n_cand_uses[cid] == 1)
4577         {
4578           bitmap_set_bit (ivs->cands, cid);
4579           /* Do not count the pseudocandidates.  */
4580           if (cp->cand->iv)
4581             ivs->n_regs++;
4582           ivs->n_cands++;
4583           ivs->cand_cost += cp->cand->cost;
4584
4585           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4586         }
4587
4588       ivs->cand_use_cost += cp->cost;
4589       iv_ca_set_add_invariants (ivs, cp->depends_on);
4590       iv_ca_recount_cost (data, ivs);
4591     }
4592 }
4593
4594 /* Extend set IVS by expressing USE by some of the candidates in it
4595    if possible.  */
4596
4597 static void
4598 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4599                struct iv_use *use)
4600 {
4601   struct cost_pair *best_cp = NULL, *cp;
4602   bitmap_iterator bi;
4603   unsigned i;
4604
4605   gcc_assert (ivs->upto >= use->id);
4606
4607   if (ivs->upto == use->id)
4608     {
4609       ivs->upto++;
4610       ivs->bad_uses++;
4611     }
4612
4613   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4614     {
4615       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4616
4617       if (cheaper_cost_pair (cp, best_cp))
4618         best_cp = cp;
4619     }
4620
4621   iv_ca_set_cp (data, ivs, use, best_cp);
4622 }
4623
4624 /* Get cost for assignment IVS.  */
4625
4626 static unsigned
4627 iv_ca_cost (struct iv_ca *ivs)
4628 {
4629   return (ivs->bad_uses ? INFTY : ivs->cost);
4630 }
4631
4632 /* Returns true if all dependences of CP are among invariants in IVS.  */
4633
4634 static bool
4635 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4636 {
4637   unsigned i;
4638   bitmap_iterator bi;
4639
4640   if (!cp->depends_on)
4641     return true;
4642
4643   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4644     {
4645       if (ivs->n_invariant_uses[i] == 0)
4646         return false;
4647     }
4648
4649   return true;
4650 }
4651
4652 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4653    it before NEXT_CHANGE.  */
4654
4655 static struct iv_ca_delta *
4656 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4657                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4658 {
4659   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4660
4661   change->use = use;
4662   change->old_cp = old_cp;
4663   change->new_cp = new_cp;
4664   change->next_change = next_change;
4665
4666   return change;
4667 }
4668
4669 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4670    are rewritten.  */
4671
4672 static struct iv_ca_delta *
4673 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4674 {
4675   struct iv_ca_delta *last;
4676
4677   if (!l2)
4678     return l1;
4679
4680   if (!l1)
4681     return l2;
4682
4683   for (last = l1; last->next_change; last = last->next_change)
4684     continue;
4685   last->next_change = l2;
4686
4687   return l1;
4688 }
4689
4690 /* Returns candidate by that USE is expressed in IVS.  */
4691
4692 static struct cost_pair *
4693 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4694 {
4695   return ivs->cand_for_use[use->id];
4696 }
4697
4698 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4699
4700 static struct iv_ca_delta *
4701 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4702 {
4703   struct iv_ca_delta *act, *next, *prev = NULL;
4704   struct cost_pair *tmp;
4705
4706   for (act = delta; act; act = next)
4707     {
4708       next = act->next_change;
4709       act->next_change = prev;
4710       prev = act;
4711
4712       tmp = act->old_cp;
4713       act->old_cp = act->new_cp;
4714       act->new_cp = tmp;
4715     }
4716
4717   return prev;
4718 }
4719
4720 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4721    reverted instead.  */
4722
4723 static void
4724 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4725                     struct iv_ca_delta *delta, bool forward)
4726 {
4727   struct cost_pair *from, *to;
4728   struct iv_ca_delta *act;
4729
4730   if (!forward)
4731     delta = iv_ca_delta_reverse (delta);
4732
4733   for (act = delta; act; act = act->next_change)
4734     {
4735       from = act->old_cp;
4736       to = act->new_cp;
4737       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4738       iv_ca_set_cp (data, ivs, act->use, to);
4739     }
4740
4741   if (!forward)
4742     iv_ca_delta_reverse (delta);
4743 }
4744
4745 /* Returns true if CAND is used in IVS.  */
4746
4747 static bool
4748 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4749 {
4750   return ivs->n_cand_uses[cand->id] > 0;
4751 }
4752
4753 /* Returns number of induction variable candidates in the set IVS.  */
4754
4755 static unsigned
4756 iv_ca_n_cands (struct iv_ca *ivs)
4757 {
4758   return ivs->n_cands;
4759 }
4760
4761 /* Free the list of changes DELTA.  */
4762
4763 static void
4764 iv_ca_delta_free (struct iv_ca_delta **delta)
4765 {
4766   struct iv_ca_delta *act, *next;
4767
4768   for (act = *delta; act; act = next)
4769     {
4770       next = act->next_change;
4771       free (act);
4772     }
4773
4774   *delta = NULL;
4775 }
4776
4777 /* Allocates new iv candidates assignment.  */
4778
4779 static struct iv_ca *
4780 iv_ca_new (struct ivopts_data *data)
4781 {
4782   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4783
4784   nw->upto = 0;
4785   nw->bad_uses = 0;
4786   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4787   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4788   nw->cands = BITMAP_ALLOC (NULL);
4789   nw->n_cands = 0;
4790   nw->n_regs = 0;
4791   nw->cand_use_cost = 0;
4792   nw->cand_cost = 0;
4793   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4794   nw->cost = 0;
4795
4796   return nw;
4797 }
4798
4799 /* Free memory occupied by the set IVS.  */
4800
4801 static void
4802 iv_ca_free (struct iv_ca **ivs)
4803 {
4804   free ((*ivs)->cand_for_use);
4805   free ((*ivs)->n_cand_uses);
4806   BITMAP_FREE ((*ivs)->cands);
4807   free ((*ivs)->n_invariant_uses);
4808   free (*ivs);
4809   *ivs = NULL;
4810 }
4811
4812 /* Dumps IVS to FILE.  */
4813
4814 static void
4815 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4816 {
4817   const char *pref = "  invariants ";
4818   unsigned i;
4819
4820   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4821   bitmap_print (file, ivs->cands, "  candidates ","\n");
4822
4823   for (i = 1; i <= data->max_inv_id; i++)
4824     if (ivs->n_invariant_uses[i])
4825       {
4826         fprintf (file, "%s%d", pref, i);
4827         pref = ", ";
4828       }
4829   fprintf (file, "\n");
4830 }
4831
4832 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4833    new set, and store differences in DELTA.  Number of induction variables
4834    in the new set is stored to N_IVS.  */
4835
4836 static unsigned
4837 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4838               struct iv_cand *cand, struct iv_ca_delta **delta,
4839               unsigned *n_ivs)
4840 {
4841   unsigned i, cost;
4842   struct iv_use *use;
4843   struct cost_pair *old_cp, *new_cp;
4844
4845   *delta = NULL;
4846   for (i = 0; i < ivs->upto; i++)
4847     {
4848       use = iv_use (data, i);
4849       old_cp = iv_ca_cand_for_use (ivs, use);
4850
4851       if (old_cp
4852           && old_cp->cand == cand)
4853         continue;
4854
4855       new_cp = get_use_iv_cost (data, use, cand);
4856       if (!new_cp)
4857         continue;
4858
4859       if (!iv_ca_has_deps (ivs, new_cp))
4860         continue;
4861
4862       if (!cheaper_cost_pair (new_cp, old_cp))
4863         continue;
4864
4865       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4866     }
4867
4868   iv_ca_delta_commit (data, ivs, *delta, true);
4869   cost = iv_ca_cost (ivs);
4870   if (n_ivs)
4871     *n_ivs = iv_ca_n_cands (ivs);
4872   iv_ca_delta_commit (data, ivs, *delta, false);
4873
4874   return cost;
4875 }
4876
4877 /* Try narrowing set IVS by removing CAND.  Return the cost of
4878    the new set and store the differences in DELTA.  */
4879
4880 static unsigned
4881 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4882               struct iv_cand *cand, struct iv_ca_delta **delta)
4883 {
4884   unsigned i, ci;
4885   struct iv_use *use;
4886   struct cost_pair *old_cp, *new_cp, *cp;
4887   bitmap_iterator bi;
4888   struct iv_cand *cnd;
4889   unsigned cost;
4890
4891   *delta = NULL;
4892   for (i = 0; i < n_iv_uses (data); i++)
4893     {
4894       use = iv_use (data, i);
4895
4896       old_cp = iv_ca_cand_for_use (ivs, use);
4897       if (old_cp->cand != cand)
4898         continue;
4899
4900       new_cp = NULL;
4901
4902       if (data->consider_all_candidates)
4903         {
4904           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4905             {
4906               if (ci == cand->id)
4907                 continue;
4908
4909               cnd = iv_cand (data, ci);
4910
4911               cp = get_use_iv_cost (data, use, cnd);
4912               if (!cp)
4913                 continue;
4914               if (!iv_ca_has_deps (ivs, cp))
4915                 continue;
4916
4917               if (!cheaper_cost_pair (cp, new_cp))
4918                 continue;
4919
4920               new_cp = cp;
4921             }
4922         }
4923       else
4924         {
4925           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4926             {
4927               if (ci == cand->id)
4928                 continue;
4929
4930               cnd = iv_cand (data, ci);
4931
4932               cp = get_use_iv_cost (data, use, cnd);
4933               if (!cp)
4934                 continue;
4935               if (!iv_ca_has_deps (ivs, cp))
4936                 continue;
4937
4938               if (!cheaper_cost_pair (cp, new_cp))
4939                 continue;
4940
4941               new_cp = cp;
4942             }
4943         }
4944
4945       if (!new_cp)
4946         {
4947           iv_ca_delta_free (delta);
4948           return INFTY;
4949         }
4950
4951       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4952     }
4953
4954   iv_ca_delta_commit (data, ivs, *delta, true);
4955   cost = iv_ca_cost (ivs);
4956   iv_ca_delta_commit (data, ivs, *delta, false);
4957
4958   return cost;
4959 }
4960
4961 /* Try optimizing the set of candidates IVS by removing candidates different
4962    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4963    differences in DELTA.  */
4964
4965 static unsigned
4966 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4967              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4968 {
4969   bitmap_iterator bi;
4970   struct iv_ca_delta *act_delta, *best_delta;
4971   unsigned i, best_cost, acost;
4972   struct iv_cand *cand;
4973
4974   best_delta = NULL;
4975   best_cost = iv_ca_cost (ivs);
4976
4977   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4978     {
4979       cand = iv_cand (data, i);
4980
4981       if (cand == except_cand)
4982         continue;
4983
4984       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4985
4986       if (acost < best_cost)
4987         {
4988           best_cost = acost;
4989           iv_ca_delta_free (&best_delta);
4990           best_delta = act_delta;
4991         }
4992       else
4993         iv_ca_delta_free (&act_delta);
4994     }
4995
4996   if (!best_delta)
4997     {
4998       *delta = NULL;
4999       return best_cost;
5000     }
5001
5002   /* Recurse to possibly remove other unnecessary ivs.  */
5003   iv_ca_delta_commit (data, ivs, best_delta, true);
5004   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5005   iv_ca_delta_commit (data, ivs, best_delta, false);
5006   *delta = iv_ca_delta_join (best_delta, *delta);
5007   return best_cost;
5008 }
5009
5010 /* Tries to extend the sets IVS in the best possible way in order
5011    to express the USE.  */
5012
5013 static bool
5014 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5015                   struct iv_use *use)
5016 {
5017   unsigned best_cost, act_cost;
5018   unsigned i;
5019   bitmap_iterator bi;
5020   struct iv_cand *cand;
5021   struct iv_ca_delta *best_delta = NULL, *act_delta;
5022   struct cost_pair *cp;
5023
5024   iv_ca_add_use (data, ivs, use);
5025   best_cost = iv_ca_cost (ivs);
5026
5027   cp = iv_ca_cand_for_use (ivs, use);
5028   if (cp)
5029     {
5030       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5031       iv_ca_set_no_cp (data, ivs, use);
5032     }
5033
5034   /* First try important candidates.  Only if it fails, try the specific ones.
5035      Rationale -- in loops with many variables the best choice often is to use
5036      just one generic biv.  If we added here many ivs specific to the uses,
5037      the optimization algorithm later would be likely to get stuck in a local
5038      minimum, thus causing us to create too many ivs.  The approach from
5039      few ivs to more seems more likely to be successful -- starting from few
5040      ivs, replacing an expensive use by a specific iv should always be a
5041      win.  */
5042   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5043     {
5044       cand = iv_cand (data, i);
5045
5046       if (iv_ca_cand_used_p (ivs, cand))
5047         continue;
5048
5049       cp = get_use_iv_cost (data, use, cand);
5050       if (!cp)
5051         continue;
5052
5053       iv_ca_set_cp (data, ivs, use, cp);
5054       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5055       iv_ca_set_no_cp (data, ivs, use);
5056       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5057
5058       if (act_cost < best_cost)
5059         {
5060           best_cost = act_cost;
5061
5062           iv_ca_delta_free (&best_delta);
5063           best_delta = act_delta;
5064         }
5065       else
5066         iv_ca_delta_free (&act_delta);
5067     }
5068
5069   if (best_cost == INFTY)
5070     {
5071       for (i = 0; i < use->n_map_members; i++)
5072         {
5073           cp = use->cost_map + i;
5074           cand = cp->cand;
5075           if (!cand)
5076             continue;
5077
5078           /* Already tried this.  */
5079           if (cand->important)
5080             continue;
5081
5082           if (iv_ca_cand_used_p (ivs, cand))
5083             continue;
5084
5085           act_delta = NULL;
5086           iv_ca_set_cp (data, ivs, use, cp);
5087           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5088           iv_ca_set_no_cp (data, ivs, use);
5089           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5090                                        cp, act_delta);
5091
5092           if (act_cost < best_cost)
5093             {
5094               best_cost = act_cost;
5095
5096               if (best_delta)
5097                 iv_ca_delta_free (&best_delta);
5098               best_delta = act_delta;
5099             }
5100           else
5101             iv_ca_delta_free (&act_delta);
5102         }
5103     }
5104
5105   iv_ca_delta_commit (data, ivs, best_delta, true);
5106   iv_ca_delta_free (&best_delta);
5107
5108   return (best_cost != INFTY);
5109 }
5110
5111 /* Finds an initial assignment of candidates to uses.  */
5112
5113 static struct iv_ca *
5114 get_initial_solution (struct ivopts_data *data)
5115 {
5116   struct iv_ca *ivs = iv_ca_new (data);
5117   unsigned i;
5118
5119   for (i = 0; i < n_iv_uses (data); i++)
5120     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5121       {
5122         iv_ca_free (&ivs);
5123         return NULL;
5124       }
5125
5126   return ivs;
5127 }
5128
5129 /* Tries to improve set of induction variables IVS.  */
5130
5131 static bool
5132 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5133 {
5134   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5135   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5136   struct iv_cand *cand;
5137
5138   /* Try extending the set of induction variables by one.  */
5139   for (i = 0; i < n_iv_cands (data); i++)
5140     {
5141       cand = iv_cand (data, i);
5142
5143       if (iv_ca_cand_used_p (ivs, cand))
5144         continue;
5145
5146       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5147       if (!act_delta)
5148         continue;
5149
5150       /* If we successfully added the candidate and the set is small enough,
5151          try optimizing it by removing other candidates.  */
5152       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5153         {
5154           iv_ca_delta_commit (data, ivs, act_delta, true);
5155           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5156           iv_ca_delta_commit (data, ivs, act_delta, false);
5157           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5158         }
5159
5160       if (acost < best_cost)
5161         {
5162           best_cost = acost;
5163           iv_ca_delta_free (&best_delta);
5164           best_delta = act_delta;
5165         }
5166       else
5167         iv_ca_delta_free (&act_delta);
5168     }
5169
5170   if (!best_delta)
5171     {
5172       /* Try removing the candidates from the set instead.  */
5173       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5174
5175       /* Nothing more we can do.  */
5176       if (!best_delta)
5177         return false;
5178     }
5179
5180   iv_ca_delta_commit (data, ivs, best_delta, true);
5181   gcc_assert (best_cost == iv_ca_cost (ivs));
5182   iv_ca_delta_free (&best_delta);
5183   return true;
5184 }
5185
5186 /* Attempts to find the optimal set of induction variables.  We do simple
5187    greedy heuristic -- we try to replace at most one candidate in the selected
5188    solution and remove the unused ivs while this improves the cost.  */
5189
5190 static struct iv_ca *
5191 find_optimal_iv_set (struct ivopts_data *data)
5192 {
5193   unsigned i;
5194   struct iv_ca *set;
5195   struct iv_use *use;
5196
5197   /* Get the initial solution.  */
5198   set = get_initial_solution (data);
5199   if (!set)
5200     {
5201       if (dump_file && (dump_flags & TDF_DETAILS))
5202         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5203       return NULL;
5204     }
5205
5206   if (dump_file && (dump_flags & TDF_DETAILS))
5207     {
5208       fprintf (dump_file, "Initial set of candidates:\n");
5209       iv_ca_dump (data, dump_file, set);
5210     }
5211
5212   while (try_improve_iv_set (data, set))
5213     {
5214       if (dump_file && (dump_flags & TDF_DETAILS))
5215         {
5216           fprintf (dump_file, "Improved to:\n");
5217           iv_ca_dump (data, dump_file, set);
5218         }
5219     }
5220
5221   if (dump_file && (dump_flags & TDF_DETAILS))
5222     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5223
5224   for (i = 0; i < n_iv_uses (data); i++)
5225     {
5226       use = iv_use (data, i);
5227       use->selected = iv_ca_cand_for_use (set, use)->cand;
5228     }
5229
5230   return set;
5231 }
5232
5233 /* Creates a new induction variable corresponding to CAND.  */
5234
5235 static void
5236 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5237 {
5238   block_stmt_iterator incr_pos;
5239   tree base;
5240   bool after = false;
5241
5242   if (!cand->iv)
5243     return;
5244
5245   switch (cand->pos)
5246     {
5247     case IP_NORMAL:
5248       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5249       break;
5250
5251     case IP_END:
5252       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5253       after = true;
5254       break;
5255
5256     case IP_ORIGINAL:
5257       /* Mark that the iv is preserved.  */
5258       name_info (data, cand->var_before)->preserve_biv = true;
5259       name_info (data, cand->var_after)->preserve_biv = true;
5260
5261       /* Rewrite the increment so that it uses var_before directly.  */
5262       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5263
5264       return;
5265     }
5266
5267   gimple_add_tmp_var (cand->var_before);
5268   add_referenced_tmp_var (cand->var_before);
5269
5270   base = unshare_expr (cand->iv->base);
5271
5272   create_iv (base, unshare_expr (cand->iv->step),
5273              cand->var_before, data->current_loop,
5274              &incr_pos, after, &cand->var_before, &cand->var_after);
5275 }
5276
5277 /* Creates new induction variables described in SET.  */
5278
5279 static void
5280 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5281 {
5282   unsigned i;
5283   struct iv_cand *cand;
5284   bitmap_iterator bi;
5285
5286   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5287     {
5288       cand = iv_cand (data, i);
5289       create_new_iv (data, cand);
5290     }
5291 }
5292
5293 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5294    is true, remove also the ssa name defined by the statement.  */
5295
5296 static void
5297 remove_statement (tree stmt, bool including_defined_name)
5298 {
5299   if (TREE_CODE (stmt) == PHI_NODE)
5300     {
5301       if (!including_defined_name)
5302         {
5303           /* Prevent the ssa name defined by the statement from being removed.  */
5304           SET_PHI_RESULT (stmt, NULL);
5305         }
5306       remove_phi_node (stmt, NULL_TREE);
5307     }
5308   else
5309     {
5310       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5311
5312       bsi_remove (&bsi);
5313     }
5314 }
5315
5316 /* Rewrites USE (definition of iv used in a nonlinear expression)
5317    using candidate CAND.  */
5318
5319 static void
5320 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5321                             struct iv_use *use, struct iv_cand *cand)
5322 {
5323   tree comp;
5324   tree op, stmts, tgt, ass;
5325   block_stmt_iterator bsi, pbsi;
5326
5327   /* An important special case -- if we are asked to express value of
5328      the original iv by itself, just exit; there is no need to
5329      introduce a new computation (that might also need casting the
5330      variable to unsigned and back).  */
5331   if (cand->pos == IP_ORIGINAL
5332       && TREE_CODE (use->stmt) == MODIFY_EXPR
5333       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5334     {
5335       op = TREE_OPERAND (use->stmt, 1);
5336
5337       /* Be a bit careful.  In case variable is expressed in some
5338          complicated way, rewrite it so that we may get rid of this
5339          complicated expression.  */
5340       if ((TREE_CODE (op) == PLUS_EXPR
5341            || TREE_CODE (op) == MINUS_EXPR)
5342           && TREE_OPERAND (op, 0) == cand->var_before
5343           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5344         return;
5345     }
5346
5347   comp = get_computation (data->current_loop, use, cand);
5348   switch (TREE_CODE (use->stmt))
5349     {
5350     case PHI_NODE:
5351       tgt = PHI_RESULT (use->stmt);
5352
5353       /* If we should keep the biv, do not replace it.  */
5354       if (name_info (data, tgt)->preserve_biv)
5355         return;
5356
5357       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5358       while (!bsi_end_p (pbsi)
5359              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5360         {
5361           bsi = pbsi;
5362           bsi_next (&pbsi);
5363         }
5364       break;
5365
5366     case MODIFY_EXPR:
5367       tgt = TREE_OPERAND (use->stmt, 0);
5368       bsi = bsi_for_stmt (use->stmt);
5369       break;
5370
5371     default:
5372       gcc_unreachable ();
5373     }
5374
5375   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5376
5377   if (TREE_CODE (use->stmt) == PHI_NODE)
5378     {
5379       if (stmts)
5380         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5381       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5382       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5383       remove_statement (use->stmt, false);
5384       SSA_NAME_DEF_STMT (tgt) = ass;
5385     }
5386   else
5387     {
5388       if (stmts)
5389         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5390       TREE_OPERAND (use->stmt, 1) = op;
5391     }
5392 }
5393
5394 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5395    for_each_index.  */
5396
5397 static bool
5398 idx_remove_ssa_names (tree base, tree *idx,
5399                       void *data ATTRIBUTE_UNUSED)
5400 {
5401   tree *op;
5402
5403   if (TREE_CODE (*idx) == SSA_NAME)
5404     *idx = SSA_NAME_VAR (*idx);
5405
5406   if (TREE_CODE (base) == ARRAY_REF)
5407     {
5408       op = &TREE_OPERAND (base, 2);
5409       if (*op
5410           && TREE_CODE (*op) == SSA_NAME)
5411         *op = SSA_NAME_VAR (*op);
5412       op = &TREE_OPERAND (base, 3);
5413       if (*op
5414           && TREE_CODE (*op) == SSA_NAME)
5415         *op = SSA_NAME_VAR (*op);
5416     }
5417
5418   return true;
5419 }
5420
5421 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5422
5423 static tree
5424 unshare_and_remove_ssa_names (tree ref)
5425 {
5426   ref = unshare_expr (ref);
5427   for_each_index (&ref, idx_remove_ssa_names, NULL);
5428
5429   return ref;
5430 }
5431
5432 /* Extract the alias analysis info for the memory reference REF.  There are
5433    several ways how this information may be stored and what precisely is
5434    its semantics depending on the type of the reference, but there always is
5435    somewhere hidden one _DECL node that is used to determine the set of
5436    virtual operands for the reference.  The code below deciphers this jungle
5437    and extracts this single useful piece of information.  */
5438
5439 static tree
5440 get_ref_tag (tree ref)
5441 {
5442   tree var = get_base_address (ref);
5443   tree tag;
5444
5445   if (!var)
5446     return NULL_TREE;
5447
5448   if (TREE_CODE (var) == INDIRECT_REF)
5449     var = TREE_OPERAND (var, 0);
5450   if (TREE_CODE (var) == SSA_NAME)
5451     {
5452       if (SSA_NAME_PTR_INFO (var))
5453         {
5454           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5455           if (tag)
5456             return tag;
5457         }
5458
5459       var = SSA_NAME_VAR (var);
5460     }
5461
5462   if (DECL_P (var))
5463     {
5464       tag = var_ann (var)->type_mem_tag;
5465       if (tag)
5466         return tag;
5467
5468       return var;
5469     }
5470
5471   return NULL_TREE;
5472 }
5473
5474 /* Copies the reference information from OLD_REF to NEW_REF.  */
5475
5476 static void
5477 copy_ref_info (tree new_ref, tree old_ref)
5478 {
5479   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5480     copy_mem_ref_info (new_ref, old_ref);
5481   else
5482     {
5483       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5484       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5485     }
5486 }
5487
5488 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5489
5490 static void
5491 rewrite_use_address (struct ivopts_data *data,
5492                      struct iv_use *use, struct iv_cand *cand)
5493 {
5494   struct affine_tree_combination aff;
5495   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5496   tree ref;
5497
5498   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5499   unshare_aff_combination (&aff);
5500
5501   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5502   copy_ref_info (ref, *use->op_p);
5503   *use->op_p = ref;
5504 }
5505
5506 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5507    candidate CAND.  */
5508
5509 static void
5510 rewrite_use_compare (struct ivopts_data *data,
5511                      struct iv_use *use, struct iv_cand *cand)
5512 {
5513   tree comp;
5514   tree *op_p, cond, op, stmts, bound;
5515   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5516   enum tree_code compare;
5517   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5518
5519   bound = cp->value;
5520   if (bound)
5521     {
5522       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5523       tree var_type = TREE_TYPE (var);
5524
5525       compare = iv_elimination_compare (data, use);
5526       bound = fold_convert (var_type, bound);
5527       op = force_gimple_operand (unshare_expr (bound), &stmts,
5528                                  true, NULL_TREE);
5529
5530       if (stmts)
5531         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5532
5533       *use->op_p = build2 (compare, boolean_type_node, var, op);
5534       update_stmt (use->stmt);
5535       return;
5536     }
5537
5538   /* The induction variable elimination failed; just express the original
5539      giv.  */
5540   comp = get_computation (data->current_loop, use, cand);
5541
5542   cond = *use->op_p;
5543   op_p = &TREE_OPERAND (cond, 0);
5544   if (TREE_CODE (*op_p) != SSA_NAME
5545       || zero_p (get_iv (data, *op_p)->step))
5546     op_p = &TREE_OPERAND (cond, 1);
5547
5548   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5549   if (stmts)
5550     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5551
5552   *op_p = op;
5553 }
5554
5555 /* Ensure that operand *OP_P may be used at the end of EXIT without
5556    violating loop closed ssa form.  */
5557
5558 static void
5559 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5560 {
5561   basic_block def_bb;
5562   struct loop *def_loop;
5563   tree phi, use;
5564
5565   use = USE_FROM_PTR (op_p);
5566   if (TREE_CODE (use) != SSA_NAME)
5567     return;
5568
5569   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5570   if (!def_bb)
5571     return;
5572
5573   def_loop = def_bb->loop_father;
5574   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5575     return;
5576
5577   /* Try finding a phi node that copies the value out of the loop.  */
5578   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5579     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5580       break;
5581
5582   if (!phi)
5583     {
5584       /* Create such a phi node.  */
5585       tree new_name = duplicate_ssa_name (use, NULL);
5586
5587       phi = create_phi_node (new_name, exit->dest);
5588       SSA_NAME_DEF_STMT (new_name) = phi;
5589       add_phi_arg (phi, use, exit);
5590     }
5591
5592   SET_USE (op_p, PHI_RESULT (phi));
5593 }
5594
5595 /* Ensure that operands of STMT may be used at the end of EXIT without
5596    violating loop closed ssa form.  */
5597
5598 static void
5599 protect_loop_closed_ssa_form (edge exit, tree stmt)
5600 {
5601   ssa_op_iter iter;
5602   use_operand_p use_p;
5603
5604   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5605     protect_loop_closed_ssa_form_use (exit, use_p);
5606 }
5607
5608 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5609    so that they are emitted on the correct place, and so that the loop closed
5610    ssa form is preserved.  */
5611
5612 void
5613 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5614 {
5615   tree_stmt_iterator tsi;
5616   block_stmt_iterator bsi;
5617   tree phi, stmt, def, next;
5618
5619   if (!single_pred_p (exit->dest))
5620     split_loop_exit_edge (exit);
5621
5622   /* Ensure there is label in exit->dest, so that we can
5623      insert after it.  */
5624   tree_block_label (exit->dest);
5625   bsi = bsi_after_labels (exit->dest);
5626
5627   if (TREE_CODE (stmts) == STATEMENT_LIST)
5628     {
5629       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5630         {
5631           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5632           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5633         }
5634     }
5635   else
5636     {
5637       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5638       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5639     }
5640
5641   if (!op)
5642     return;
5643
5644   for (phi = phi_nodes (exit->dest); phi; phi = next)
5645     {
5646       next = PHI_CHAIN (phi);
5647
5648       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5649         {
5650           def = PHI_RESULT (phi);
5651           remove_statement (phi, false);
5652           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5653                         def, op);
5654           SSA_NAME_DEF_STMT (def) = stmt;
5655           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5656         }
5657     }
5658 }
5659
5660 /* Rewrites the final value of USE (that is only needed outside of the loop)
5661    using candidate CAND.  */
5662
5663 static void
5664 rewrite_use_outer (struct ivopts_data *data,
5665                    struct iv_use *use, struct iv_cand *cand)
5666 {
5667   edge exit;
5668   tree value, op, stmts, tgt;
5669   tree phi;
5670
5671   switch (TREE_CODE (use->stmt))
5672     {
5673     case PHI_NODE:
5674       tgt = PHI_RESULT (use->stmt);
5675       break;
5676     case MODIFY_EXPR:
5677       tgt = TREE_OPERAND (use->stmt, 0);
5678       break;
5679     default:
5680       gcc_unreachable ();
5681     }
5682
5683   exit = single_dom_exit (data->current_loop);
5684
5685   if (exit)
5686     {
5687       if (!cand->iv)
5688         {
5689           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5690           value = unshare_expr (cp->value);
5691         }
5692       else
5693         value = get_computation_at (data->current_loop,
5694                                     use, cand, last_stmt (exit->src));
5695
5696       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5697
5698       /* If we will preserve the iv anyway and we would need to perform
5699          some computation to replace the final value, do nothing.  */
5700       if (stmts && name_info (data, tgt)->preserve_biv)
5701         return;
5702
5703       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5704         {
5705           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5706
5707           if (USE_FROM_PTR (use_p) == tgt)
5708             SET_USE (use_p, op);
5709         }
5710
5711       if (stmts)
5712         compute_phi_arg_on_exit (exit, stmts, op);
5713
5714       /* Enable removal of the statement.  We cannot remove it directly,
5715          since we may still need the aliasing information attached to the
5716          ssa name defined by it.  */
5717       name_info (data, tgt)->iv->have_use_for = false;
5718       return;
5719     }
5720
5721   /* If the variable is going to be preserved anyway, there is nothing to
5722      do.  */
5723   if (name_info (data, tgt)->preserve_biv)
5724     return;
5725
5726   /* Otherwise we just need to compute the iv.  */
5727   rewrite_use_nonlinear_expr (data, use, cand);
5728 }
5729
5730 /* Rewrites USE using candidate CAND.  */
5731
5732 static void
5733 rewrite_use (struct ivopts_data *data,
5734              struct iv_use *use, struct iv_cand *cand)
5735 {
5736   switch (use->type)
5737     {
5738       case USE_NONLINEAR_EXPR:
5739         rewrite_use_nonlinear_expr (data, use, cand);
5740         break;
5741
5742       case USE_OUTER:
5743         rewrite_use_outer (data, use, cand);
5744         break;
5745
5746       case USE_ADDRESS:
5747         rewrite_use_address (data, use, cand);
5748         break;
5749
5750       case USE_COMPARE:
5751         rewrite_use_compare (data, use, cand);
5752         break;
5753
5754       default:
5755         gcc_unreachable ();
5756     }
5757   update_stmt (use->stmt);
5758 }
5759
5760 /* Rewrite the uses using the selected induction variables.  */
5761
5762 static void
5763 rewrite_uses (struct ivopts_data *data)
5764 {
5765   unsigned i;
5766   struct iv_cand *cand;
5767   struct iv_use *use;
5768
5769   for (i = 0; i < n_iv_uses (data); i++)
5770     {
5771       use = iv_use (data, i);
5772       cand = use->selected;
5773       gcc_assert (cand);
5774
5775       rewrite_use (data, use, cand);
5776     }
5777 }
5778
5779 /* Removes the ivs that are not used after rewriting.  */
5780
5781 static void
5782 remove_unused_ivs (struct ivopts_data *data)
5783 {
5784   unsigned j;
5785   bitmap_iterator bi;
5786
5787   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5788     {
5789       struct version_info *info;
5790
5791       info = ver_info (data, j);
5792       if (info->iv
5793           && !zero_p (info->iv->step)
5794           && !info->inv_id
5795           && !info->iv->have_use_for
5796           && !info->preserve_biv)
5797         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5798     }
5799 }
5800
5801 /* Frees data allocated by the optimization of a single loop.  */
5802
5803 static void
5804 free_loop_data (struct ivopts_data *data)
5805 {
5806   unsigned i, j;
5807   bitmap_iterator bi;
5808   tree obj;
5809
5810   htab_empty (data->niters);
5811
5812   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5813     {
5814       struct version_info *info;
5815
5816       info = ver_info (data, i);
5817       if (info->iv)
5818         free (info->iv);
5819       info->iv = NULL;
5820       info->has_nonlin_use = false;
5821       info->preserve_biv = false;
5822       info->inv_id = 0;
5823     }
5824   bitmap_clear (data->relevant);
5825   bitmap_clear (data->important_candidates);
5826
5827   for (i = 0; i < n_iv_uses (data); i++)
5828     {
5829       struct iv_use *use = iv_use (data, i);
5830
5831       free (use->iv);
5832       BITMAP_FREE (use->related_cands);
5833       for (j = 0; j < use->n_map_members; j++)
5834         if (use->cost_map[j].depends_on)
5835           BITMAP_FREE (use->cost_map[j].depends_on);
5836       free (use->cost_map);
5837       free (use);
5838     }
5839   VEC_truncate (iv_use_p, data->iv_uses, 0);
5840
5841   for (i = 0; i < n_iv_cands (data); i++)
5842     {
5843       struct iv_cand *cand = iv_cand (data, i);
5844
5845       if (cand->iv)
5846         free (cand->iv);
5847       if (cand->depends_on)
5848         BITMAP_FREE (cand->depends_on);
5849       free (cand);
5850     }
5851   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5852
5853   if (data->version_info_size < num_ssa_names)
5854     {
5855       data->version_info_size = 2 * num_ssa_names;
5856       free (data->version_info);
5857       data->version_info = xcalloc (data->version_info_size,
5858                                     sizeof (struct version_info));
5859     }
5860
5861   data->max_inv_id = 0;
5862
5863   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5864     SET_DECL_RTL (obj, NULL_RTX);
5865
5866   VEC_truncate (tree, decl_rtl_to_reset, 0);
5867 }
5868
5869 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5870    loop tree.  */
5871
5872 static void
5873 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5874 {
5875   unsigned i;
5876
5877   for (i = 1; i < loops->num; i++)
5878     if (loops->parray[i])
5879       {
5880         free (loops->parray[i]->aux);
5881         loops->parray[i]->aux = NULL;
5882       }
5883
5884   free_loop_data (data);
5885   free (data->version_info);
5886   BITMAP_FREE (data->relevant);
5887   BITMAP_FREE (data->important_candidates);
5888   htab_delete (data->niters);
5889
5890   VEC_free (tree, heap, decl_rtl_to_reset);
5891   VEC_free (iv_use_p, heap, data->iv_uses);
5892   VEC_free (iv_cand_p, heap, data->iv_candidates);
5893 }
5894
5895 /* Optimizes the LOOP.  Returns true if anything changed.  */
5896
5897 static bool
5898 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5899 {
5900   bool changed = false;
5901   struct iv_ca *iv_ca;
5902   edge exit;
5903
5904   data->current_loop = loop;
5905
5906   if (dump_file && (dump_flags & TDF_DETAILS))
5907     {
5908       fprintf (dump_file, "Processing loop %d\n", loop->num);
5909
5910       exit = single_dom_exit (loop);
5911       if (exit)
5912         {
5913           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5914                    exit->src->index, exit->dest->index);
5915           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5916           fprintf (dump_file, "\n");
5917         }
5918
5919       fprintf (dump_file, "\n");
5920     }
5921
5922   /* For each ssa name determines whether it behaves as an induction variable
5923      in some loop.  */
5924   if (!find_induction_variables (data))
5925     goto finish;
5926
5927   /* Finds interesting uses (item 1).  */
5928   find_interesting_uses (data);
5929   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5930     goto finish;
5931
5932   /* Finds candidates for the induction variables (item 2).  */
5933   find_iv_candidates (data);
5934
5935   /* Calculates the costs (item 3, part 1).  */
5936   determine_use_iv_costs (data);
5937   determine_iv_costs (data);
5938   determine_set_costs (data);
5939
5940   /* Find the optimal set of induction variables (item 3, part 2).  */
5941   iv_ca = find_optimal_iv_set (data);
5942   if (!iv_ca)
5943     goto finish;
5944   changed = true;
5945
5946   /* Create the new induction variables (item 4, part 1).  */
5947   create_new_ivs (data, iv_ca);
5948   iv_ca_free (&iv_ca);
5949
5950   /* Rewrite the uses (item 4, part 2).  */
5951   rewrite_uses (data);
5952
5953   /* Remove the ivs that are unused after rewriting.  */
5954   remove_unused_ivs (data);
5955
5956   /* We have changed the structure of induction variables; it might happen
5957      that definitions in the scev database refer to some of them that were
5958      eliminated.  */
5959   scev_reset ();
5960
5961 finish:
5962   free_loop_data (data);
5963
5964   return changed;
5965 }
5966
5967 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5968
5969 void
5970 tree_ssa_iv_optimize (struct loops *loops)
5971 {
5972   struct loop *loop;
5973   struct ivopts_data data;
5974
5975   tree_ssa_iv_optimize_init (loops, &data);
5976
5977   /* Optimize the loops starting with the innermost ones.  */
5978   loop = loops->tree_root;
5979   while (loop->inner)
5980     loop = loop->inner;
5981
5982   /* Scan the loops, inner ones first.  */
5983   while (loop != loops->tree_root)
5984     {
5985       if (dump_file && (dump_flags & TDF_DETAILS))
5986         flow_loop_dump (loop, dump_file, NULL, 1);
5987
5988       tree_ssa_iv_optimize_loop (&data, loop);
5989
5990       if (loop->next)
5991         {
5992           loop = loop->next;
5993           while (loop->inner)
5994             loop = loop->inner;
5995         }
5996       else
5997         loop = loop->outer;
5998     }
5999
6000   tree_ssa_iv_optimize_finalize (loops, &data);
6001 }