gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold (build1 (ADDR_EXPR, ptr_type_node, base));
 795
 796     case PLUS_EXPR:
 797     case MINUS_EXPR:
 798       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 799       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 800
 801       if (!op1)
 802         return op0;
 803
 804       if (!op0)
 805         return (code == PLUS_EXPR
 806                 ? op1
 807                 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
 808
 809       return fold (build (code, ptr_type_node, op0, op1));
 810
 811     case NOP_EXPR:
 812     case CONVERT_EXPR:
 813       return determine_base_object (TREE_OPERAND (expr, 0));
 814
 815     default:
 816       return fold_convert (ptr_type_node, expr);
 817     }
 818 }
 819
 820 /* Allocates an induction variable with given initial value BASE and step STEP
 821    for loop LOOP.  */
 822
 823 static struct iv *
 824 alloc_iv (tree base, tree step)
 825 {
 826   struct iv *iv = xcalloc (1, sizeof (struct iv));
 827
 828   if (step && integer_zerop (step))
 829     step = NULL_TREE;
 830
 831   iv->base = base;
 832   iv->base_object = determine_base_object (base);
 833   iv->step = step;
 834   iv->biv_p = false;
 835   iv->have_use_for = false;
 836   iv->use_id = 0;
 837   iv->ssa_name = NULL_TREE;
 838
 839   return iv;
 840 }
 841
 842 /* Sets STEP and BASE for induction variable IV.  */
 843
 844 static void
 845 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 846 {
 847   struct version_info *info = name_info (data, iv);
 848
 849   gcc_assert (!info->iv);
 850
 851   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 852   info->iv = alloc_iv (base, step);
 853   info->iv->ssa_name = iv;
 854 }
 855
 856 /* Finds induction variable declaration for VAR.  */
 857
 858 static struct iv *
 859 get_iv (struct ivopts_data *data, tree var)
 860 {
 861   basic_block bb;
 862
 863   if (!name_info (data, var)->iv)
 864     {
 865       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 866
 867       if (!bb
 868           || !flow_bb_inside_loop_p (data->current_loop, bb))
 869         set_iv (data, var, var, NULL_TREE);
 870     }
 871
 872   return name_info (data, var)->iv;
 873 }
 874
 875 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 876    not define a simple affine biv with nonzero step.  */
 877
 878 static tree
 879 determine_biv_step (tree phi)
 880 {
 881   struct loop *loop = bb_for_stmt (phi)->loop_father;
 882   tree name = PHI_RESULT (phi), base, step;
 883
 884   if (!is_gimple_reg (name))
 885     return NULL_TREE;
 886
 887   if (!simple_iv (loop, phi, name, &base, &step, true))
 888     return NULL_TREE;
 889
 890   if (zero_p (step))
 891     return NULL_TREE;
 892
 893   return step;
 894 }
 895
 896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 897
 898 static bool
 899 abnormal_ssa_name_p (tree exp)
 900 {
 901   if (!exp)
 902     return false;
 903
 904   if (TREE_CODE (exp) != SSA_NAME)
 905     return false;
 906
 907   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 908 }
 909
 910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 911    abnormal phi node.  Callback for for_each_index.  */
 912
 913 static bool
 914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 915                                   void *data ATTRIBUTE_UNUSED)
 916 {
 917   if (TREE_CODE (base) == ARRAY_REF)
 918     {
 919       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 920         return false;
 921       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 922         return false;
 923     }
 924
 925   return !abnormal_ssa_name_p (*index);
 926 }
 927
 928 /* Returns true if EXPR contains a ssa name that occurs in an
 929    abnormal phi node.  */
 930
 931 static bool
 932 contains_abnormal_ssa_name_p (tree expr)
 933 {
 934   enum tree_code code;
 935   enum tree_code_class class;
 936
 937   if (!expr)
 938     return false;
 939
 940   code = TREE_CODE (expr);
 941   class = TREE_CODE_CLASS (code);
 942
 943   if (code == SSA_NAME)
 944     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 945
 946   if (code == INTEGER_CST
 947       || is_gimple_min_invariant (expr))
 948     return false;
 949
 950   if (code == ADDR_EXPR)
 951     return !for_each_index (&TREE_OPERAND (expr, 0),
 952                             idx_contains_abnormal_ssa_name_p,
 953                             NULL);
 954
 955   switch (class)
 956     {
 957     case tcc_binary:
 958     case tcc_comparison:
 959       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 960         return true;
 961
 962       /* Fallthru.  */
 963     case tcc_unary:
 964       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 965         return true;
 966
 967       break;
 968
 969     default:
 970       gcc_unreachable ();
 971     }
 972
 973   return false;
 974 }
 975
 976 /* Finds basic ivs.  */
 977
 978 static bool
 979 find_bivs (struct ivopts_data *data)
 980 {
 981   tree phi, step, type, base;
 982   bool found = false;
 983   struct loop *loop = data->current_loop;
 984
 985   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 986     {
 987       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 988         continue;
 989
 990       step = determine_biv_step (phi);
 991       if (!step)
 992         continue;
 993
 994       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 995       if (contains_abnormal_ssa_name_p (base)
 996           || contains_abnormal_ssa_name_p (step))
 997         continue;
 998
 999       type = TREE_TYPE (PHI_RESULT (phi));
1000       base = fold_convert (type, base);
1001       if (step)
1002         step = fold_convert (type, step);
1003
1004       set_iv (data, PHI_RESULT (phi), base, step);
1005       found = true;
1006     }
1007
1008   return found;
1009 }
1010
1011 /* Marks basic ivs.  */
1012
1013 static void
1014 mark_bivs (struct ivopts_data *data)
1015 {
1016   tree phi, var;
1017   struct iv *iv, *incr_iv;
1018   struct loop *loop = data->current_loop;
1019   basic_block incr_bb;
1020
1021   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1022     {
1023       iv = get_iv (data, PHI_RESULT (phi));
1024       if (!iv)
1025         continue;
1026
1027       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1028       incr_iv = get_iv (data, var);
1029       if (!incr_iv)
1030         continue;
1031
1032       /* If the increment is in the subloop, ignore it.  */
1033       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1034       if (incr_bb->loop_father != data->current_loop
1035           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1036         continue;
1037
1038       iv->biv_p = true;
1039       incr_iv->biv_p = true;
1040     }
1041 }
1042
1043 /* Checks whether STMT defines a linear induction variable and stores its
1044    parameters to BASE and STEP.  */
1045
1046 static bool
1047 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1048                         tree *base, tree *step)
1049 {
1050   tree lhs;
1051   struct loop *loop = data->current_loop;
1052
1053   *base = NULL_TREE;
1054   *step = NULL_TREE;
1055
1056   if (TREE_CODE (stmt) != MODIFY_EXPR)
1057     return false;
1058
1059   lhs = TREE_OPERAND (stmt, 0);
1060   if (TREE_CODE (lhs) != SSA_NAME)
1061     return false;
1062
1063   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1064     return false;
1065
1066   if (contains_abnormal_ssa_name_p (*base)
1067       || contains_abnormal_ssa_name_p (*step))
1068     return false;
1069
1070   return true;
1071 }
1072
1073 /* Finds general ivs in statement STMT.  */
1074
1075 static void
1076 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1077 {
1078   tree base, step;
1079
1080   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1081     return;
1082
1083   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1084 }
1085
1086 /* Finds general ivs in basic block BB.  */
1087
1088 static void
1089 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1090 {
1091   block_stmt_iterator bsi;
1092
1093   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1094     find_givs_in_stmt (data, bsi_stmt (bsi));
1095 }
1096
1097 /* Finds general ivs.  */
1098
1099 static void
1100 find_givs (struct ivopts_data *data)
1101 {
1102   struct loop *loop = data->current_loop;
1103   basic_block *body = get_loop_body_in_dom_order (loop);
1104   unsigned i;
1105
1106   for (i = 0; i < loop->num_nodes; i++)
1107     find_givs_in_bb (data, body[i]);
1108   free (body);
1109 }
1110
1111 /* For each ssa name defined in LOOP determines whether it is an induction
1112    variable and if so, its initial value and step.  */
1113
1114 static bool
1115 find_induction_variables (struct ivopts_data *data)
1116 {
1117   unsigned i;
1118   bitmap_iterator bi;
1119
1120   if (!find_bivs (data))
1121     return false;
1122
1123   find_givs (data);
1124   mark_bivs (data);
1125
1126   if (dump_file && (dump_flags & TDF_DETAILS))
1127     {
1128       struct tree_niter_desc *niter;
1129
1130       niter = niter_for_single_dom_exit (data);
1131
1132       if (niter)
1133         {
1134           fprintf (dump_file, "  number of iterations ");
1135           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1136           fprintf (dump_file, "\n");
1137
1138           fprintf (dump_file, "  may be zero if ");
1139           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141           fprintf (dump_file, "\n");
1142         };
1143
1144       fprintf (dump_file, "Induction variables:\n\n");
1145
1146       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147         {
1148           if (ver_info (data, i)->iv)
1149             dump_iv (dump_file, ver_info (data, i)->iv);
1150         }
1151     }
1152
1153   return true;
1154 }
1155
1156 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1157
1158 static struct iv_use *
1159 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1160             tree stmt, enum use_type use_type)
1161 {
1162   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1163
1164   use->id = n_iv_uses (data);
1165   use->type = use_type;
1166   use->iv = iv;
1167   use->stmt = stmt;
1168   use->op_p = use_p;
1169   use->related_cands = BITMAP_ALLOC (NULL);
1170
1171   /* To avoid showing ssa name in the dumps, if it was not reset by the
1172      caller.  */
1173   iv->ssa_name = NULL_TREE;
1174
1175   if (dump_file && (dump_flags & TDF_DETAILS))
1176     dump_use (dump_file, use);
1177
1178   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1179
1180   return use;
1181 }
1182
1183 /* Checks whether OP is a loop-level invariant and if so, records it.
1184    NONLINEAR_USE is true if the invariant is used in a way we do not
1185    handle specially.  */
1186
1187 static void
1188 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1189 {
1190   basic_block bb;
1191   struct version_info *info;
1192
1193   if (TREE_CODE (op) != SSA_NAME
1194       || !is_gimple_reg (op))
1195     return;
1196
1197   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1198   if (bb
1199       && flow_bb_inside_loop_p (data->current_loop, bb))
1200     return;
1201
1202   info = name_info (data, op);
1203   info->name = op;
1204   info->has_nonlin_use |= nonlinear_use;
1205   if (!info->inv_id)
1206     info->inv_id = ++data->max_inv_id;
1207   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 }
1209
1210 /* Checks whether the use OP is interesting and if so, records it
1211    as TYPE.  */
1212
1213 static struct iv_use *
1214 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1215                                        enum use_type type)
1216 {
1217   struct iv *iv;
1218   struct iv *civ;
1219   tree stmt;
1220   struct iv_use *use;
1221
1222   if (TREE_CODE (op) != SSA_NAME)
1223     return NULL;
1224
1225   iv = get_iv (data, op);
1226   if (!iv)
1227     return NULL;
1228
1229   if (iv->have_use_for)
1230     {
1231       use = iv_use (data, iv->use_id);
1232
1233       gcc_assert (use->type == USE_NONLINEAR_EXPR
1234                   || use->type == USE_OUTER);
1235
1236       if (type == USE_NONLINEAR_EXPR)
1237         use->type = USE_NONLINEAR_EXPR;
1238       return use;
1239     }
1240
1241   if (zero_p (iv->step))
1242     {
1243       record_invariant (data, op, true);
1244       return NULL;
1245     }
1246   iv->have_use_for = true;
1247
1248   civ = xmalloc (sizeof (struct iv));
1249   *civ = *iv;
1250
1251   stmt = SSA_NAME_DEF_STMT (op);
1252   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1253               || TREE_CODE (stmt) == MODIFY_EXPR);
1254
1255   use = record_use (data, NULL, civ, stmt, type);
1256   iv->use_id = use->id;
1257
1258   return use;
1259 }
1260
1261 /* Checks whether the use OP is interesting and if so, records it.  */
1262
1263 static struct iv_use *
1264 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 {
1266   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 }
1268
1269 /* Records a definition of induction variable OP that is used outside of the
1270    loop.  */
1271
1272 static struct iv_use *
1273 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 {
1275   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 }
1277
1278 /* Checks whether the condition *COND_P in STMT is interesting
1279    and if so, records it.  */
1280
1281 static void
1282 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1283 {
1284   tree *op0_p;
1285   tree *op1_p;
1286   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1287   struct iv const_iv;
1288   tree zero = integer_zero_node;
1289
1290   const_iv.step = NULL_TREE;
1291
1292   if (TREE_CODE (*cond_p) != SSA_NAME
1293       && !COMPARISON_CLASS_P (*cond_p))
1294     return;
1295
1296   if (TREE_CODE (*cond_p) == SSA_NAME)
1297     {
1298       op0_p = cond_p;
1299       op1_p = &zero;
1300     }
1301   else
1302     {
1303       op0_p = &TREE_OPERAND (*cond_p, 0);
1304       op1_p = &TREE_OPERAND (*cond_p, 1);
1305     }
1306
1307   if (TREE_CODE (*op0_p) == SSA_NAME)
1308     iv0 = get_iv (data, *op0_p);
1309   else
1310     iv0 = &const_iv;
1311
1312   if (TREE_CODE (*op1_p) == SSA_NAME)
1313     iv1 = get_iv (data, *op1_p);
1314   else
1315     iv1 = &const_iv;
1316
1317   if (/* When comparing with non-invariant value, we may not do any senseful
1318          induction variable elimination.  */
1319       (!iv0 || !iv1)
1320       /* Eliminating condition based on two ivs would be nontrivial.
1321          ??? TODO -- it is not really important to handle this case.  */
1322       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323     {
1324       find_interesting_uses_op (data, *op0_p);
1325       find_interesting_uses_op (data, *op1_p);
1326       return;
1327     }
1328
1329   if (zero_p (iv0->step) && zero_p (iv1->step))
1330     {
1331       /* If both are invariants, this is a work for unswitching.  */
1332       return;
1333     }
1334
1335   civ = xmalloc (sizeof (struct iv));
1336   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1337   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 }
1339
1340 /* Returns true if expression EXPR is obviously invariant in LOOP,
1341    i.e. if all its operands are defined outside of the LOOP.  */
1342
1343 bool
1344 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1345 {
1346   basic_block def_bb;
1347   unsigned i, len;
1348
1349   if (is_gimple_min_invariant (expr))
1350     return true;
1351
1352   if (TREE_CODE (expr) == SSA_NAME)
1353     {
1354       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1355       if (def_bb
1356           && flow_bb_inside_loop_p (loop, def_bb))
1357         return false;
1358
1359       return true;
1360     }
1361
1362   if (!EXPR_P (expr))
1363     return false;
1364
1365   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1366   for (i = 0; i < len; i++)
1367     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1368       return false;
1369
1370   return true;
1371 }
1372
1373 /* Cumulates the steps of indices into DATA and replaces their values with the
1374    initial ones.  Returns false when the value of the index cannot be determined.
1375    Callback for for_each_index.  */
1376
1377 struct ifs_ivopts_data
1378 {
1379   struct ivopts_data *ivopts_data;
1380   tree stmt;
1381   tree *step_p;
1382 };
1383
1384 static bool
1385 idx_find_step (tree base, tree *idx, void *data)
1386 {
1387   struct ifs_ivopts_data *dta = data;
1388   struct iv *iv;
1389   tree step, type, iv_type, iv_step, lbound, off;
1390   struct loop *loop = dta->ivopts_data->current_loop;
1391
1392   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1393       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1394     return false;
1395
1396   /* If base is a component ref, require that the offset of the reference
1397      be invariant.  */
1398   if (TREE_CODE (base) == COMPONENT_REF)
1399     {
1400       off = component_ref_field_offset (base);
1401       return expr_invariant_in_loop_p (loop, off);
1402     }
1403
1404   /* If base is array, first check whether we will be able to move the
1405      reference out of the loop (in order to take its address in strength
1406      reduction).  In order for this to work we need both lower bound
1407      and step to be loop invariants.  */
1408   if (TREE_CODE (base) == ARRAY_REF)
1409     {
1410       step = array_ref_element_size (base);
1411       lbound = array_ref_low_bound (base);
1412
1413       if (!expr_invariant_in_loop_p (loop, step)
1414           || !expr_invariant_in_loop_p (loop, lbound))
1415         return false;
1416     }
1417
1418   if (TREE_CODE (*idx) != SSA_NAME)
1419     return true;
1420
1421   iv = get_iv (dta->ivopts_data, *idx);
1422   if (!iv)
1423     return false;
1424
1425   *idx = iv->base;
1426
1427   if (!iv->step)
1428     return true;
1429
1430   iv_type = TREE_TYPE (iv->base);
1431   type = build_pointer_type (TREE_TYPE (base));
1432   if (TREE_CODE (base) == ARRAY_REF)
1433     {
1434       step = array_ref_element_size (base);
1435
1436       /* We only handle addresses whose step is an integer constant.  */
1437       if (TREE_CODE (step) != INTEGER_CST)
1438         return false;
1439     }
1440   else
1441     /* The step for pointer arithmetics already is 1 byte.  */
1442     step = build_int_cst (type, 1);
1443
1444   if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1445     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1446                                           type, iv->base, iv->step, dta->stmt);
1447   else
1448     iv_step = fold_convert (iv_type, iv->step);
1449
1450   if (!iv_step)
1451     {
1452       /* The index might wrap.  */
1453       return false;
1454     }
1455
1456   step = fold_build2 (MULT_EXPR, type, step, iv_step);
1457
1458   if (!*dta->step_p)
1459     *dta->step_p = step;
1460   else
1461     *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1462
1463   return true;
1464 }
1465
1466 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1467    object is passed to it in DATA.  */
1468
1469 static bool
1470 idx_record_use (tree base, tree *idx,
1471                 void *data)
1472 {
1473   find_interesting_uses_op (data, *idx);
1474   if (TREE_CODE (base) == ARRAY_REF)
1475     {
1476       find_interesting_uses_op (data, array_ref_element_size (base));
1477       find_interesting_uses_op (data, array_ref_low_bound (base));
1478     }
1479   return true;
1480 }
1481
1482 /* Returns true if memory reference REF may be unaligned.  */
1483
1484 static bool
1485 may_be_unaligned_p (tree ref)
1486 {
1487   tree base;
1488   tree base_type;
1489   HOST_WIDE_INT bitsize;
1490   HOST_WIDE_INT bitpos;
1491   tree toffset;
1492   enum machine_mode mode;
1493   int unsignedp, volatilep;
1494   unsigned base_align;
1495
1496   /* The test below is basically copy of what expr.c:normal_inner_ref
1497      does to check whether the object must be loaded by parts when
1498      STRICT_ALIGNMENT is true.  */
1499   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1500                               &unsignedp, &volatilep, true);
1501   base_type = TREE_TYPE (base);
1502   base_align = TYPE_ALIGN (base_type);
1503
1504   if (mode != BLKmode
1505       && (base_align < GET_MODE_ALIGNMENT (mode)
1506           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1507           || bitpos % BITS_PER_UNIT != 0))
1508     return true;
1509
1510   return false;
1511 }
1512
1513 /* Builds ADDR_EXPR of object OBJ.  If OBJ is an INDIRECT_REF, the indirect_ref
1514    is stripped instead.  */
1515
1516 static tree
1517 build_addr_strip_iref (tree obj)
1518 {
1519   tree type;
1520
1521   if (TREE_CODE (obj) == INDIRECT_REF)
1522     {
1523       type = build_pointer_type (TREE_TYPE (obj));
1524       obj = fold_convert (type, TREE_OPERAND (obj, 0));
1525     }
1526   else
1527     obj = build_addr (obj);
1528
1529   return obj;
1530 }
1531
1532 /* Finds addresses in *OP_P inside STMT.  */
1533
1534 static void
1535 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1536 {
1537   tree base = unshare_expr (*op_p), step = NULL;
1538   struct iv *civ;
1539   struct ifs_ivopts_data ifs_ivopts_data;
1540
1541   /* Do not play with volatile memory references.  A bit too conservative,
1542      perhaps, but safe.  */
1543   if (stmt_ann (stmt)->has_volatile_ops)
1544     goto fail;
1545
1546   /* Ignore bitfields for now.  Not really something terribly complicated
1547      to handle.  TODO.  */
1548   if (TREE_CODE (base) == COMPONENT_REF
1549       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1550     goto fail;
1551
1552   if (STRICT_ALIGNMENT
1553       && may_be_unaligned_p (base))
1554     goto fail;
1555
1556   ifs_ivopts_data.ivopts_data = data;
1557   ifs_ivopts_data.stmt = stmt;
1558   ifs_ivopts_data.step_p = &step;
1559   if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1560       || zero_p (step))
1561     goto fail;
1562
1563   gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564   gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1565
1566   base = build_addr_strip_iref (base);
1567
1568   civ = alloc_iv (base, step);
1569   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1570   return;
1571
1572 fail:
1573   for_each_index (op_p, idx_record_use, data);
1574 }
1575
1576 /* Finds and records invariants used in STMT.  */
1577
1578 static void
1579 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1580 {
1581   ssa_op_iter iter;
1582   use_operand_p use_p;
1583   tree op;
1584
1585   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1586     {
1587       op = USE_FROM_PTR (use_p);
1588       record_invariant (data, op, false);
1589     }
1590 }
1591
1592 /* Finds interesting uses of induction variables in the statement STMT.  */
1593
1594 static void
1595 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1596 {
1597   struct iv *iv;
1598   tree op, lhs, rhs;
1599   ssa_op_iter iter;
1600   use_operand_p use_p;
1601
1602   find_invariants_stmt (data, stmt);
1603
1604   if (TREE_CODE (stmt) == COND_EXPR)
1605     {
1606       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1607       return;
1608     }
1609
1610   if (TREE_CODE (stmt) == MODIFY_EXPR)
1611     {
1612       lhs = TREE_OPERAND (stmt, 0);
1613       rhs = TREE_OPERAND (stmt, 1);
1614
1615       if (TREE_CODE (lhs) == SSA_NAME)
1616         {
1617           /* If the statement defines an induction variable, the uses are not
1618              interesting by themselves.  */
1619
1620           iv = get_iv (data, lhs);
1621
1622           if (iv && !zero_p (iv->step))
1623             return;
1624         }
1625
1626       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1627         {
1628         case tcc_comparison:
1629           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1630           return;
1631
1632         case tcc_reference:
1633           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1634           if (REFERENCE_CLASS_P (lhs))
1635             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1636           return;
1637
1638         default: ;
1639         }
1640
1641       if (REFERENCE_CLASS_P (lhs)
1642           && is_gimple_val (rhs))
1643         {
1644           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1645           find_interesting_uses_op (data, rhs);
1646           return;
1647         }
1648
1649       /* TODO -- we should also handle address uses of type
1650
1651          memory = call (whatever);
1652
1653          and
1654
1655          call (memory).  */
1656     }
1657
1658   if (TREE_CODE (stmt) == PHI_NODE
1659       && bb_for_stmt (stmt) == data->current_loop->header)
1660     {
1661       lhs = PHI_RESULT (stmt);
1662       iv = get_iv (data, lhs);
1663
1664       if (iv && !zero_p (iv->step))
1665         return;
1666     }
1667
1668   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1669     {
1670       op = USE_FROM_PTR (use_p);
1671
1672       if (TREE_CODE (op) != SSA_NAME)
1673         continue;
1674
1675       iv = get_iv (data, op);
1676       if (!iv)
1677         continue;
1678
1679       find_interesting_uses_op (data, op);
1680     }
1681 }
1682
1683 /* Finds interesting uses of induction variables outside of loops
1684    on loop exit edge EXIT.  */
1685
1686 static void
1687 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1688 {
1689   tree phi, def;
1690
1691   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1692     {
1693       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1694       find_interesting_uses_outer (data, def);
1695     }
1696 }
1697
1698 /* Finds uses of the induction variables that are interesting.  */
1699
1700 static void
1701 find_interesting_uses (struct ivopts_data *data)
1702 {
1703   basic_block bb;
1704   block_stmt_iterator bsi;
1705   tree phi;
1706   basic_block *body = get_loop_body (data->current_loop);
1707   unsigned i;
1708   struct version_info *info;
1709   edge e;
1710
1711   if (dump_file && (dump_flags & TDF_DETAILS))
1712     fprintf (dump_file, "Uses:\n\n");
1713
1714   for (i = 0; i < data->current_loop->num_nodes; i++)
1715     {
1716       edge_iterator ei;
1717       bb = body[i];
1718
1719       FOR_EACH_EDGE (e, ei, bb->succs)
1720         if (e->dest != EXIT_BLOCK_PTR
1721             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1722           find_interesting_uses_outside (data, e);
1723
1724       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1725         find_interesting_uses_stmt (data, phi);
1726       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1727         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1728     }
1729
1730   if (dump_file && (dump_flags & TDF_DETAILS))
1731     {
1732       bitmap_iterator bi;
1733
1734       fprintf (dump_file, "\n");
1735
1736       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1737         {
1738           info = ver_info (data, i);
1739           if (info->inv_id)
1740             {
1741               fprintf (dump_file, "  ");
1742               print_generic_expr (dump_file, info->name, TDF_SLIM);
1743               fprintf (dump_file, " is invariant (%d)%s\n",
1744                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1745             }
1746         }
1747
1748       fprintf (dump_file, "\n");
1749     }
1750
1751   free (body);
1752 }
1753
1754 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1755    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1756    we are at the top-level of the processed address.  */
1757
1758 static tree
1759 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1760                 unsigned HOST_WIDE_INT *offset)
1761 {
1762   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1763   enum tree_code code;
1764   tree type, orig_type = TREE_TYPE (expr);
1765   unsigned HOST_WIDE_INT off0, off1, st;
1766   tree orig_expr = expr;
1767
1768   STRIP_NOPS (expr);
1769
1770   type = TREE_TYPE (expr);
1771   code = TREE_CODE (expr);
1772   *offset = 0;
1773
1774   switch (code)
1775     {
1776     case INTEGER_CST:
1777       if (!cst_and_fits_in_hwi (expr)
1778           || zero_p (expr))
1779         return orig_expr;
1780
1781       *offset = int_cst_value (expr);
1782       return build_int_cst_type (orig_type, 0);
1783
1784     case PLUS_EXPR:
1785     case MINUS_EXPR:
1786       op0 = TREE_OPERAND (expr, 0);
1787       op1 = TREE_OPERAND (expr, 1);
1788
1789       op0 = strip_offset_1 (op0, false, false, &off0);
1790       op1 = strip_offset_1 (op1, false, false, &off1);
1791
1792       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1793       if (op0 == TREE_OPERAND (expr, 0)
1794           && op1 == TREE_OPERAND (expr, 1))
1795         return orig_expr;
1796
1797       if (zero_p (op1))
1798         expr = op0;
1799       else if (zero_p (op0))
1800         {
1801           if (code == PLUS_EXPR)
1802             expr = op1;
1803           else
1804             expr = fold_build1 (NEGATE_EXPR, type, op1);
1805         }
1806       else
1807         expr = fold_build2 (code, type, op0, op1);
1808
1809       return fold_convert (orig_type, expr);
1810
1811     case ARRAY_REF:
1812       if (!inside_addr)
1813         return orig_expr;
1814
1815       step = array_ref_element_size (expr);
1816       if (!cst_and_fits_in_hwi (step))
1817         break;
1818
1819       st = int_cst_value (step);
1820       op1 = TREE_OPERAND (expr, 1);
1821       op1 = strip_offset_1 (op1, false, false, &off1);
1822       *offset = off1 * st;
1823
1824       if (top_compref
1825           && zero_p (op1))
1826         {
1827           /* Strip the component reference completely.  */
1828           op0 = TREE_OPERAND (expr, 0);
1829           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1830           *offset += off0;
1831           return op0;
1832         }
1833       break;
1834
1835     case COMPONENT_REF:
1836       if (!inside_addr)
1837         return orig_expr;
1838
1839       tmp = component_ref_field_offset (expr);
1840       if (top_compref
1841           && cst_and_fits_in_hwi (tmp))
1842         {
1843           /* Strip the component reference completely.  */
1844           op0 = TREE_OPERAND (expr, 0);
1845           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1846           *offset = off0 + int_cst_value (tmp);
1847           return op0;
1848         }
1849       break;
1850
1851     case ADDR_EXPR:
1852       op0 = TREE_OPERAND (expr, 0);
1853       op0 = strip_offset_1 (op0, true, true, &off0);
1854       *offset += off0;
1855
1856       if (op0 == TREE_OPERAND (expr, 0))
1857         return orig_expr;
1858
1859       expr = build_addr_strip_iref (op0);
1860       return fold_convert (orig_type, expr);
1861
1862     case INDIRECT_REF:
1863       inside_addr = false;
1864       break;
1865
1866     default:
1867       return orig_expr;
1868     }
1869
1870   /* Default handling of expressions for that we want to recurse into
1871      the first operand.  */
1872   op0 = TREE_OPERAND (expr, 0);
1873   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1874   *offset += off0;
1875
1876   if (op0 == TREE_OPERAND (expr, 0)
1877       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1878     return orig_expr;
1879
1880   expr = copy_node (expr);
1881   TREE_OPERAND (expr, 0) = op0;
1882   if (op1)
1883     TREE_OPERAND (expr, 1) = op1;
1884
1885   /* Inside address, we might strip the top level component references,
1886      thus changing type of the expresion.  Handling of ADDR_EXPR
1887      will fix that.  */
1888   expr = fold_convert (orig_type, expr);
1889
1890   return expr;
1891 }
1892
1893 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1894
1895 static tree
1896 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1897 {
1898   return strip_offset_1 (expr, false, false, offset);
1899 }
1900
1901 /* Returns variant of TYPE that can be used as base for different uses.
1902    For integer types, we return unsigned variant of the type, which
1903    avoids problems with overflows.  For pointer types, we return void *.  */
1904
1905 static tree
1906 generic_type_for (tree type)
1907 {
1908   if (POINTER_TYPE_P (type))
1909     return ptr_type_node;
1910
1911   if (TYPE_UNSIGNED (type))
1912     return type;
1913
1914   return unsigned_type_for (type);
1915 }
1916
1917 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1918    the bitmap to that we should store it.  */
1919
1920 static struct ivopts_data *fd_ivopts_data;
1921 static tree
1922 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1923 {
1924   bitmap *depends_on = data;
1925   struct version_info *info;
1926
1927   if (TREE_CODE (*expr_p) != SSA_NAME)
1928     return NULL_TREE;
1929   info = name_info (fd_ivopts_data, *expr_p);
1930
1931   if (!info->inv_id || info->has_nonlin_use)
1932     return NULL_TREE;
1933
1934   if (!*depends_on)
1935     *depends_on = BITMAP_ALLOC (NULL);
1936   bitmap_set_bit (*depends_on, info->inv_id);
1937
1938   return NULL_TREE;
1939 }
1940
1941 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1942    position to POS.  If USE is not NULL, the candidate is set as related to
1943    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1944    replacement of the final value of the iv by a direct computation.  */
1945
1946 static struct iv_cand *
1947 add_candidate_1 (struct ivopts_data *data,
1948                  tree base, tree step, bool important, enum iv_position pos,
1949                  struct iv_use *use, tree incremented_at)
1950 {
1951   unsigned i;
1952   struct iv_cand *cand = NULL;
1953   tree type, orig_type;
1954
1955   if (base)
1956     {
1957       orig_type = TREE_TYPE (base);
1958       type = generic_type_for (orig_type);
1959       if (type != orig_type)
1960         {
1961           base = fold_convert (type, base);
1962           if (step)
1963             step = fold_convert (type, step);
1964         }
1965     }
1966
1967   for (i = 0; i < n_iv_cands (data); i++)
1968     {
1969       cand = iv_cand (data, i);
1970
1971       if (cand->pos != pos)
1972         continue;
1973
1974       if (cand->incremented_at != incremented_at)
1975         continue;
1976
1977       if (!cand->iv)
1978         {
1979           if (!base && !step)
1980             break;
1981
1982           continue;
1983         }
1984
1985       if (!base && !step)
1986         continue;
1987
1988       if (!operand_equal_p (base, cand->iv->base, 0))
1989         continue;
1990
1991       if (zero_p (cand->iv->step))
1992         {
1993           if (zero_p (step))
1994             break;
1995         }
1996       else
1997         {
1998           if (step && operand_equal_p (step, cand->iv->step, 0))
1999             break;
2000         }
2001     }
2002
2003   if (i == n_iv_cands (data))
2004     {
2005       cand = xcalloc (1, sizeof (struct iv_cand));
2006       cand->id = i;
2007
2008       if (!base && !step)
2009         cand->iv = NULL;
2010       else
2011         cand->iv = alloc_iv (base, step);
2012
2013       cand->pos = pos;
2014       if (pos != IP_ORIGINAL && cand->iv)
2015         {
2016           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2017           cand->var_after = cand->var_before;
2018         }
2019       cand->important = important;
2020       cand->incremented_at = incremented_at;
2021       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2022
2023       if (step
2024           && TREE_CODE (step) != INTEGER_CST)
2025         {
2026           fd_ivopts_data = data;
2027           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2028         }
2029
2030       if (dump_file && (dump_flags & TDF_DETAILS))
2031         dump_cand (dump_file, cand);
2032     }
2033
2034   if (important && !cand->important)
2035     {
2036       cand->important = true;
2037       if (dump_file && (dump_flags & TDF_DETAILS))
2038         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2039     }
2040
2041   if (use)
2042     {
2043       bitmap_set_bit (use->related_cands, i);
2044       if (dump_file && (dump_flags & TDF_DETAILS))
2045         fprintf (dump_file, "Candidate %d is related to use %d\n",
2046                  cand->id, use->id);
2047     }
2048
2049   return cand;
2050 }
2051
2052 /* Returns true if incrementing the induction variable at the end of the LOOP
2053    is allowed.
2054
2055    The purpose is to avoid splitting latch edge with a biv increment, thus
2056    creating a jump, possibly confusing other optimization passes and leaving
2057    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2058    is not available (so we do not have a better alternative), or if the latch
2059    edge is already nonempty.  */
2060
2061 static bool
2062 allow_ip_end_pos_p (struct loop *loop)
2063 {
2064   if (!ip_normal_pos (loop))
2065     return true;
2066
2067   if (!empty_block_p (ip_end_pos (loop)))
2068     return true;
2069
2070   return false;
2071 }
2072
2073 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2074    position to POS.  If USE is not NULL, the candidate is set as related to
2075    it.  The candidate computation is scheduled on all available positions.  */
2076
2077 static void
2078 add_candidate (struct ivopts_data *data,
2079                tree base, tree step, bool important, struct iv_use *use)
2080 {
2081   if (ip_normal_pos (data->current_loop))
2082     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2083   if (ip_end_pos (data->current_loop)
2084       && allow_ip_end_pos_p (data->current_loop))
2085     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2086 }
2087
2088 /* Add a standard "0 + 1 * iteration" iv candidate for a
2089    type with SIZE bits.  */
2090
2091 static void
2092 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2093                                      unsigned int size)
2094 {
2095   tree type = lang_hooks.types.type_for_size (size, true);
2096   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2097                  true, NULL);
2098 }
2099
2100 /* Adds standard iv candidates.  */
2101
2102 static void
2103 add_standard_iv_candidates (struct ivopts_data *data)
2104 {
2105   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2106
2107   /* The same for a double-integer type if it is still fast enough.  */
2108   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2109     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2110 }
2111
2112
2113 /* Adds candidates bases on the old induction variable IV.  */
2114
2115 static void
2116 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2117 {
2118   tree phi, def;
2119   struct iv_cand *cand;
2120
2121   add_candidate (data, iv->base, iv->step, true, NULL);
2122
2123   /* The same, but with initial value zero.  */
2124   add_candidate (data,
2125                  build_int_cst (TREE_TYPE (iv->base), 0),
2126                  iv->step, true, NULL);
2127
2128   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2129   if (TREE_CODE (phi) == PHI_NODE)
2130     {
2131       /* Additionally record the possibility of leaving the original iv
2132          untouched.  */
2133       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2134       cand = add_candidate_1 (data,
2135                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2136                               SSA_NAME_DEF_STMT (def));
2137       cand->var_before = iv->ssa_name;
2138       cand->var_after = def;
2139     }
2140 }
2141
2142 /* Adds candidates based on the old induction variables.  */
2143
2144 static void
2145 add_old_ivs_candidates (struct ivopts_data *data)
2146 {
2147   unsigned i;
2148   struct iv *iv;
2149   bitmap_iterator bi;
2150
2151   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2152     {
2153       iv = ver_info (data, i)->iv;
2154       if (iv && iv->biv_p && !zero_p (iv->step))
2155         add_old_iv_candidates (data, iv);
2156     }
2157 }
2158
2159 /* Adds candidates based on the value of the induction variable IV and USE.  */
2160
2161 static void
2162 add_iv_value_candidates (struct ivopts_data *data,
2163                          struct iv *iv, struct iv_use *use)
2164 {
2165   unsigned HOST_WIDE_INT offset;
2166   tree base;
2167
2168   add_candidate (data, iv->base, iv->step, false, use);
2169
2170   /* The same, but with initial value zero.  Make such variable important,
2171      since it is generic enough so that possibly many uses may be based
2172      on it.  */
2173   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2174                  iv->step, true, use);
2175
2176   /* Third, try removing the constant offset.  */
2177   base = strip_offset (iv->base, &offset);
2178   if (offset)
2179     add_candidate (data, base, iv->step, false, use);
2180 }
2181
2182 /* Possibly adds pseudocandidate for replacing the final value of USE by
2183    a direct computation.  */
2184
2185 static void
2186 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2187 {
2188   struct tree_niter_desc *niter;
2189
2190   /* We must know where we exit the loop and how many times does it roll.  */
2191   niter = niter_for_single_dom_exit (data);
2192   if (!niter
2193       || !zero_p (niter->may_be_zero))
2194     return;
2195
2196   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2197 }
2198
2199 /* Adds candidates based on the uses.  */
2200
2201 static void
2202 add_derived_ivs_candidates (struct ivopts_data *data)
2203 {
2204   unsigned i;
2205
2206   for (i = 0; i < n_iv_uses (data); i++)
2207     {
2208       struct iv_use *use = iv_use (data, i);
2209
2210       if (!use)
2211         continue;
2212
2213       switch (use->type)
2214         {
2215         case USE_NONLINEAR_EXPR:
2216         case USE_COMPARE:
2217         case USE_ADDRESS:
2218           /* Just add the ivs based on the value of the iv used here.  */
2219           add_iv_value_candidates (data, use->iv, use);
2220           break;
2221
2222         case USE_OUTER:
2223           add_iv_value_candidates (data, use->iv, use);
2224
2225           /* Additionally, add the pseudocandidate for the possibility to
2226              replace the final value by a direct computation.  */
2227           add_iv_outer_candidates (data, use);
2228           break;
2229
2230         default:
2231           gcc_unreachable ();
2232         }
2233     }
2234 }
2235
2236 /* Record important candidates and add them to related_cands bitmaps
2237    if needed.  */
2238
2239 static void
2240 record_important_candidates (struct ivopts_data *data)
2241 {
2242   unsigned i;
2243   struct iv_use *use;
2244
2245   for (i = 0; i < n_iv_cands (data); i++)
2246     {
2247       struct iv_cand *cand = iv_cand (data, i);
2248
2249       if (cand->important)
2250         bitmap_set_bit (data->important_candidates, i);
2251     }
2252
2253   data->consider_all_candidates = (n_iv_cands (data)
2254                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2255
2256   if (data->consider_all_candidates)
2257     {
2258       /* We will not need "related_cands" bitmaps in this case,
2259          so release them to decrease peak memory consumption.  */
2260       for (i = 0; i < n_iv_uses (data); i++)
2261         {
2262           use = iv_use (data, i);
2263           BITMAP_FREE (use->related_cands);
2264         }
2265     }
2266   else
2267     {
2268       /* Add important candidates to the related_cands bitmaps.  */
2269       for (i = 0; i < n_iv_uses (data); i++)
2270         bitmap_ior_into (iv_use (data, i)->related_cands,
2271                          data->important_candidates);
2272     }
2273 }
2274
2275 /* Finds the candidates for the induction variables.  */
2276
2277 static void
2278 find_iv_candidates (struct ivopts_data *data)
2279 {
2280   /* Add commonly used ivs.  */
2281   add_standard_iv_candidates (data);
2282
2283   /* Add old induction variables.  */
2284   add_old_ivs_candidates (data);
2285
2286   /* Add induction variables derived from uses.  */
2287   add_derived_ivs_candidates (data);
2288
2289   /* Record the important candidates.  */
2290   record_important_candidates (data);
2291 }
2292
2293 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2294    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2295    we allocate a simple list to every use.  */
2296
2297 static void
2298 alloc_use_cost_map (struct ivopts_data *data)
2299 {
2300   unsigned i, size, s, j;
2301
2302   for (i = 0; i < n_iv_uses (data); i++)
2303     {
2304       struct iv_use *use = iv_use (data, i);
2305       bitmap_iterator bi;
2306
2307       if (data->consider_all_candidates)
2308         size = n_iv_cands (data);
2309       else
2310         {
2311           s = 0;
2312           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2313             {
2314               s++;
2315             }
2316
2317           /* Round up to the power of two, so that moduling by it is fast.  */
2318           for (size = 1; size < s; size <<= 1)
2319             continue;
2320         }
2321
2322       use->n_map_members = size;
2323       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2324     }
2325 }
2326
2327 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2328    on invariants DEPENDS_ON and that the value used in expressing it
2329    is VALUE.*/
2330
2331 static void
2332 set_use_iv_cost (struct ivopts_data *data,
2333                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2334                  bitmap depends_on, tree value)
2335 {
2336   unsigned i, s;
2337
2338   if (cost == INFTY)
2339     {
2340       BITMAP_FREE (depends_on);
2341       return;
2342     }
2343
2344   if (data->consider_all_candidates)
2345     {
2346       use->cost_map[cand->id].cand = cand;
2347       use->cost_map[cand->id].cost = cost;
2348       use->cost_map[cand->id].depends_on = depends_on;
2349       use->cost_map[cand->id].value = value;
2350       return;
2351     }
2352
2353   /* n_map_members is a power of two, so this computes modulo.  */
2354   s = cand->id & (use->n_map_members - 1);
2355   for (i = s; i < use->n_map_members; i++)
2356     if (!use->cost_map[i].cand)
2357       goto found;
2358   for (i = 0; i < s; i++)
2359     if (!use->cost_map[i].cand)
2360       goto found;
2361
2362   gcc_unreachable ();
2363
2364 found:
2365   use->cost_map[i].cand = cand;
2366   use->cost_map[i].cost = cost;
2367   use->cost_map[i].depends_on = depends_on;
2368   use->cost_map[i].value = value;
2369 }
2370
2371 /* Gets cost of (USE, CANDIDATE) pair.  */
2372
2373 static struct cost_pair *
2374 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2375                  struct iv_cand *cand)
2376 {
2377   unsigned i, s;
2378   struct cost_pair *ret;
2379
2380   if (!cand)
2381     return NULL;
2382
2383   if (data->consider_all_candidates)
2384     {
2385       ret = use->cost_map + cand->id;
2386       if (!ret->cand)
2387         return NULL;
2388
2389       return ret;
2390     }
2391
2392   /* n_map_members is a power of two, so this computes modulo.  */
2393   s = cand->id & (use->n_map_members - 1);
2394   for (i = s; i < use->n_map_members; i++)
2395     if (use->cost_map[i].cand == cand)
2396       return use->cost_map + i;
2397
2398   for (i = 0; i < s; i++)
2399     if (use->cost_map[i].cand == cand)
2400       return use->cost_map + i;
2401
2402   return NULL;
2403 }
2404
2405 /* Returns estimate on cost of computing SEQ.  */
2406
2407 static unsigned
2408 seq_cost (rtx seq)
2409 {
2410   unsigned cost = 0;
2411   rtx set;
2412
2413   for (; seq; seq = NEXT_INSN (seq))
2414     {
2415       set = single_set (seq);
2416       if (set)
2417         cost += rtx_cost (set, SET);
2418       else
2419         cost++;
2420     }
2421
2422   return cost;
2423 }
2424
2425 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2426 static rtx
2427 produce_memory_decl_rtl (tree obj, int *regno)
2428 {
2429   rtx x;
2430
2431   gcc_assert (obj);
2432   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2433     {
2434       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2435       x = gen_rtx_SYMBOL_REF (Pmode, name);
2436     }
2437   else
2438     x = gen_raw_REG (Pmode, (*regno)++);
2439
2440   return gen_rtx_MEM (DECL_MODE (obj), x);
2441 }
2442
2443 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2444    walk_tree.  DATA contains the actual fake register number.  */
2445
2446 static tree
2447 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2448 {
2449   tree obj = NULL_TREE;
2450   rtx x = NULL_RTX;
2451   int *regno = data;
2452
2453   switch (TREE_CODE (*expr_p))
2454     {
2455     case ADDR_EXPR:
2456       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2457            handled_component_p (*expr_p);
2458            expr_p = &TREE_OPERAND (*expr_p, 0))
2459         continue;
2460       obj = *expr_p;
2461       if (DECL_P (obj))
2462         x = produce_memory_decl_rtl (obj, regno);
2463       break;
2464
2465     case SSA_NAME:
2466       *ws = 0;
2467       obj = SSA_NAME_VAR (*expr_p);
2468       if (!DECL_RTL_SET_P (obj))
2469         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2470       break;
2471
2472     case VAR_DECL:
2473     case PARM_DECL:
2474     case RESULT_DECL:
2475       *ws = 0;
2476       obj = *expr_p;
2477
2478       if (DECL_RTL_SET_P (obj))
2479         break;
2480
2481       if (DECL_MODE (obj) == BLKmode)
2482         x = produce_memory_decl_rtl (obj, regno);
2483       else
2484         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2485
2486       break;
2487
2488     default:
2489       break;
2490     }
2491
2492   if (x)
2493     {
2494       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2495       SET_DECL_RTL (obj, x);
2496     }
2497
2498   return NULL_TREE;
2499 }
2500
2501 /* Determines cost of the computation of EXPR.  */
2502
2503 static unsigned
2504 computation_cost (tree expr)
2505 {
2506   rtx seq, rslt;
2507   tree type = TREE_TYPE (expr);
2508   unsigned cost;
2509   /* Avoid using hard regs in ways which may be unsupported.  */
2510   int regno = LAST_VIRTUAL_REGISTER + 1;
2511
2512   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2513   start_sequence ();
2514   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2515   seq = get_insns ();
2516   end_sequence ();
2517
2518   cost = seq_cost (seq);
2519   if (MEM_P (rslt))
2520     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2521
2522   return cost;
2523 }
2524
2525 /* Returns variable containing the value of candidate CAND at statement AT.  */
2526
2527 static tree
2528 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2529 {
2530   if (stmt_after_increment (loop, cand, stmt))
2531     return cand->var_after;
2532   else
2533     return cand->var_before;
2534 }
2535
2536 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2537    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2538
2539 static int
2540 tree_int_cst_sign_bit (tree t)
2541 {
2542   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2543   unsigned HOST_WIDE_INT w;
2544
2545   if (bitno < HOST_BITS_PER_WIDE_INT)
2546     w = TREE_INT_CST_LOW (t);
2547   else
2548     {
2549       w = TREE_INT_CST_HIGH (t);
2550       bitno -= HOST_BITS_PER_WIDE_INT;
2551     }
2552
2553   return (w >> bitno) & 1;
2554 }
2555
2556 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2557    return cst.  Otherwise return NULL_TREE.  */
2558
2559 static tree
2560 constant_multiple_of (tree type, tree top, tree bot)
2561 {
2562   tree res, mby, p0, p1;
2563   enum tree_code code;
2564   bool negate;
2565
2566   STRIP_NOPS (top);
2567   STRIP_NOPS (bot);
2568
2569   if (operand_equal_p (top, bot, 0))
2570     return build_int_cst (type, 1);
2571
2572   code = TREE_CODE (top);
2573   switch (code)
2574     {
2575     case MULT_EXPR:
2576       mby = TREE_OPERAND (top, 1);
2577       if (TREE_CODE (mby) != INTEGER_CST)
2578         return NULL_TREE;
2579
2580       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2581       if (!res)
2582         return NULL_TREE;
2583
2584       return fold_binary_to_constant (MULT_EXPR, type, res,
2585                                       fold_convert (type, mby));
2586
2587     case PLUS_EXPR:
2588     case MINUS_EXPR:
2589       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2590       if (!p0)
2591         return NULL_TREE;
2592       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2593       if (!p1)
2594         return NULL_TREE;
2595
2596       return fold_binary_to_constant (code, type, p0, p1);
2597
2598     case INTEGER_CST:
2599       if (TREE_CODE (bot) != INTEGER_CST)
2600         return NULL_TREE;
2601
2602       bot = fold_convert (type, bot);
2603       top = fold_convert (type, top);
2604
2605       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2606          the result afterwards.  */
2607       if (tree_int_cst_sign_bit (bot))
2608         {
2609           negate = true;
2610           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2611         }
2612       else
2613         negate = false;
2614
2615       /* Ditto for TOP.  */
2616       if (tree_int_cst_sign_bit (top))
2617         {
2618           negate = !negate;
2619           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2620         }
2621
2622       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2623         return NULL_TREE;
2624
2625       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2626       if (negate)
2627         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2628       return res;
2629
2630     default:
2631       return NULL_TREE;
2632     }
2633 }
2634
2635 /* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
2636    to make things simpler; this is sufficient in most cases.  */
2637
2638 #define MAX_AFF_ELTS 8
2639
2640 struct affine_tree_combination
2641 {
2642   /* Type of the result of the combination.  */
2643   tree type;
2644
2645   /* Mask modulo that the operations are performed.  */
2646   unsigned HOST_WIDE_INT mask;
2647
2648   /* Constant offset.  */
2649   unsigned HOST_WIDE_INT offset;
2650
2651   /* Number of elements of the combination.  */
2652   unsigned n;
2653
2654   /* Elements and their coefficients.  */
2655   tree elts[MAX_AFF_ELTS];
2656   unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2657
2658   /* Remainder of the expression.  */
2659   tree rest;
2660 };
2661
2662 /* Sets COMB to CST.  */
2663
2664 static void
2665 aff_combination_const (struct affine_tree_combination *comb, tree type,
2666                        unsigned HOST_WIDE_INT cst)
2667 {
2668   unsigned prec = TYPE_PRECISION (type);
2669
2670   comb->type = type;
2671   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2672
2673   comb->n = 0;
2674   comb->rest = NULL_TREE;
2675   comb->offset = cst & comb->mask;
2676 }
2677
2678 /* Sets COMB to single element ELT.  */
2679
2680 static void
2681 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2682 {
2683   unsigned prec = TYPE_PRECISION (type);
2684
2685   comb->type = type;
2686   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2687
2688   comb->n = 1;
2689   comb->elts[0] = elt;
2690   comb->coefs[0] = 1;
2691   comb->rest = NULL_TREE;
2692   comb->offset = 0;
2693 }
2694
2695 /* Scales COMB by SCALE.  */
2696
2697 static void
2698 aff_combination_scale (struct affine_tree_combination *comb,
2699                        unsigned HOST_WIDE_INT scale)
2700 {
2701   unsigned i, j;
2702
2703   if (scale == 1)
2704     return;
2705
2706   if (scale == 0)
2707     {
2708       aff_combination_const (comb, comb->type, 0);
2709       return;
2710     }
2711
2712   comb->offset = (scale * comb->offset) & comb->mask;
2713   for (i = 0, j = 0; i < comb->n; i++)
2714     {
2715       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2716       comb->elts[j] = comb->elts[i];
2717       if (comb->coefs[j] != 0)
2718         j++;
2719     }
2720   comb->n = j;
2721
2722   if (comb->rest)
2723     {
2724       if (comb->n < MAX_AFF_ELTS)
2725         {
2726           comb->coefs[comb->n] = scale;
2727           comb->elts[comb->n] = comb->rest;
2728           comb->rest = NULL_TREE;
2729           comb->n++;
2730         }
2731       else
2732         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2733                                   build_int_cst_type (comb->type, scale));
2734     }
2735 }
2736
2737 /* Adds ELT * SCALE to COMB.  */
2738
2739 static void
2740 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2741                          unsigned HOST_WIDE_INT scale)
2742 {
2743   unsigned i;
2744
2745   if (scale == 0)
2746     return;
2747
2748   for (i = 0; i < comb->n; i++)
2749     if (operand_equal_p (comb->elts[i], elt, 0))
2750       {
2751         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2752         if (comb->coefs[i])
2753           return;
2754
2755         comb->n--;
2756         comb->coefs[i] = comb->coefs[comb->n];
2757         comb->elts[i] = comb->elts[comb->n];
2758         return;
2759       }
2760   if (comb->n < MAX_AFF_ELTS)
2761     {
2762       comb->coefs[comb->n] = scale;
2763       comb->elts[comb->n] = elt;
2764       comb->n++;
2765       return;
2766     }
2767
2768   if (scale == 1)
2769     elt = fold_convert (comb->type, elt);
2770   else
2771     elt = fold_build2 (MULT_EXPR, comb->type,
2772                        fold_convert (comb->type, elt),
2773                        build_int_cst_type (comb->type, scale));
2774
2775   if (comb->rest)
2776     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2777   else
2778     comb->rest = elt;
2779 }
2780
2781 /* Adds COMB2 to COMB1.  */
2782
2783 static void
2784 aff_combination_add (struct affine_tree_combination *comb1,
2785                      struct affine_tree_combination *comb2)
2786 {
2787   unsigned i;
2788
2789   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2790   for (i = 0; i < comb2-> n; i++)
2791     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2792   if (comb2->rest)
2793     aff_combination_add_elt (comb1, comb2->rest, 1);
2794 }
2795
2796 /* Splits EXPR into an affine combination of parts.  */
2797
2798 static void
2799 tree_to_aff_combination (tree expr, tree type,
2800                          struct affine_tree_combination *comb)
2801 {
2802   struct affine_tree_combination tmp;
2803   enum tree_code code;
2804   tree cst, core, toffset;
2805   HOST_WIDE_INT bitpos, bitsize;
2806   enum machine_mode mode;
2807   int unsignedp, volatilep;
2808
2809   STRIP_NOPS (expr);
2810
2811   code = TREE_CODE (expr);
2812   switch (code)
2813     {
2814     case INTEGER_CST:
2815       aff_combination_const (comb, type, int_cst_value (expr));
2816       return;
2817
2818     case PLUS_EXPR:
2819     case MINUS_EXPR:
2820       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2821       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2822       if (code == MINUS_EXPR)
2823         aff_combination_scale (&tmp, -1);
2824       aff_combination_add (comb, &tmp);
2825       return;
2826
2827     case MULT_EXPR:
2828       cst = TREE_OPERAND (expr, 1);
2829       if (TREE_CODE (cst) != INTEGER_CST)
2830         break;
2831       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2832       aff_combination_scale (comb, int_cst_value (cst));
2833       return;
2834
2835     case NEGATE_EXPR:
2836       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2837       aff_combination_scale (comb, -1);
2838       return;
2839
2840     case ADDR_EXPR:
2841       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2842                                   &toffset, &mode, &unsignedp, &volatilep,
2843                                   false);
2844       if (bitpos % BITS_PER_UNIT != 0)
2845         break;
2846       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2847       core = build_addr_strip_iref (core);
2848       if (TREE_CODE (core) == ADDR_EXPR)
2849         aff_combination_add_elt (comb, core, 1);
2850       else
2851         {
2852           tree_to_aff_combination (core, type, &tmp);
2853           aff_combination_add (comb, &tmp);
2854         }
2855       if (toffset)
2856         {
2857           tree_to_aff_combination (toffset, type, &tmp);
2858           aff_combination_add (comb, &tmp);
2859         }
2860       return;
2861
2862     default:
2863       break;
2864     }
2865
2866   aff_combination_elt (comb, type, expr);
2867 }
2868
2869 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2870
2871 static tree
2872 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2873                  unsigned HOST_WIDE_INT mask)
2874 {
2875   enum tree_code code;
2876
2877   scale &= mask;
2878   elt = fold_convert (type, elt);
2879
2880   if (scale == 1)
2881     {
2882       if (!expr)
2883         return elt;
2884
2885       return fold_build2 (PLUS_EXPR, type, expr, elt);
2886     }
2887
2888   if (scale == mask)
2889     {
2890       if (!expr)
2891         return fold_build1 (NEGATE_EXPR, type, elt);
2892
2893       return fold_build2 (MINUS_EXPR, type, expr, elt);
2894     }
2895
2896   if (!expr)
2897     return fold_build2 (MULT_EXPR, type, elt,
2898                         build_int_cst_type (type, scale));
2899
2900   if ((scale | (mask >> 1)) == mask)
2901     {
2902       /* Scale is negative.  */
2903       code = MINUS_EXPR;
2904       scale = (-scale) & mask;
2905     }
2906   else
2907     code = PLUS_EXPR;
2908
2909   elt = fold_build2 (MULT_EXPR, type, elt,
2910                      build_int_cst_type (type, scale));
2911   return fold_build2 (code, type, expr, elt);
2912 }
2913
2914 /* Makes tree from the affine combination COMB.  */
2915
2916 static tree
2917 aff_combination_to_tree (struct affine_tree_combination *comb)
2918 {
2919   tree type = comb->type;
2920   tree expr = comb->rest;
2921   unsigned i;
2922   unsigned HOST_WIDE_INT off, sgn;
2923
2924   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2925
2926   for (i = 0; i < comb->n; i++)
2927     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2928                             comb->mask);
2929
2930   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2931     {
2932       /* Offset is negative.  */
2933       off = (-comb->offset) & comb->mask;
2934       sgn = comb->mask;
2935     }
2936   else
2937     {
2938       off = comb->offset;
2939       sgn = 1;
2940     }
2941   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2942                           comb->mask);
2943 }
2944
2945 /* Folds X + RATIO * Y in TYPE.  */
2946
2947 static tree
2948 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2949 {
2950   enum tree_code code;
2951   tree cst;
2952   struct affine_tree_combination cx, cy;
2953
2954   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2955     {
2956       if (ratio == 1)
2957         return fold_build2 (PLUS_EXPR, type, x, y);
2958       if (ratio == -1)
2959         return fold_build2 (MINUS_EXPR, type, x, y);
2960
2961       if (ratio < 0)
2962         {
2963           code = MINUS_EXPR;
2964           ratio = -ratio;
2965         }
2966       else
2967         code = PLUS_EXPR;
2968
2969       cst = build_int_cst_type (type, ratio);
2970       y = fold_build2 (MULT_EXPR, type, y, cst);
2971       return fold_build2 (code, type, x, y);
2972     }
2973
2974   tree_to_aff_combination (x, type, &cx);
2975   tree_to_aff_combination (y, type, &cy);
2976   aff_combination_scale (&cy, ratio);
2977   aff_combination_add (&cx, &cy);
2978
2979   return aff_combination_to_tree (&cx);
2980 }
2981
2982 /* Determines the expression by that USE is expressed from induction variable
2983    CAND at statement AT in LOOP.  */
2984
2985 static tree
2986 get_computation_at (struct loop *loop,
2987                     struct iv_use *use, struct iv_cand *cand, tree at)
2988 {
2989   tree ubase = use->iv->base;
2990   tree ustep = use->iv->step;
2991   tree cbase = cand->iv->base;
2992   tree cstep = cand->iv->step;
2993   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2994   tree uutype;
2995   tree expr, delta;
2996   tree ratio;
2997   unsigned HOST_WIDE_INT ustepi, cstepi;
2998   HOST_WIDE_INT ratioi;
2999
3000   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3001     {
3002       /* We do not have a precision to express the values of use.  */
3003       return NULL_TREE;
3004     }
3005
3006   expr = var_at_stmt (loop, cand, at);
3007
3008   if (TREE_TYPE (expr) != ctype)
3009     {
3010       /* This may happen with the original ivs.  */
3011       expr = fold_convert (ctype, expr);
3012     }
3013
3014   if (TYPE_UNSIGNED (utype))
3015     uutype = utype;
3016   else
3017     {
3018       uutype = unsigned_type_for (utype);
3019       ubase = fold_convert (uutype, ubase);
3020       ustep = fold_convert (uutype, ustep);
3021     }
3022
3023   if (uutype != ctype)
3024     {
3025       expr = fold_convert (uutype, expr);
3026       cbase = fold_convert (uutype, cbase);
3027       cstep = fold_convert (uutype, cstep);
3028     }
3029
3030   if (cst_and_fits_in_hwi (cstep)
3031       && cst_and_fits_in_hwi (ustep))
3032     {
3033       ustepi = int_cst_value (ustep);
3034       cstepi = int_cst_value (cstep);
3035
3036       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3037         {
3038           /* TODO maybe consider case when ustep divides cstep and the ratio is
3039              a power of 2 (so that the division is fast to execute)?  We would
3040              need to be much more careful with overflows etc. then.  */
3041           return NULL_TREE;
3042         }
3043
3044       ratio = build_int_cst_type (uutype, ratioi);
3045     }
3046   else
3047     {
3048       ratio = constant_multiple_of (uutype, ustep, cstep);
3049       if (!ratio)
3050         return NULL_TREE;
3051
3052       /* Ratioi is only used to detect special cases when the multiplicative
3053          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3054          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3055          to integer_onep/integer_all_onesp, since the former ignores
3056          TREE_OVERFLOW.  */
3057       if (cst_and_fits_in_hwi (ratio))
3058         ratioi = int_cst_value (ratio);
3059       else if (integer_onep (ratio))
3060         ratioi = 1;
3061       else if (integer_all_onesp (ratio))
3062         ratioi = -1;
3063       else
3064         ratioi = 0;
3065     }
3066
3067   /* We may need to shift the value if we are after the increment.  */
3068   if (stmt_after_increment (loop, cand, at))
3069     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3070
3071   /* use = ubase - ratio * cbase + ratio * var.
3072
3073      In general case ubase + ratio * (var - cbase) could be better (one less
3074      multiplication), but often it is possible to eliminate redundant parts
3075      of computations from (ubase - ratio * cbase) term, and if it does not
3076      happen, fold is able to apply the distributive law to obtain this form
3077      anyway.  */
3078
3079   if (ratioi == 1)
3080     {
3081       delta = fold_affine_sum (uutype, ubase, cbase, -1);
3082       expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3083     }
3084   else if (ratioi == -1)
3085     {
3086       delta = fold_affine_sum (uutype, ubase, cbase, 1);
3087       expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3088     }
3089   else
3090     {
3091       if (ratioi)
3092         delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3093       else
3094         {
3095           delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3096           delta = fold_affine_sum (uutype, ubase, delta, -1);
3097         }
3098       expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3099       expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3100     }
3101
3102   return fold_convert (utype, expr);
3103 }
3104
3105 /* Determines the expression by that USE is expressed from induction variable
3106    CAND in LOOP.  */
3107
3108 static tree
3109 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3110 {
3111   return get_computation_at (loop, use, cand, use->stmt);
3112 }
3113
3114 /* Returns cost of addition in MODE.  */
3115
3116 static unsigned
3117 add_cost (enum machine_mode mode)
3118 {
3119   static unsigned costs[NUM_MACHINE_MODES];
3120   rtx seq;
3121   unsigned cost;
3122
3123   if (costs[mode])
3124     return costs[mode];
3125
3126   start_sequence ();
3127   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3128                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3129                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3130                  NULL_RTX);
3131   seq = get_insns ();
3132   end_sequence ();
3133
3134   cost = seq_cost (seq);
3135   if (!cost)
3136     cost = 1;
3137
3138   costs[mode] = cost;
3139
3140   if (dump_file && (dump_flags & TDF_DETAILS))
3141     fprintf (dump_file, "Addition in %s costs %d\n",
3142              GET_MODE_NAME (mode), cost);
3143   return cost;
3144 }
3145
3146 /* Entry in a hashtable of already known costs for multiplication.  */
3147 struct mbc_entry
3148 {
3149   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3150   enum machine_mode mode;       /* In mode.  */
3151   unsigned cost;                /* The cost.  */
3152 };
3153
3154 /* Counts hash value for the ENTRY.  */
3155
3156 static hashval_t
3157 mbc_entry_hash (const void *entry)
3158 {
3159   const struct mbc_entry *e = entry;
3160
3161   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3162 }
3163
3164 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3165
3166 static int
3167 mbc_entry_eq (const void *entry1, const void *entry2)
3168 {
3169   const struct mbc_entry *e1 = entry1;
3170   const struct mbc_entry *e2 = entry2;
3171
3172   return (e1->mode == e2->mode
3173           && e1->cst == e2->cst);
3174 }
3175
3176 /* Returns cost of multiplication by constant CST in MODE.  */
3177
3178 static unsigned
3179 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3180 {
3181   static htab_t costs;
3182   struct mbc_entry **cached, act;
3183   rtx seq;
3184   unsigned cost;
3185
3186   if (!costs)
3187     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3188
3189   act.mode = mode;
3190   act.cst = cst;
3191   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3192   if (*cached)
3193     return (*cached)->cost;
3194
3195   *cached = xmalloc (sizeof (struct mbc_entry));
3196   (*cached)->mode = mode;
3197   (*cached)->cst = cst;
3198
3199   start_sequence ();
3200   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3201                NULL_RTX, 0);
3202   seq = get_insns ();
3203   end_sequence ();
3204
3205   cost = seq_cost (seq);
3206
3207   if (dump_file && (dump_flags & TDF_DETAILS))
3208     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3209              (int) cst, GET_MODE_NAME (mode), cost);
3210
3211   (*cached)->cost = cost;
3212
3213   return cost;
3214 }
3215
3216 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3217    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3218    variable is omitted.  The created memory accesses MODE.
3219
3220    TODO -- there must be some better way.  This all is quite crude.  */
3221
3222 static unsigned
3223 get_address_cost (bool symbol_present, bool var_present,
3224                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3225 {
3226 #define MAX_RATIO 128
3227   static sbitmap valid_mult;
3228   static HOST_WIDE_INT rat, off;
3229   static HOST_WIDE_INT min_offset, max_offset;
3230   static unsigned costs[2][2][2][2];
3231   unsigned cost, acost;
3232   rtx seq, addr, base;
3233   bool offset_p, ratio_p;
3234   rtx reg1;
3235   HOST_WIDE_INT s_offset;
3236   unsigned HOST_WIDE_INT mask;
3237   unsigned bits;
3238
3239   if (!valid_mult)
3240     {
3241       HOST_WIDE_INT i;
3242
3243       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3244
3245       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3246       for (i = 1; i <= 1 << 20; i <<= 1)
3247         {
3248           XEXP (addr, 1) = GEN_INT (i);
3249           if (!memory_address_p (Pmode, addr))
3250             break;
3251         }
3252       max_offset = i >> 1;
3253       off = max_offset;
3254
3255       for (i = 1; i <= 1 << 20; i <<= 1)
3256         {
3257           XEXP (addr, 1) = GEN_INT (-i);
3258           if (!memory_address_p (Pmode, addr))
3259             break;
3260         }
3261       min_offset = -(i >> 1);
3262
3263       if (dump_file && (dump_flags & TDF_DETAILS))
3264         {
3265           fprintf (dump_file, "get_address_cost:\n");
3266           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3267           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3268         }
3269
3270       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3271       sbitmap_zero (valid_mult);
3272       rat = 1;
3273       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3274       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275         {
3276           XEXP (addr, 1) = GEN_INT (i);
3277           if (memory_address_p (Pmode, addr))
3278             {
3279               SET_BIT (valid_mult, i + MAX_RATIO);
3280               rat = i;
3281             }
3282         }
3283
3284       if (dump_file && (dump_flags & TDF_DETAILS))
3285         {
3286           fprintf (dump_file, "  allowed multipliers:");
3287           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3288             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3289               fprintf (dump_file, " %d", (int) i);
3290           fprintf (dump_file, "\n");
3291           fprintf (dump_file, "\n");
3292         }
3293     }
3294
3295   bits = GET_MODE_BITSIZE (Pmode);
3296   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3297   offset &= mask;
3298   if ((offset >> (bits - 1) & 1))
3299     offset |= ~mask;
3300   s_offset = offset;
3301
3302   cost = 0;
3303   offset_p = (s_offset != 0
3304               && min_offset <= s_offset && s_offset <= max_offset);
3305   ratio_p = (ratio != 1
3306              && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3307              && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3308
3309   if (ratio != 1 && !ratio_p)
3310     cost += multiply_by_cost (ratio, Pmode);
3311
3312   if (s_offset && !offset_p && !symbol_present)
3313     {
3314       cost += add_cost (Pmode);
3315       var_present = true;
3316     }
3317
3318   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3319   if (!acost)
3320     {
3321       acost = 0;
3322
3323       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3324       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3325       if (ratio_p)
3326         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3327
3328       if (var_present)
3329         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3330
3331       if (symbol_present)
3332         {
3333           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3334           if (offset_p)
3335             base = gen_rtx_fmt_e (CONST, Pmode,
3336                                   gen_rtx_fmt_ee (PLUS, Pmode,
3337                                                   base,
3338                                                   GEN_INT (off)));
3339         }
3340       else if (offset_p)
3341         base = GEN_INT (off);
3342       else
3343         base = NULL_RTX;
3344
3345       if (base)
3346         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3347
3348       start_sequence ();
3349       addr = memory_address (Pmode, addr);
3350       seq = get_insns ();
3351       end_sequence ();
3352
3353       acost = seq_cost (seq);
3354       acost += address_cost (addr, Pmode);
3355
3356       if (!acost)
3357         acost = 1;
3358       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3359     }
3360
3361   return cost + acost;
3362 }
3363 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3364    invariants the computation depends on.  */
3365
3366 static unsigned
3367 force_var_cost (struct ivopts_data *data,
3368                 tree expr, bitmap *depends_on)
3369 {
3370   static bool costs_initialized = false;
3371   static unsigned integer_cost;
3372   static unsigned symbol_cost;
3373   static unsigned address_cost;
3374   tree op0, op1;
3375   unsigned cost0, cost1, cost;
3376   enum machine_mode mode;
3377
3378   if (!costs_initialized)
3379     {
3380       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3381       rtx x = gen_rtx_MEM (DECL_MODE (var),
3382                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3383       tree addr;
3384       tree type = build_pointer_type (integer_type_node);
3385
3386       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3387                                                            2000));
3388
3389       SET_DECL_RTL (var, x);
3390       TREE_STATIC (var) = 1;
3391       addr = build1 (ADDR_EXPR, type, var);
3392       symbol_cost = computation_cost (addr) + 1;
3393
3394       address_cost
3395         = computation_cost (build2 (PLUS_EXPR, type,
3396                                     addr,
3397                                     build_int_cst_type (type, 2000))) + 1;
3398       if (dump_file && (dump_flags & TDF_DETAILS))
3399         {
3400           fprintf (dump_file, "force_var_cost:\n");
3401           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3402           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3403           fprintf (dump_file, "  address %d\n", (int) address_cost);
3404           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3405           fprintf (dump_file, "\n");
3406         }
3407
3408       costs_initialized = true;
3409     }
3410
3411   STRIP_NOPS (expr);
3412
3413   if (depends_on)
3414     {
3415       fd_ivopts_data = data;
3416       walk_tree (&expr, find_depends, depends_on, NULL);
3417     }
3418
3419   if (SSA_VAR_P (expr))
3420     return 0;
3421
3422   if (TREE_INVARIANT (expr))
3423     {
3424       if (TREE_CODE (expr) == INTEGER_CST)
3425         return integer_cost;
3426
3427       if (TREE_CODE (expr) == ADDR_EXPR)
3428         {
3429           tree obj = TREE_OPERAND (expr, 0);
3430
3431           if (TREE_CODE (obj) == VAR_DECL
3432               || TREE_CODE (obj) == PARM_DECL
3433               || TREE_CODE (obj) == RESULT_DECL)
3434             return symbol_cost;
3435         }
3436
3437       return address_cost;
3438     }
3439
3440   switch (TREE_CODE (expr))
3441     {
3442     case PLUS_EXPR:
3443     case MINUS_EXPR:
3444     case MULT_EXPR:
3445       op0 = TREE_OPERAND (expr, 0);
3446       op1 = TREE_OPERAND (expr, 1);
3447       STRIP_NOPS (op0);
3448       STRIP_NOPS (op1);
3449
3450       if (is_gimple_val (op0))
3451         cost0 = 0;
3452       else
3453         cost0 = force_var_cost (data, op0, NULL);
3454
3455       if (is_gimple_val (op1))
3456         cost1 = 0;
3457       else
3458         cost1 = force_var_cost (data, op1, NULL);
3459
3460       break;
3461
3462     default:
3463       /* Just an arbitrary value, FIXME.  */
3464       return target_spill_cost;
3465     }
3466
3467   mode = TYPE_MODE (TREE_TYPE (expr));
3468   switch (TREE_CODE (expr))
3469     {
3470     case PLUS_EXPR:
3471     case MINUS_EXPR:
3472       cost = add_cost (mode);
3473       break;
3474
3475     case MULT_EXPR:
3476       if (cst_and_fits_in_hwi (op0))
3477         cost = multiply_by_cost (int_cst_value (op0), mode);
3478       else if (cst_and_fits_in_hwi (op1))
3479         cost = multiply_by_cost (int_cst_value (op1), mode);
3480       else
3481         return target_spill_cost;
3482       break;
3483
3484     default:
3485       gcc_unreachable ();
3486     }
3487
3488   cost += cost0;
3489   cost += cost1;
3490
3491   /* Bound the cost by target_spill_cost.  The parts of complicated
3492      computations often are either loop invariant or at least can
3493      be shared between several iv uses, so letting this grow without
3494      limits would not give reasonable results.  */
3495   return cost < target_spill_cost ? cost : target_spill_cost;
3496 }
3497
3498 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3499    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3500    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3501    invariants the computation depends on.  */
3502
3503 static unsigned
3504 split_address_cost (struct ivopts_data *data,
3505                     tree addr, bool *symbol_present, bool *var_present,
3506                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3507 {
3508   tree core;
3509   HOST_WIDE_INT bitsize;
3510   HOST_WIDE_INT bitpos;
3511   tree toffset;
3512   enum machine_mode mode;
3513   int unsignedp, volatilep;
3514
3515   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3516                               &unsignedp, &volatilep, false);
3517
3518   if (toffset != 0
3519       || bitpos % BITS_PER_UNIT != 0
3520       || TREE_CODE (core) != VAR_DECL)
3521     {
3522       *symbol_present = false;
3523       *var_present = true;
3524       fd_ivopts_data = data;
3525       walk_tree (&addr, find_depends, depends_on, NULL);
3526       return target_spill_cost;
3527     }
3528
3529   *offset += bitpos / BITS_PER_UNIT;
3530   if (TREE_STATIC (core)
3531       || DECL_EXTERNAL (core))
3532     {
3533       *symbol_present = true;
3534       *var_present = false;
3535       return 0;
3536     }
3537
3538   *symbol_present = false;
3539   *var_present = true;
3540   return 0;
3541 }
3542
3543 /* Estimates cost of expressing difference of addresses E1 - E2 as
3544    var + symbol + offset.  The value of offset is added to OFFSET,
3545    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3546    part is missing.  DEPENDS_ON is a set of the invariants the computation
3547    depends on.  */
3548
3549 static unsigned
3550 ptr_difference_cost (struct ivopts_data *data,
3551                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3552                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3553 {
3554   HOST_WIDE_INT diff = 0;
3555   unsigned cost;
3556
3557   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3558
3559   if (ptr_difference_const (e1, e2, &diff))
3560     {
3561       *offset += diff;
3562       *symbol_present = false;
3563       *var_present = false;
3564       return 0;
3565     }
3566
3567   if (e2 == integer_zero_node)
3568     return split_address_cost (data, TREE_OPERAND (e1, 0),
3569                                symbol_present, var_present, offset, depends_on);
3570
3571   *symbol_present = false;
3572   *var_present = true;
3573
3574   cost = force_var_cost (data, e1, depends_on);
3575   cost += force_var_cost (data, e2, depends_on);
3576   cost += add_cost (Pmode);
3577
3578   return cost;
3579 }
3580
3581 /* Estimates cost of expressing difference E1 - E2 as
3582    var + symbol + offset.  The value of offset is added to OFFSET,
3583    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3584    part is missing.  DEPENDS_ON is a set of the invariants the computation
3585    depends on.  */
3586
3587 static unsigned
3588 difference_cost (struct ivopts_data *data,
3589                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3590                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3591 {
3592   unsigned cost;
3593   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3594   unsigned HOST_WIDE_INT off1, off2;
3595
3596   e1 = strip_offset (e1, &off1);
3597   e2 = strip_offset (e2, &off2);
3598   *offset += off1 - off2;
3599
3600   STRIP_NOPS (e1);
3601   STRIP_NOPS (e2);
3602
3603   if (TREE_CODE (e1) == ADDR_EXPR)
3604     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3605                                 depends_on);
3606   *symbol_present = false;
3607
3608   if (operand_equal_p (e1, e2, 0))
3609     {
3610       *var_present = false;
3611       return 0;
3612     }
3613   *var_present = true;
3614   if (zero_p (e2))
3615     return force_var_cost (data, e1, depends_on);
3616
3617   if (zero_p (e1))
3618     {
3619       cost = force_var_cost (data, e2, depends_on);
3620       cost += multiply_by_cost (-1, mode);
3621
3622       return cost;
3623     }
3624
3625   cost = force_var_cost (data, e1, depends_on);
3626   cost += force_var_cost (data, e2, depends_on);
3627   cost += add_cost (mode);
3628
3629   return cost;
3630 }
3631
3632 /* Determines the cost of the computation by that USE is expressed
3633    from induction variable CAND.  If ADDRESS_P is true, we just need
3634    to create an address from it, otherwise we want to get it into
3635    register.  A set of invariants we depend on is stored in
3636    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3637
3638 static unsigned
3639 get_computation_cost_at (struct ivopts_data *data,
3640                          struct iv_use *use, struct iv_cand *cand,
3641                          bool address_p, bitmap *depends_on, tree at)
3642 {
3643   tree ubase = use->iv->base, ustep = use->iv->step;
3644   tree cbase, cstep;
3645   tree utype = TREE_TYPE (ubase), ctype;
3646   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3647   HOST_WIDE_INT ratio, aratio;
3648   bool var_present, symbol_present;
3649   unsigned cost = 0, n_sums;
3650
3651   *depends_on = NULL;
3652
3653   /* Only consider real candidates.  */
3654   if (!cand->iv)
3655     return INFTY;
3656
3657   cbase = cand->iv->base;
3658   cstep = cand->iv->step;
3659   ctype = TREE_TYPE (cbase);
3660
3661   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3662     {
3663       /* We do not have a precision to express the values of use.  */
3664       return INFTY;
3665     }
3666
3667   if (address_p)
3668     {
3669       /* Do not try to express address of an object with computation based
3670          on address of a different object.  This may cause problems in rtl
3671          level alias analysis (that does not expect this to be happening,
3672          as this is illegal in C), and would be unlikely to be useful
3673          anyway.  */
3674       if (use->iv->base_object
3675           && cand->iv->base_object
3676           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3677         return INFTY;
3678     }
3679
3680   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3681     {
3682       /* TODO -- add direct handling of this case.  */
3683       goto fallback;
3684     }
3685
3686   /* CSTEPI is removed from the offset in case statement is after the
3687      increment.  If the step is not constant, we use zero instead.
3688      This is a bit imprecise (there is the extra addition), but
3689      redundancy elimination is likely to transform the code so that
3690      it uses value of the variable before increment anyway,
3691      so it is not that much unrealistic.  */
3692   if (cst_and_fits_in_hwi (cstep))
3693     cstepi = int_cst_value (cstep);
3694   else
3695     cstepi = 0;
3696
3697   if (cst_and_fits_in_hwi (ustep)
3698       && cst_and_fits_in_hwi (cstep))
3699     {
3700       ustepi = int_cst_value (ustep);
3701
3702       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3703         return INFTY;
3704     }
3705   else
3706     {
3707       tree rat;
3708
3709       rat = constant_multiple_of (utype, ustep, cstep);
3710
3711       if (!rat)
3712         return INFTY;
3713
3714       if (cst_and_fits_in_hwi (rat))
3715         ratio = int_cst_value (rat);
3716       else if (integer_onep (rat))
3717         ratio = 1;
3718       else if (integer_all_onesp (rat))
3719         ratio = -1;
3720       else
3721         return INFTY;
3722     }
3723
3724   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3725      or ratio == 1, it is better to handle this like
3726
3727      ubase - ratio * cbase + ratio * var
3728
3729      (also holds in the case ratio == -1, TODO.  */
3730
3731   if (cst_and_fits_in_hwi (cbase))
3732     {
3733       offset = - ratio * int_cst_value (cbase);
3734       cost += difference_cost (data,
3735                                ubase, integer_zero_node,
3736                                &symbol_present, &var_present, &offset,
3737                                depends_on);
3738     }
3739   else if (ratio == 1)
3740     {
3741       cost += difference_cost (data,
3742                                ubase, cbase,
3743                                &symbol_present, &var_present, &offset,
3744                                depends_on);
3745     }
3746   else
3747     {
3748       cost += force_var_cost (data, cbase, depends_on);
3749       cost += add_cost (TYPE_MODE (ctype));
3750       cost += difference_cost (data,
3751                                ubase, integer_zero_node,
3752                                &symbol_present, &var_present, &offset,
3753                                depends_on);
3754     }
3755
3756   /* If we are after the increment, the value of the candidate is higher by
3757      one iteration.  */
3758   if (stmt_after_increment (data->current_loop, cand, at))
3759     offset -= ratio * cstepi;
3760
3761   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3762      (symbol/var/const parts may be omitted).  If we are looking for an address,
3763      find the cost of addressing this.  */
3764   if (address_p)
3765     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3766
3767   /* Otherwise estimate the costs for computing the expression.  */
3768   aratio = ratio > 0 ? ratio : -ratio;
3769   if (!symbol_present && !var_present && !offset)
3770     {
3771       if (ratio != 1)
3772         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3773
3774       return cost;
3775     }
3776
3777   if (aratio != 1)
3778     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3779
3780   n_sums = 1;
3781   if (var_present
3782       /* Symbol + offset should be compile-time computable.  */
3783       && (symbol_present || offset))
3784     n_sums++;
3785
3786   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3787
3788 fallback:
3789   {
3790     /* Just get the expression, expand it and measure the cost.  */
3791     tree comp = get_computation_at (data->current_loop, use, cand, at);
3792
3793     if (!comp)
3794       return INFTY;
3795
3796     if (address_p)
3797       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3798
3799     return computation_cost (comp);
3800   }
3801 }
3802
3803 /* Determines the cost of the computation by that USE is expressed
3804    from induction variable CAND.  If ADDRESS_P is true, we just need
3805    to create an address from it, otherwise we want to get it into
3806    register.  A set of invariants we depend on is stored in
3807    DEPENDS_ON.  */
3808
3809 static unsigned
3810 get_computation_cost (struct ivopts_data *data,
3811                       struct iv_use *use, struct iv_cand *cand,
3812                       bool address_p, bitmap *depends_on)
3813 {
3814   return get_computation_cost_at (data,
3815                                   use, cand, address_p, depends_on, use->stmt);
3816 }
3817
3818 /* Determines cost of basing replacement of USE on CAND in a generic
3819    expression.  */
3820
3821 static bool
3822 determine_use_iv_cost_generic (struct ivopts_data *data,
3823                                struct iv_use *use, struct iv_cand *cand)
3824 {
3825   bitmap depends_on;
3826   unsigned cost;
3827
3828   /* The simple case first -- if we need to express value of the preserved
3829      original biv, the cost is 0.  This also prevents us from counting the
3830      cost of increment twice -- once at this use and once in the cost of
3831      the candidate.  */
3832   if (cand->pos == IP_ORIGINAL
3833       && cand->incremented_at == use->stmt)
3834     {
3835       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3836       return true;
3837     }
3838
3839   cost = get_computation_cost (data, use, cand, false, &depends_on);
3840   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3841
3842   return cost != INFTY;
3843 }
3844
3845 /* Determines cost of basing replacement of USE on CAND in an address.  */
3846
3847 static bool
3848 determine_use_iv_cost_address (struct ivopts_data *data,
3849                                struct iv_use *use, struct iv_cand *cand)
3850 {
3851   bitmap depends_on;
3852   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3853
3854   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3855
3856   return cost != INFTY;
3857 }
3858
3859 /* Computes value of induction variable IV in iteration NITER.  */
3860
3861 static tree
3862 iv_value (struct iv *iv, tree niter)
3863 {
3864   tree val;
3865   tree type = TREE_TYPE (iv->base);
3866
3867   niter = fold_convert (type, niter);
3868   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3869
3870   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3871 }
3872
3873 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3874
3875 static tree
3876 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3877 {
3878   tree val = iv_value (cand->iv, niter);
3879   tree type = TREE_TYPE (cand->iv->base);
3880
3881   if (stmt_after_increment (loop, cand, at))
3882     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3883
3884   return val;
3885 }
3886
3887 /* Returns period of induction variable iv.  */
3888
3889 static tree
3890 iv_period (struct iv *iv)
3891 {
3892   tree step = iv->step, period, type;
3893   tree pow2div;
3894
3895   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3896
3897   /* Period of the iv is gcd (step, type range).  Since type range is power
3898      of two, it suffices to determine the maximum power of two that divides
3899      step.  */
3900   pow2div = num_ending_zeros (step);
3901   type = unsigned_type_for (TREE_TYPE (step));
3902
3903   period = build_low_bits_mask (type,
3904                                 (TYPE_PRECISION (type)
3905                                  - tree_low_cst (pow2div, 1)));
3906
3907   return period;
3908 }
3909
3910 /* Returns the comparison operator used when eliminating the iv USE.  */
3911
3912 static enum tree_code
3913 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3914 {
3915   struct loop *loop = data->current_loop;
3916   basic_block ex_bb;
3917   edge exit;
3918
3919   ex_bb = bb_for_stmt (use->stmt);
3920   exit = EDGE_SUCC (ex_bb, 0);
3921   if (flow_bb_inside_loop_p (loop, exit->dest))
3922     exit = EDGE_SUCC (ex_bb, 1);
3923
3924   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3925 }
3926
3927 /* Check whether it is possible to express the condition in USE by comparison
3928    of candidate CAND.  If so, store the value compared with to BOUND.  */
3929
3930 static bool
3931 may_eliminate_iv (struct ivopts_data *data,
3932                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3933 {
3934   basic_block ex_bb;
3935   edge exit;
3936   struct tree_niter_desc *niter;
3937   tree nit, nit_type;
3938   tree wider_type, period, per_type;
3939   struct loop *loop = data->current_loop;
3940
3941   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3942     return false;
3943
3944   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3945      for other conditions inside loop body.  */
3946   ex_bb = bb_for_stmt (use->stmt);
3947   if (use->stmt != last_stmt (ex_bb)
3948       || TREE_CODE (use->stmt) != COND_EXPR)
3949     return false;
3950   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3951     return false;
3952
3953   exit = EDGE_SUCC (ex_bb, 0);
3954   if (flow_bb_inside_loop_p (loop, exit->dest))
3955     exit = EDGE_SUCC (ex_bb, 1);
3956   if (flow_bb_inside_loop_p (loop, exit->dest))
3957     return false;
3958
3959   niter = niter_for_exit (data, exit);
3960   if (!niter
3961       || !zero_p (niter->may_be_zero))
3962     return false;
3963
3964   nit = niter->niter;
3965   nit_type = TREE_TYPE (nit);
3966
3967   /* Determine whether we may use the variable to test whether niter iterations
3968      elapsed.  This is the case iff the period of the induction variable is
3969      greater than the number of iterations.  */
3970   period = iv_period (cand->iv);
3971   if (!period)
3972     return false;
3973   per_type = TREE_TYPE (period);
3974
3975   wider_type = TREE_TYPE (period);
3976   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3977     wider_type = per_type;
3978   else
3979     wider_type = nit_type;
3980
3981   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3982                                        fold_convert (wider_type, period),
3983                                        fold_convert (wider_type, nit)))))
3984     return false;
3985
3986   *bound = cand_value_at (loop, cand, use->stmt, nit);
3987   return true;
3988 }
3989
3990 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3991
3992 static bool
3993 determine_use_iv_cost_condition (struct ivopts_data *data,
3994                                  struct iv_use *use, struct iv_cand *cand)
3995 {
3996   tree bound = NULL_TREE, op, cond;
3997   bitmap depends_on = NULL;
3998   unsigned cost;
3999
4000   /* Only consider real candidates.  */
4001   if (!cand->iv)
4002     {
4003       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4004       return false;
4005     }
4006
4007   if (may_eliminate_iv (data, use, cand, &bound))
4008     {
4009       cost = force_var_cost (data, bound, &depends_on);
4010
4011       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4012       return cost != INFTY;
4013     }
4014
4015   /* The induction variable elimination failed; just express the original
4016      giv.  If it is compared with an invariant, note that we cannot get
4017      rid of it.  */
4018   cost = get_computation_cost (data, use, cand, false, &depends_on);
4019
4020   cond = *use->op_p;
4021   if (TREE_CODE (cond) != SSA_NAME)
4022     {
4023       op = TREE_OPERAND (cond, 0);
4024       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4025         op = TREE_OPERAND (cond, 1);
4026       if (TREE_CODE (op) == SSA_NAME)
4027         {
4028           op = get_iv (data, op)->base;
4029           fd_ivopts_data = data;
4030           walk_tree (&op, find_depends, &depends_on, NULL);
4031         }
4032     }
4033
4034   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4035   return cost != INFTY;
4036 }
4037
4038 /* Checks whether it is possible to replace the final value of USE by
4039    a direct computation.  If so, the formula is stored to *VALUE.  */
4040
4041 static bool
4042 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4043                          tree *value)
4044 {
4045   struct loop *loop = data->current_loop;
4046   edge exit;
4047   struct tree_niter_desc *niter;
4048
4049   exit = single_dom_exit (loop);
4050   if (!exit)
4051     return false;
4052
4053   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4054                               bb_for_stmt (use->stmt)));
4055
4056   niter = niter_for_single_dom_exit (data);
4057   if (!niter
4058       || !zero_p (niter->may_be_zero))
4059     return false;
4060
4061   *value = iv_value (use->iv, niter->niter);
4062
4063   return true;
4064 }
4065
4066 /* Determines cost of replacing final value of USE using CAND.  */
4067
4068 static bool
4069 determine_use_iv_cost_outer (struct ivopts_data *data,
4070                              struct iv_use *use, struct iv_cand *cand)
4071 {
4072   bitmap depends_on;
4073   unsigned cost;
4074   edge exit;
4075   tree value = NULL_TREE;
4076   struct loop *loop = data->current_loop;
4077
4078   /* The simple case first -- if we need to express value of the preserved
4079      original biv, the cost is 0.  This also prevents us from counting the
4080      cost of increment twice -- once at this use and once in the cost of
4081      the candidate.  */
4082   if (cand->pos == IP_ORIGINAL
4083       && cand->incremented_at == use->stmt)
4084     {
4085       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4086       return true;
4087     }
4088
4089   if (!cand->iv)
4090     {
4091       if (!may_replace_final_value (data, use, &value))
4092         {
4093           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4094           return false;
4095         }
4096
4097       depends_on = NULL;
4098       cost = force_var_cost (data, value, &depends_on);
4099
4100       cost /= AVG_LOOP_NITER (loop);
4101
4102       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4103       return cost != INFTY;
4104     }
4105
4106   exit = single_dom_exit (loop);
4107   if (exit)
4108     {
4109       /* If there is just a single exit, we may use value of the candidate
4110          after we take it to determine the value of use.  */
4111       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4112                                       last_stmt (exit->src));
4113       if (cost != INFTY)
4114         cost /= AVG_LOOP_NITER (loop);
4115     }
4116   else
4117     {
4118       /* Otherwise we just need to compute the iv.  */
4119       cost = get_computation_cost (data, use, cand, false, &depends_on);
4120     }
4121
4122   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4123
4124   return cost != INFTY;
4125 }
4126
4127 /* Determines cost of basing replacement of USE on CAND.  Returns false
4128    if USE cannot be based on CAND.  */
4129
4130 static bool
4131 determine_use_iv_cost (struct ivopts_data *data,
4132                        struct iv_use *use, struct iv_cand *cand)
4133 {
4134   switch (use->type)
4135     {
4136     case USE_NONLINEAR_EXPR:
4137       return determine_use_iv_cost_generic (data, use, cand);
4138
4139     case USE_OUTER:
4140       return determine_use_iv_cost_outer (data, use, cand);
4141
4142     case USE_ADDRESS:
4143       return determine_use_iv_cost_address (data, use, cand);
4144
4145     case USE_COMPARE:
4146       return determine_use_iv_cost_condition (data, use, cand);
4147
4148     default:
4149       gcc_unreachable ();
4150     }
4151 }
4152
4153 /* Determines costs of basing the use of the iv on an iv candidate.  */
4154
4155 static void
4156 determine_use_iv_costs (struct ivopts_data *data)
4157 {
4158   unsigned i, j;
4159   struct iv_use *use;
4160   struct iv_cand *cand;
4161   bitmap to_clear = BITMAP_ALLOC (NULL);
4162
4163   alloc_use_cost_map (data);
4164
4165   for (i = 0; i < n_iv_uses (data); i++)
4166     {
4167       use = iv_use (data, i);
4168
4169       if (data->consider_all_candidates)
4170         {
4171           for (j = 0; j < n_iv_cands (data); j++)
4172             {
4173               cand = iv_cand (data, j);
4174               determine_use_iv_cost (data, use, cand);
4175             }
4176         }
4177       else
4178         {
4179           bitmap_iterator bi;
4180
4181           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4182             {
4183               cand = iv_cand (data, j);
4184               if (!determine_use_iv_cost (data, use, cand))
4185                 bitmap_set_bit (to_clear, j);
4186             }
4187
4188           /* Remove the candidates for that the cost is infinite from
4189              the list of related candidates.  */
4190           bitmap_and_compl_into (use->related_cands, to_clear);
4191           bitmap_clear (to_clear);
4192         }
4193     }
4194
4195   BITMAP_FREE (to_clear);
4196
4197   if (dump_file && (dump_flags & TDF_DETAILS))
4198     {
4199       fprintf (dump_file, "Use-candidate costs:\n");
4200
4201       for (i = 0; i < n_iv_uses (data); i++)
4202         {
4203           use = iv_use (data, i);
4204
4205           fprintf (dump_file, "Use %d:\n", i);
4206           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4207           for (j = 0; j < use->n_map_members; j++)
4208             {
4209               if (!use->cost_map[j].cand
4210                   || use->cost_map[j].cost == INFTY)
4211                 continue;
4212
4213               fprintf (dump_file, "  %d\t%d\t",
4214                        use->cost_map[j].cand->id,
4215                        use->cost_map[j].cost);
4216               if (use->cost_map[j].depends_on)
4217                 bitmap_print (dump_file,
4218                               use->cost_map[j].depends_on, "","");
4219               fprintf (dump_file, "\n");
4220             }
4221
4222           fprintf (dump_file, "\n");
4223         }
4224       fprintf (dump_file, "\n");
4225     }
4226 }
4227
4228 /* Determines cost of the candidate CAND.  */
4229
4230 static void
4231 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4232 {
4233   unsigned cost_base, cost_step;
4234   tree base;
4235
4236   if (!cand->iv)
4237     {
4238       cand->cost = 0;
4239       return;
4240     }
4241
4242   /* There are two costs associated with the candidate -- its increment
4243      and its initialization.  The second is almost negligible for any loop
4244      that rolls enough, so we take it just very little into account.  */
4245
4246   base = cand->iv->base;
4247   cost_base = force_var_cost (data, base, NULL);
4248   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4249
4250   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4251
4252   /* Prefer the original iv unless we may gain something by replacing it;
4253      this is not really relevant for artificial ivs created by other
4254      passes.  */
4255   if (cand->pos == IP_ORIGINAL
4256       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4257     cand->cost--;
4258
4259   /* Prefer not to insert statements into latch unless there are some
4260      already (so that we do not create unnecessary jumps).  */
4261   if (cand->pos == IP_END
4262       && empty_block_p (ip_end_pos (data->current_loop)))
4263     cand->cost++;
4264 }
4265
4266 /* Determines costs of computation of the candidates.  */
4267
4268 static void
4269 determine_iv_costs (struct ivopts_data *data)
4270 {
4271   unsigned i;
4272
4273   if (dump_file && (dump_flags & TDF_DETAILS))
4274     {
4275       fprintf (dump_file, "Candidate costs:\n");
4276       fprintf (dump_file, "  cand\tcost\n");
4277     }
4278
4279   for (i = 0; i < n_iv_cands (data); i++)
4280     {
4281       struct iv_cand *cand = iv_cand (data, i);
4282
4283       determine_iv_cost (data, cand);
4284
4285       if (dump_file && (dump_flags & TDF_DETAILS))
4286         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4287     }
4288
4289 if (dump_file && (dump_flags & TDF_DETAILS))
4290       fprintf (dump_file, "\n");
4291 }
4292
4293 /* Calculates cost for having SIZE induction variables.  */
4294
4295 static unsigned
4296 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4297 {
4298   return global_cost_for_size (size,
4299                                loop_data (data->current_loop)->regs_used,
4300                                n_iv_uses (data));
4301 }
4302
4303 /* For each size of the induction variable set determine the penalty.  */
4304
4305 static void
4306 determine_set_costs (struct ivopts_data *data)
4307 {
4308   unsigned j, n;
4309   tree phi, op;
4310   struct loop *loop = data->current_loop;
4311   bitmap_iterator bi;
4312
4313   /* We use the following model (definitely improvable, especially the
4314      cost function -- TODO):
4315
4316      We estimate the number of registers available (using MD data), name it A.
4317
4318      We estimate the number of registers used by the loop, name it U.  This
4319      number is obtained as the number of loop phi nodes (not counting virtual
4320      registers and bivs) + the number of variables from outside of the loop.
4321
4322      We set a reserve R (free regs that are used for temporary computations,
4323      etc.).  For now the reserve is a constant 3.
4324
4325      Let I be the number of induction variables.
4326
4327      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4328         make a lot of ivs without a reason).
4329      -- if A - R < U + I <= A, the cost is I * PRES_COST
4330      -- if U + I > A, the cost is I * PRES_COST and
4331         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4332
4333   if (dump_file && (dump_flags & TDF_DETAILS))
4334     {
4335       fprintf (dump_file, "Global costs:\n");
4336       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4337       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4338       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4339       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4340     }
4341
4342   n = 0;
4343   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4344     {
4345       op = PHI_RESULT (phi);
4346
4347       if (!is_gimple_reg (op))
4348         continue;
4349
4350       if (get_iv (data, op))
4351         continue;
4352
4353       n++;
4354     }
4355
4356   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4357     {
4358       struct version_info *info = ver_info (data, j);
4359
4360       if (info->inv_id && info->has_nonlin_use)
4361         n++;
4362     }
4363
4364   loop_data (loop)->regs_used = n;
4365   if (dump_file && (dump_flags & TDF_DETAILS))
4366     fprintf (dump_file, "  regs_used %d\n", n);
4367
4368   if (dump_file && (dump_flags & TDF_DETAILS))
4369     {
4370       fprintf (dump_file, "  cost for size:\n");
4371       fprintf (dump_file, "  ivs\tcost\n");
4372       for (j = 0; j <= 2 * target_avail_regs; j++)
4373         fprintf (dump_file, "  %d\t%d\n", j,
4374                  ivopts_global_cost_for_size (data, j));
4375       fprintf (dump_file, "\n");
4376     }
4377 }
4378
4379 /* Returns true if A is a cheaper cost pair than B.  */
4380
4381 static bool
4382 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4383 {
4384   if (!a)
4385     return false;
4386
4387   if (!b)
4388     return true;
4389
4390   if (a->cost < b->cost)
4391     return true;
4392
4393   if (a->cost > b->cost)
4394     return false;
4395
4396   /* In case the costs are the same, prefer the cheaper candidate.  */
4397   if (a->cand->cost < b->cand->cost)
4398     return true;
4399
4400   return false;
4401 }
4402
4403 /* Computes the cost field of IVS structure.  */
4404
4405 static void
4406 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4407 {
4408   unsigned cost = 0;
4409
4410   cost += ivs->cand_use_cost;
4411   cost += ivs->cand_cost;
4412   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4413
4414   ivs->cost = cost;
4415 }
4416
4417 /* Remove invariants in set INVS to set IVS.  */
4418
4419 static void
4420 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4421 {
4422   bitmap_iterator bi;
4423   unsigned iid;
4424
4425   if (!invs)
4426     return;
4427
4428   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4429     {
4430       ivs->n_invariant_uses[iid]--;
4431       if (ivs->n_invariant_uses[iid] == 0)
4432         ivs->n_regs--;
4433     }
4434 }
4435
4436 /* Set USE not to be expressed by any candidate in IVS.  */
4437
4438 static void
4439 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4440                  struct iv_use *use)
4441 {
4442   unsigned uid = use->id, cid;
4443   struct cost_pair *cp;
4444
4445   cp = ivs->cand_for_use[uid];
4446   if (!cp)
4447     return;
4448   cid = cp->cand->id;
4449
4450   ivs->bad_uses++;
4451   ivs->cand_for_use[uid] = NULL;
4452   ivs->n_cand_uses[cid]--;
4453
4454   if (ivs->n_cand_uses[cid] == 0)
4455     {
4456       bitmap_clear_bit (ivs->cands, cid);
4457       /* Do not count the pseudocandidates.  */
4458       if (cp->cand->iv)
4459         ivs->n_regs--;
4460       ivs->n_cands--;
4461       ivs->cand_cost -= cp->cand->cost;
4462
4463       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4464     }
4465
4466   ivs->cand_use_cost -= cp->cost;
4467
4468   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4469   iv_ca_recount_cost (data, ivs);
4470 }
4471
4472 /* Add invariants in set INVS to set IVS.  */
4473
4474 static void
4475 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4476 {
4477   bitmap_iterator bi;
4478   unsigned iid;
4479
4480   if (!invs)
4481     return;
4482
4483   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4484     {
4485       ivs->n_invariant_uses[iid]++;
4486       if (ivs->n_invariant_uses[iid] == 1)
4487         ivs->n_regs++;
4488     }
4489 }
4490
4491 /* Set cost pair for USE in set IVS to CP.  */
4492
4493 static void
4494 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4495               struct iv_use *use, struct cost_pair *cp)
4496 {
4497   unsigned uid = use->id, cid;
4498
4499   if (ivs->cand_for_use[uid] == cp)
4500     return;
4501
4502   if (ivs->cand_for_use[uid])
4503     iv_ca_set_no_cp (data, ivs, use);
4504
4505   if (cp)
4506     {
4507       cid = cp->cand->id;
4508
4509       ivs->bad_uses--;
4510       ivs->cand_for_use[uid] = cp;
4511       ivs->n_cand_uses[cid]++;
4512       if (ivs->n_cand_uses[cid] == 1)
4513         {
4514           bitmap_set_bit (ivs->cands, cid);
4515           /* Do not count the pseudocandidates.  */
4516           if (cp->cand->iv)
4517             ivs->n_regs++;
4518           ivs->n_cands++;
4519           ivs->cand_cost += cp->cand->cost;
4520
4521           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4522         }
4523
4524       ivs->cand_use_cost += cp->cost;
4525       iv_ca_set_add_invariants (ivs, cp->depends_on);
4526       iv_ca_recount_cost (data, ivs);
4527     }
4528 }
4529
4530 /* Extend set IVS by expressing USE by some of the candidates in it
4531    if possible.  */
4532
4533 static void
4534 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4535                struct iv_use *use)
4536 {
4537   struct cost_pair *best_cp = NULL, *cp;
4538   bitmap_iterator bi;
4539   unsigned i;
4540
4541   gcc_assert (ivs->upto >= use->id);
4542
4543   if (ivs->upto == use->id)
4544     {
4545       ivs->upto++;
4546       ivs->bad_uses++;
4547     }
4548
4549   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4550     {
4551       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4552
4553       if (cheaper_cost_pair (cp, best_cp))
4554         best_cp = cp;
4555     }
4556
4557   iv_ca_set_cp (data, ivs, use, best_cp);
4558 }
4559
4560 /* Get cost for assignment IVS.  */
4561
4562 static unsigned
4563 iv_ca_cost (struct iv_ca *ivs)
4564 {
4565   return (ivs->bad_uses ? INFTY : ivs->cost);
4566 }
4567
4568 /* Returns true if all dependences of CP are among invariants in IVS.  */
4569
4570 static bool
4571 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4572 {
4573   unsigned i;
4574   bitmap_iterator bi;
4575
4576   if (!cp->depends_on)
4577     return true;
4578
4579   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4580     {
4581       if (ivs->n_invariant_uses[i] == 0)
4582         return false;
4583     }
4584
4585   return true;
4586 }
4587
4588 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4589    it before NEXT_CHANGE.  */
4590
4591 static struct iv_ca_delta *
4592 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4593                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4594 {
4595   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4596
4597   change->use = use;
4598   change->old_cp = old_cp;
4599   change->new_cp = new_cp;
4600   change->next_change = next_change;
4601
4602   return change;
4603 }
4604
4605 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4606    are rewritten.  */
4607
4608 static struct iv_ca_delta *
4609 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4610 {
4611   struct iv_ca_delta *last;
4612
4613   if (!l2)
4614     return l1;
4615
4616   if (!l1)
4617     return l2;
4618
4619   for (last = l1; last->next_change; last = last->next_change)
4620     continue;
4621   last->next_change = l2;
4622
4623   return l1;
4624 }
4625
4626 /* Returns candidate by that USE is expressed in IVS.  */
4627
4628 static struct cost_pair *
4629 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4630 {
4631   return ivs->cand_for_use[use->id];
4632 }
4633
4634 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4635
4636 static struct iv_ca_delta *
4637 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4638 {
4639   struct iv_ca_delta *act, *next, *prev = NULL;
4640   struct cost_pair *tmp;
4641
4642   for (act = delta; act; act = next)
4643     {
4644       next = act->next_change;
4645       act->next_change = prev;
4646       prev = act;
4647
4648       tmp = act->old_cp;
4649       act->old_cp = act->new_cp;
4650       act->new_cp = tmp;
4651     }
4652
4653   return prev;
4654 }
4655
4656 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4657    reverted instead.  */
4658
4659 static void
4660 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4661                     struct iv_ca_delta *delta, bool forward)
4662 {
4663   struct cost_pair *from, *to;
4664   struct iv_ca_delta *act;
4665
4666   if (!forward)
4667     delta = iv_ca_delta_reverse (delta);
4668
4669   for (act = delta; act; act = act->next_change)
4670     {
4671       from = act->old_cp;
4672       to = act->new_cp;
4673       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4674       iv_ca_set_cp (data, ivs, act->use, to);
4675     }
4676
4677   if (!forward)
4678     iv_ca_delta_reverse (delta);
4679 }
4680
4681 /* Returns true if CAND is used in IVS.  */
4682
4683 static bool
4684 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4685 {
4686   return ivs->n_cand_uses[cand->id] > 0;
4687 }
4688
4689 /* Returns number of induction variable candidates in the set IVS.  */
4690
4691 static unsigned
4692 iv_ca_n_cands (struct iv_ca *ivs)
4693 {
4694   return ivs->n_cands;
4695 }
4696
4697 /* Free the list of changes DELTA.  */
4698
4699 static void
4700 iv_ca_delta_free (struct iv_ca_delta **delta)
4701 {
4702   struct iv_ca_delta *act, *next;
4703
4704   for (act = *delta; act; act = next)
4705     {
4706       next = act->next_change;
4707       free (act);
4708     }
4709
4710   *delta = NULL;
4711 }
4712
4713 /* Allocates new iv candidates assignment.  */
4714
4715 static struct iv_ca *
4716 iv_ca_new (struct ivopts_data *data)
4717 {
4718   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4719
4720   nw->upto = 0;
4721   nw->bad_uses = 0;
4722   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4723   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4724   nw->cands = BITMAP_ALLOC (NULL);
4725   nw->n_cands = 0;
4726   nw->n_regs = 0;
4727   nw->cand_use_cost = 0;
4728   nw->cand_cost = 0;
4729   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4730   nw->cost = 0;
4731
4732   return nw;
4733 }
4734
4735 /* Free memory occupied by the set IVS.  */
4736
4737 static void
4738 iv_ca_free (struct iv_ca **ivs)
4739 {
4740   free ((*ivs)->cand_for_use);
4741   free ((*ivs)->n_cand_uses);
4742   BITMAP_FREE ((*ivs)->cands);
4743   free ((*ivs)->n_invariant_uses);
4744   free (*ivs);
4745   *ivs = NULL;
4746 }
4747
4748 /* Dumps IVS to FILE.  */
4749
4750 static void
4751 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4752 {
4753   const char *pref = "  invariants ";
4754   unsigned i;
4755
4756   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4757   bitmap_print (file, ivs->cands, "  candidates ","\n");
4758
4759   for (i = 1; i <= data->max_inv_id; i++)
4760     if (ivs->n_invariant_uses[i])
4761       {
4762         fprintf (file, "%s%d", pref, i);
4763         pref = ", ";
4764       }
4765   fprintf (file, "\n");
4766 }
4767
4768 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4769    new set, and store differences in DELTA.  Number of induction variables
4770    in the new set is stored to N_IVS.  */
4771
4772 static unsigned
4773 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4774               struct iv_cand *cand, struct iv_ca_delta **delta,
4775               unsigned *n_ivs)
4776 {
4777   unsigned i, cost;
4778   struct iv_use *use;
4779   struct cost_pair *old_cp, *new_cp;
4780
4781   *delta = NULL;
4782   for (i = 0; i < ivs->upto; i++)
4783     {
4784       use = iv_use (data, i);
4785       old_cp = iv_ca_cand_for_use (ivs, use);
4786
4787       if (old_cp
4788           && old_cp->cand == cand)
4789         continue;
4790
4791       new_cp = get_use_iv_cost (data, use, cand);
4792       if (!new_cp)
4793         continue;
4794
4795       if (!iv_ca_has_deps (ivs, new_cp))
4796         continue;
4797
4798       if (!cheaper_cost_pair (new_cp, old_cp))
4799         continue;
4800
4801       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4802     }
4803
4804   iv_ca_delta_commit (data, ivs, *delta, true);
4805   cost = iv_ca_cost (ivs);
4806   if (n_ivs)
4807     *n_ivs = iv_ca_n_cands (ivs);
4808   iv_ca_delta_commit (data, ivs, *delta, false);
4809
4810   return cost;
4811 }
4812
4813 /* Try narrowing set IVS by removing CAND.  Return the cost of
4814    the new set and store the differences in DELTA.  */
4815
4816 static unsigned
4817 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4818               struct iv_cand *cand, struct iv_ca_delta **delta)
4819 {
4820   unsigned i, ci;
4821   struct iv_use *use;
4822   struct cost_pair *old_cp, *new_cp, *cp;
4823   bitmap_iterator bi;
4824   struct iv_cand *cnd;
4825   unsigned cost;
4826
4827   *delta = NULL;
4828   for (i = 0; i < n_iv_uses (data); i++)
4829     {
4830       use = iv_use (data, i);
4831
4832       old_cp = iv_ca_cand_for_use (ivs, use);
4833       if (old_cp->cand != cand)
4834         continue;
4835
4836       new_cp = NULL;
4837
4838       if (data->consider_all_candidates)
4839         {
4840           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4841             {
4842               if (ci == cand->id)
4843                 continue;
4844
4845               cnd = iv_cand (data, ci);
4846
4847               cp = get_use_iv_cost (data, use, cnd);
4848               if (!cp)
4849                 continue;
4850               if (!iv_ca_has_deps (ivs, cp))
4851                 continue;
4852
4853               if (!cheaper_cost_pair (cp, new_cp))
4854                 continue;
4855
4856               new_cp = cp;
4857             }
4858         }
4859       else
4860         {
4861           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4862             {
4863               if (ci == cand->id)
4864                 continue;
4865
4866               cnd = iv_cand (data, ci);
4867
4868               cp = get_use_iv_cost (data, use, cnd);
4869               if (!cp)
4870                 continue;
4871               if (!iv_ca_has_deps (ivs, cp))
4872                 continue;
4873
4874               if (!cheaper_cost_pair (cp, new_cp))
4875                 continue;
4876
4877               new_cp = cp;
4878             }
4879         }
4880
4881       if (!new_cp)
4882         {
4883           iv_ca_delta_free (delta);
4884           return INFTY;
4885         }
4886
4887       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4888     }
4889
4890   iv_ca_delta_commit (data, ivs, *delta, true);
4891   cost = iv_ca_cost (ivs);
4892   iv_ca_delta_commit (data, ivs, *delta, false);
4893
4894   return cost;
4895 }
4896
4897 /* Try optimizing the set of candidates IVS by removing candidates different
4898    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4899    differences in DELTA.  */
4900
4901 static unsigned
4902 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4903              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4904 {
4905   bitmap_iterator bi;
4906   struct iv_ca_delta *act_delta, *best_delta;
4907   unsigned i, best_cost, acost;
4908   struct iv_cand *cand;
4909
4910   best_delta = NULL;
4911   best_cost = iv_ca_cost (ivs);
4912
4913   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4914     {
4915       cand = iv_cand (data, i);
4916
4917       if (cand == except_cand)
4918         continue;
4919
4920       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4921
4922       if (acost < best_cost)
4923         {
4924           best_cost = acost;
4925           iv_ca_delta_free (&best_delta);
4926           best_delta = act_delta;
4927         }
4928       else
4929         iv_ca_delta_free (&act_delta);
4930     }
4931
4932   if (!best_delta)
4933     {
4934       *delta = NULL;
4935       return best_cost;
4936     }
4937
4938   /* Recurse to possibly remove other unnecessary ivs.  */
4939   iv_ca_delta_commit (data, ivs, best_delta, true);
4940   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4941   iv_ca_delta_commit (data, ivs, best_delta, false);
4942   *delta = iv_ca_delta_join (best_delta, *delta);
4943   return best_cost;
4944 }
4945
4946 /* Tries to extend the sets IVS in the best possible way in order
4947    to express the USE.  */
4948
4949 static bool
4950 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4951                   struct iv_use *use)
4952 {
4953   unsigned best_cost, act_cost;
4954   unsigned i;
4955   bitmap_iterator bi;
4956   struct iv_cand *cand;
4957   struct iv_ca_delta *best_delta = NULL, *act_delta;
4958   struct cost_pair *cp;
4959
4960   iv_ca_add_use (data, ivs, use);
4961   best_cost = iv_ca_cost (ivs);
4962
4963   cp = iv_ca_cand_for_use (ivs, use);
4964   if (cp)
4965     {
4966       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4967       iv_ca_set_no_cp (data, ivs, use);
4968     }
4969
4970   /* First try important candidates.  Only if it fails, try the specific ones.
4971      Rationale -- in loops with many variables the best choice often is to use
4972      just one generic biv.  If we added here many ivs specific to the uses,
4973      the optimization algorithm later would be likely to get stuck in a local
4974      minimum, thus causing us to create too many ivs.  The approach from
4975      few ivs to more seems more likely to be successful -- starting from few
4976      ivs, replacing an expensive use by a specific iv should always be a
4977      win.  */
4978   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4979     {
4980       cand = iv_cand (data, i);
4981
4982       if (iv_ca_cand_used_p (ivs, cand))
4983         continue;
4984
4985       cp = get_use_iv_cost (data, use, cand);
4986       if (!cp)
4987         continue;
4988
4989       iv_ca_set_cp (data, ivs, use, cp);
4990       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4991       iv_ca_set_no_cp (data, ivs, use);
4992       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4993
4994       if (act_cost < best_cost)
4995         {
4996           best_cost = act_cost;
4997
4998           iv_ca_delta_free (&best_delta);
4999           best_delta = act_delta;
5000         }
5001       else
5002         iv_ca_delta_free (&act_delta);
5003     }
5004
5005   if (best_cost == INFTY)
5006     {
5007       for (i = 0; i < use->n_map_members; i++)
5008         {
5009           cp = use->cost_map + i;
5010           cand = cp->cand;
5011           if (!cand)
5012             continue;
5013
5014           /* Already tried this.  */
5015           if (cand->important)
5016             continue;
5017
5018           if (iv_ca_cand_used_p (ivs, cand))
5019             continue;
5020
5021           act_delta = NULL;
5022           iv_ca_set_cp (data, ivs, use, cp);
5023           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5024           iv_ca_set_no_cp (data, ivs, use);
5025           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5026                                        cp, act_delta);
5027
5028           if (act_cost < best_cost)
5029             {
5030               best_cost = act_cost;
5031
5032               if (best_delta)
5033                 iv_ca_delta_free (&best_delta);
5034               best_delta = act_delta;
5035             }
5036           else
5037             iv_ca_delta_free (&act_delta);
5038         }
5039     }
5040
5041   iv_ca_delta_commit (data, ivs, best_delta, true);
5042   iv_ca_delta_free (&best_delta);
5043
5044   return (best_cost != INFTY);
5045 }
5046
5047 /* Finds an initial assignment of candidates to uses.  */
5048
5049 static struct iv_ca *
5050 get_initial_solution (struct ivopts_data *data)
5051 {
5052   struct iv_ca *ivs = iv_ca_new (data);
5053   unsigned i;
5054
5055   for (i = 0; i < n_iv_uses (data); i++)
5056     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5057       {
5058         iv_ca_free (&ivs);
5059         return NULL;
5060       }
5061
5062   return ivs;
5063 }
5064
5065 /* Tries to improve set of induction variables IVS.  */
5066
5067 static bool
5068 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5069 {
5070   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5071   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5072   struct iv_cand *cand;
5073
5074   /* Try extending the set of induction variables by one.  */
5075   for (i = 0; i < n_iv_cands (data); i++)
5076     {
5077       cand = iv_cand (data, i);
5078
5079       if (iv_ca_cand_used_p (ivs, cand))
5080         continue;
5081
5082       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5083       if (!act_delta)
5084         continue;
5085
5086       /* If we successfully added the candidate and the set is small enough,
5087          try optimizing it by removing other candidates.  */
5088       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5089         {
5090           iv_ca_delta_commit (data, ivs, act_delta, true);
5091           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5092           iv_ca_delta_commit (data, ivs, act_delta, false);
5093           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5094         }
5095
5096       if (acost < best_cost)
5097         {
5098           best_cost = acost;
5099           iv_ca_delta_free (&best_delta);
5100           best_delta = act_delta;
5101         }
5102       else
5103         iv_ca_delta_free (&act_delta);
5104     }
5105
5106   if (!best_delta)
5107     {
5108       /* Try removing the candidates from the set instead.  */
5109       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5110
5111       /* Nothing more we can do.  */
5112       if (!best_delta)
5113         return false;
5114     }
5115
5116   iv_ca_delta_commit (data, ivs, best_delta, true);
5117   gcc_assert (best_cost == iv_ca_cost (ivs));
5118   iv_ca_delta_free (&best_delta);
5119   return true;
5120 }
5121
5122 /* Attempts to find the optimal set of induction variables.  We do simple
5123    greedy heuristic -- we try to replace at most one candidate in the selected
5124    solution and remove the unused ivs while this improves the cost.  */
5125
5126 static struct iv_ca *
5127 find_optimal_iv_set (struct ivopts_data *data)
5128 {
5129   unsigned i;
5130   struct iv_ca *set;
5131   struct iv_use *use;
5132
5133   /* Get the initial solution.  */
5134   set = get_initial_solution (data);
5135   if (!set)
5136     {
5137       if (dump_file && (dump_flags & TDF_DETAILS))
5138         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5139       return NULL;
5140     }
5141
5142   if (dump_file && (dump_flags & TDF_DETAILS))
5143     {
5144       fprintf (dump_file, "Initial set of candidates:\n");
5145       iv_ca_dump (data, dump_file, set);
5146     }
5147
5148   while (try_improve_iv_set (data, set))
5149     {
5150       if (dump_file && (dump_flags & TDF_DETAILS))
5151         {
5152           fprintf (dump_file, "Improved to:\n");
5153           iv_ca_dump (data, dump_file, set);
5154         }
5155     }
5156
5157   if (dump_file && (dump_flags & TDF_DETAILS))
5158     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5159
5160   for (i = 0; i < n_iv_uses (data); i++)
5161     {
5162       use = iv_use (data, i);
5163       use->selected = iv_ca_cand_for_use (set, use)->cand;
5164     }
5165
5166   return set;
5167 }
5168
5169 /* Creates a new induction variable corresponding to CAND.  */
5170
5171 static void
5172 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5173 {
5174   block_stmt_iterator incr_pos;
5175   tree base;
5176   bool after = false;
5177
5178   if (!cand->iv)
5179     return;
5180
5181   switch (cand->pos)
5182     {
5183     case IP_NORMAL:
5184       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5185       break;
5186
5187     case IP_END:
5188       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5189       after = true;
5190       break;
5191
5192     case IP_ORIGINAL:
5193       /* Mark that the iv is preserved.  */
5194       name_info (data, cand->var_before)->preserve_biv = true;
5195       name_info (data, cand->var_after)->preserve_biv = true;
5196
5197       /* Rewrite the increment so that it uses var_before directly.  */
5198       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5199
5200       return;
5201     }
5202
5203   gimple_add_tmp_var (cand->var_before);
5204   add_referenced_tmp_var (cand->var_before);
5205
5206   base = unshare_expr (cand->iv->base);
5207
5208   create_iv (base, unshare_expr (cand->iv->step),
5209              cand->var_before, data->current_loop,
5210              &incr_pos, after, &cand->var_before, &cand->var_after);
5211 }
5212
5213 /* Creates new induction variables described in SET.  */
5214
5215 static void
5216 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5217 {
5218   unsigned i;
5219   struct iv_cand *cand;
5220   bitmap_iterator bi;
5221
5222   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5223     {
5224       cand = iv_cand (data, i);
5225       create_new_iv (data, cand);
5226     }
5227 }
5228
5229 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5230    is true, remove also the ssa name defined by the statement.  */
5231
5232 static void
5233 remove_statement (tree stmt, bool including_defined_name)
5234 {
5235   if (TREE_CODE (stmt) == PHI_NODE)
5236     {
5237       if (!including_defined_name)
5238         {
5239           /* Prevent the ssa name defined by the statement from being removed.  */
5240           SET_PHI_RESULT (stmt, NULL);
5241         }
5242       remove_phi_node (stmt, NULL_TREE);
5243     }
5244   else
5245     {
5246       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5247
5248       bsi_remove (&bsi);
5249     }
5250 }
5251
5252 /* Rewrites USE (definition of iv used in a nonlinear expression)
5253    using candidate CAND.  */
5254
5255 static void
5256 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5257                             struct iv_use *use, struct iv_cand *cand)
5258 {
5259   tree comp;
5260   tree op, stmts, tgt, ass;
5261   block_stmt_iterator bsi, pbsi;
5262
5263   /* An important special case -- if we are asked to express value of
5264      the original iv by itself, just exit; there is no need to
5265      introduce a new computation (that might also need casting the
5266      variable to unsigned and back).  */
5267   if (cand->pos == IP_ORIGINAL
5268       && TREE_CODE (use->stmt) == MODIFY_EXPR
5269       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5270     {
5271       op = TREE_OPERAND (use->stmt, 1);
5272
5273       /* Be a bit careful.  In case variable is expressed in some
5274          complicated way, rewrite it so that we may get rid of this
5275          complicated expression.  */
5276       if ((TREE_CODE (op) == PLUS_EXPR
5277            || TREE_CODE (op) == MINUS_EXPR)
5278           && TREE_OPERAND (op, 0) == cand->var_before
5279           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5280         return;
5281     }
5282
5283   comp = unshare_expr (get_computation (data->current_loop,
5284                                         use, cand));
5285   switch (TREE_CODE (use->stmt))
5286     {
5287     case PHI_NODE:
5288       tgt = PHI_RESULT (use->stmt);
5289
5290       /* If we should keep the biv, do not replace it.  */
5291       if (name_info (data, tgt)->preserve_biv)
5292         return;
5293
5294       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5295       while (!bsi_end_p (pbsi)
5296              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5297         {
5298           bsi = pbsi;
5299           bsi_next (&pbsi);
5300         }
5301       break;
5302
5303     case MODIFY_EXPR:
5304       tgt = TREE_OPERAND (use->stmt, 0);
5305       bsi = bsi_for_stmt (use->stmt);
5306       break;
5307
5308     default:
5309       gcc_unreachable ();
5310     }
5311
5312   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5313
5314   if (TREE_CODE (use->stmt) == PHI_NODE)
5315     {
5316       if (stmts)
5317         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5318       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5319       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5320       remove_statement (use->stmt, false);
5321       SSA_NAME_DEF_STMT (tgt) = ass;
5322     }
5323   else
5324     {
5325       if (stmts)
5326         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5327       TREE_OPERAND (use->stmt, 1) = op;
5328     }
5329 }
5330
5331 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5332    for_each_index.  */
5333
5334 static bool
5335 idx_remove_ssa_names (tree base, tree *idx,
5336                       void *data ATTRIBUTE_UNUSED)
5337 {
5338   tree *op;
5339
5340   if (TREE_CODE (*idx) == SSA_NAME)
5341     *idx = SSA_NAME_VAR (*idx);
5342
5343   if (TREE_CODE (base) == ARRAY_REF)
5344     {
5345       op = &TREE_OPERAND (base, 2);
5346       if (*op
5347           && TREE_CODE (*op) == SSA_NAME)
5348         *op = SSA_NAME_VAR (*op);
5349       op = &TREE_OPERAND (base, 3);
5350       if (*op
5351           && TREE_CODE (*op) == SSA_NAME)
5352         *op = SSA_NAME_VAR (*op);
5353     }
5354
5355   return true;
5356 }
5357
5358 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5359
5360 static tree
5361 unshare_and_remove_ssa_names (tree ref)
5362 {
5363   ref = unshare_expr (ref);
5364   for_each_index (&ref, idx_remove_ssa_names, NULL);
5365
5366   return ref;
5367 }
5368
5369 /* Rewrites base of memory access OP with expression WITH in statement
5370    pointed to by BSI.  */
5371
5372 static void
5373 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5374 {
5375   tree bvar, var, new_name, copy, name;
5376   tree orig;
5377
5378   var = bvar = get_base_address (*op);
5379
5380   if (!var || TREE_CODE (with) != SSA_NAME)
5381     goto do_rewrite;
5382
5383   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5384   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5385   if (TREE_CODE (var) == INDIRECT_REF)
5386     var = TREE_OPERAND (var, 0);
5387   if (TREE_CODE (var) == SSA_NAME)
5388     {
5389       name = var;
5390       var = SSA_NAME_VAR (var);
5391     }
5392   else if (DECL_P (var))
5393     name = NULL_TREE;
5394   else
5395     goto do_rewrite;
5396
5397   /* We need to add a memory tag for the variable.  But we do not want
5398      to add it to the temporary used for the computations, since this leads
5399      to problems in redundancy elimination when there are common parts
5400      in two computations referring to the different arrays.  So we copy
5401      the variable to a new temporary.  */
5402   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5403
5404   if (name)
5405     new_name = duplicate_ssa_name (name, copy);
5406   else
5407     {
5408       tree tag = var_ann (var)->type_mem_tag;
5409       tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5410       add_referenced_tmp_var (new_ptr);
5411       if (tag)
5412         var_ann (new_ptr)->type_mem_tag = tag;
5413       else
5414         add_type_alias (new_ptr, var);
5415       new_name = make_ssa_name (new_ptr, copy);
5416     }
5417
5418   TREE_OPERAND (copy, 0) = new_name;
5419   update_stmt (copy);
5420   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5421   with = new_name;
5422
5423 do_rewrite:
5424
5425   orig = NULL_TREE;
5426   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5427   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5428
5429   if (TREE_CODE (*op) == INDIRECT_REF)
5430     orig = REF_ORIGINAL (*op);
5431   if (!orig)
5432     orig = unshare_and_remove_ssa_names (*op);
5433
5434   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5435
5436   /* Record the original reference, for purposes of alias analysis.  */
5437   REF_ORIGINAL (*op) = orig;
5438
5439   /* Virtual operands in the original statement may have to be renamed
5440      because of the replacement.  */
5441   mark_new_vars_to_rename (bsi_stmt (*bsi));
5442 }
5443
5444 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5445
5446 static void
5447 rewrite_use_address (struct ivopts_data *data,
5448                      struct iv_use *use, struct iv_cand *cand)
5449 {
5450   tree comp = unshare_expr (get_computation (data->current_loop,
5451                                              use, cand));
5452   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5453   tree stmts;
5454   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5455
5456   if (stmts)
5457     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5458
5459   rewrite_address_base (&bsi, use->op_p, op);
5460 }
5461
5462 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5463    candidate CAND.  */
5464
5465 static void
5466 rewrite_use_compare (struct ivopts_data *data,
5467                      struct iv_use *use, struct iv_cand *cand)
5468 {
5469   tree comp;
5470   tree *op_p, cond, op, stmts, bound;
5471   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5472   enum tree_code compare;
5473   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5474
5475   bound = cp->value;
5476   if (bound)
5477     {
5478       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5479       tree var_type = TREE_TYPE (var);
5480
5481       compare = iv_elimination_compare (data, use);
5482       bound = fold_convert (var_type, bound);
5483       op = force_gimple_operand (unshare_expr (bound), &stmts,
5484                                  true, NULL_TREE);
5485
5486       if (stmts)
5487         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5488
5489       *use->op_p = build2 (compare, boolean_type_node, var, op);
5490       update_stmt (use->stmt);
5491       return;
5492     }
5493
5494   /* The induction variable elimination failed; just express the original
5495      giv.  */
5496   comp = unshare_expr (get_computation (data->current_loop, use, cand));
5497
5498   cond = *use->op_p;
5499   op_p = &TREE_OPERAND (cond, 0);
5500   if (TREE_CODE (*op_p) != SSA_NAME
5501       || zero_p (get_iv (data, *op_p)->step))
5502     op_p = &TREE_OPERAND (cond, 1);
5503
5504   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5505   if (stmts)
5506     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5507
5508   *op_p = op;
5509 }
5510
5511 /* Ensure that operand *OP_P may be used at the end of EXIT without
5512    violating loop closed ssa form.  */
5513
5514 static void
5515 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5516 {
5517   basic_block def_bb;
5518   struct loop *def_loop;
5519   tree phi, use;
5520
5521   use = USE_FROM_PTR (op_p);
5522   if (TREE_CODE (use) != SSA_NAME)
5523     return;
5524
5525   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5526   if (!def_bb)
5527     return;
5528
5529   def_loop = def_bb->loop_father;
5530   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5531     return;
5532
5533   /* Try finding a phi node that copies the value out of the loop.  */
5534   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5535     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5536       break;
5537
5538   if (!phi)
5539     {
5540       /* Create such a phi node.  */
5541       tree new_name = duplicate_ssa_name (use, NULL);
5542
5543       phi = create_phi_node (new_name, exit->dest);
5544       SSA_NAME_DEF_STMT (new_name) = phi;
5545       add_phi_arg (phi, use, exit);
5546     }
5547
5548   SET_USE (op_p, PHI_RESULT (phi));
5549 }
5550
5551 /* Ensure that operands of STMT may be used at the end of EXIT without
5552    violating loop closed ssa form.  */
5553
5554 static void
5555 protect_loop_closed_ssa_form (edge exit, tree stmt)
5556 {
5557   ssa_op_iter iter;
5558   use_operand_p use_p;
5559
5560   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5561     protect_loop_closed_ssa_form_use (exit, use_p);
5562 }
5563
5564 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5565    so that they are emitted on the correct place, and so that the loop closed
5566    ssa form is preserved.  */
5567
5568 static void
5569 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5570 {
5571   tree_stmt_iterator tsi;
5572   block_stmt_iterator bsi;
5573   tree phi, stmt, def, next;
5574
5575   if (!single_pred_p (exit->dest))
5576     split_loop_exit_edge (exit);
5577
5578   /* Ensure there is label in exit->dest, so that we can
5579      insert after it.  */
5580   tree_block_label (exit->dest);
5581   bsi = bsi_after_labels (exit->dest);
5582
5583   if (TREE_CODE (stmts) == STATEMENT_LIST)
5584     {
5585       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5586         {
5587           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5588           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5589         }
5590     }
5591   else
5592     {
5593       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5594       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5595     }
5596
5597   if (!op)
5598     return;
5599
5600   for (phi = phi_nodes (exit->dest); phi; phi = next)
5601     {
5602       next = PHI_CHAIN (phi);
5603
5604       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5605         {
5606           def = PHI_RESULT (phi);
5607           remove_statement (phi, false);
5608           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5609                         def, op);
5610           SSA_NAME_DEF_STMT (def) = stmt;
5611           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5612         }
5613     }
5614 }
5615
5616 /* Rewrites the final value of USE (that is only needed outside of the loop)
5617    using candidate CAND.  */
5618
5619 static void
5620 rewrite_use_outer (struct ivopts_data *data,
5621                    struct iv_use *use, struct iv_cand *cand)
5622 {
5623   edge exit;
5624   tree value, op, stmts, tgt;
5625   tree phi;
5626
5627   switch (TREE_CODE (use->stmt))
5628     {
5629     case PHI_NODE:
5630       tgt = PHI_RESULT (use->stmt);
5631       break;
5632     case MODIFY_EXPR:
5633       tgt = TREE_OPERAND (use->stmt, 0);
5634       break;
5635     default:
5636       gcc_unreachable ();
5637     }
5638
5639   exit = single_dom_exit (data->current_loop);
5640
5641   if (exit)
5642     {
5643       if (!cand->iv)
5644         {
5645           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5646           value = cp->value;
5647         }
5648       else
5649         value = get_computation_at (data->current_loop,
5650                                     use, cand, last_stmt (exit->src));
5651
5652       value = unshare_expr (value);
5653       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5654
5655       /* If we will preserve the iv anyway and we would need to perform
5656          some computation to replace the final value, do nothing.  */
5657       if (stmts && name_info (data, tgt)->preserve_biv)
5658         return;
5659
5660       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5661         {
5662           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5663
5664           if (USE_FROM_PTR (use_p) == tgt)
5665             SET_USE (use_p, op);
5666         }
5667
5668       if (stmts)
5669         compute_phi_arg_on_exit (exit, stmts, op);
5670
5671       /* Enable removal of the statement.  We cannot remove it directly,
5672          since we may still need the aliasing information attached to the
5673          ssa name defined by it.  */
5674       name_info (data, tgt)->iv->have_use_for = false;
5675       return;
5676     }
5677
5678   /* If the variable is going to be preserved anyway, there is nothing to
5679      do.  */
5680   if (name_info (data, tgt)->preserve_biv)
5681     return;
5682
5683   /* Otherwise we just need to compute the iv.  */
5684   rewrite_use_nonlinear_expr (data, use, cand);
5685 }
5686
5687 /* Rewrites USE using candidate CAND.  */
5688
5689 static void
5690 rewrite_use (struct ivopts_data *data,
5691              struct iv_use *use, struct iv_cand *cand)
5692 {
5693   switch (use->type)
5694     {
5695       case USE_NONLINEAR_EXPR:
5696         rewrite_use_nonlinear_expr (data, use, cand);
5697         break;
5698
5699       case USE_OUTER:
5700         rewrite_use_outer (data, use, cand);
5701         break;
5702
5703       case USE_ADDRESS:
5704         rewrite_use_address (data, use, cand);
5705         break;
5706
5707       case USE_COMPARE:
5708         rewrite_use_compare (data, use, cand);
5709         break;
5710
5711       default:
5712         gcc_unreachable ();
5713     }
5714   update_stmt (use->stmt);
5715 }
5716
5717 /* Rewrite the uses using the selected induction variables.  */
5718
5719 static void
5720 rewrite_uses (struct ivopts_data *data)
5721 {
5722   unsigned i;
5723   struct iv_cand *cand;
5724   struct iv_use *use;
5725
5726   for (i = 0; i < n_iv_uses (data); i++)
5727     {
5728       use = iv_use (data, i);
5729       cand = use->selected;
5730       gcc_assert (cand);
5731
5732       rewrite_use (data, use, cand);
5733     }
5734 }
5735
5736 /* Removes the ivs that are not used after rewriting.  */
5737
5738 static void
5739 remove_unused_ivs (struct ivopts_data *data)
5740 {
5741   unsigned j;
5742   bitmap_iterator bi;
5743
5744   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5745     {
5746       struct version_info *info;
5747
5748       info = ver_info (data, j);
5749       if (info->iv
5750           && !zero_p (info->iv->step)
5751           && !info->inv_id
5752           && !info->iv->have_use_for
5753           && !info->preserve_biv)
5754         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5755     }
5756 }
5757
5758 /* Frees data allocated by the optimization of a single loop.  */
5759
5760 static void
5761 free_loop_data (struct ivopts_data *data)
5762 {
5763   unsigned i, j;
5764   bitmap_iterator bi;
5765   tree obj;
5766
5767   htab_empty (data->niters);
5768
5769   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5770     {
5771       struct version_info *info;
5772
5773       info = ver_info (data, i);
5774       if (info->iv)
5775         free (info->iv);
5776       info->iv = NULL;
5777       info->has_nonlin_use = false;
5778       info->preserve_biv = false;
5779       info->inv_id = 0;
5780     }
5781   bitmap_clear (data->relevant);
5782   bitmap_clear (data->important_candidates);
5783
5784   for (i = 0; i < n_iv_uses (data); i++)
5785     {
5786       struct iv_use *use = iv_use (data, i);
5787
5788       free (use->iv);
5789       BITMAP_FREE (use->related_cands);
5790       for (j = 0; j < use->n_map_members; j++)
5791         if (use->cost_map[j].depends_on)
5792           BITMAP_FREE (use->cost_map[j].depends_on);
5793       free (use->cost_map);
5794       free (use);
5795     }
5796   VEC_truncate (iv_use_p, data->iv_uses, 0);
5797
5798   for (i = 0; i < n_iv_cands (data); i++)
5799     {
5800       struct iv_cand *cand = iv_cand (data, i);
5801
5802       if (cand->iv)
5803         free (cand->iv);
5804       if (cand->depends_on)
5805         BITMAP_FREE (cand->depends_on);
5806       free (cand);
5807     }
5808   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5809
5810   if (data->version_info_size < num_ssa_names)
5811     {
5812       data->version_info_size = 2 * num_ssa_names;
5813       free (data->version_info);
5814       data->version_info = xcalloc (data->version_info_size,
5815                                     sizeof (struct version_info));
5816     }
5817
5818   data->max_inv_id = 0;
5819
5820   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5821     SET_DECL_RTL (obj, NULL_RTX);
5822
5823   VEC_truncate (tree, decl_rtl_to_reset, 0);
5824 }
5825
5826 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5827    loop tree.  */
5828
5829 static void
5830 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5831 {
5832   unsigned i;
5833
5834   for (i = 1; i < loops->num; i++)
5835     if (loops->parray[i])
5836       {
5837         free (loops->parray[i]->aux);
5838         loops->parray[i]->aux = NULL;
5839       }
5840
5841   free_loop_data (data);
5842   free (data->version_info);
5843   BITMAP_FREE (data->relevant);
5844   BITMAP_FREE (data->important_candidates);
5845   htab_delete (data->niters);
5846
5847   VEC_free (tree, heap, decl_rtl_to_reset);
5848   VEC_free (iv_use_p, heap, data->iv_uses);
5849   VEC_free (iv_cand_p, heap, data->iv_candidates);
5850 }
5851
5852 /* Optimizes the LOOP.  Returns true if anything changed.  */
5853
5854 static bool
5855 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5856 {
5857   bool changed = false;
5858   struct iv_ca *iv_ca;
5859   edge exit;
5860
5861   data->current_loop = loop;
5862
5863   if (dump_file && (dump_flags & TDF_DETAILS))
5864     {
5865       fprintf (dump_file, "Processing loop %d\n", loop->num);
5866
5867       exit = single_dom_exit (loop);
5868       if (exit)
5869         {
5870           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5871                    exit->src->index, exit->dest->index);
5872           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5873           fprintf (dump_file, "\n");
5874         }
5875
5876       fprintf (dump_file, "\n");
5877     }
5878
5879   /* For each ssa name determines whether it behaves as an induction variable
5880      in some loop.  */
5881   if (!find_induction_variables (data))
5882     goto finish;
5883
5884   /* Finds interesting uses (item 1).  */
5885   find_interesting_uses (data);
5886   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5887     goto finish;
5888
5889   /* Finds candidates for the induction variables (item 2).  */
5890   find_iv_candidates (data);
5891
5892   /* Calculates the costs (item 3, part 1).  */
5893   determine_use_iv_costs (data);
5894   determine_iv_costs (data);
5895   determine_set_costs (data);
5896
5897   /* Find the optimal set of induction variables (item 3, part 2).  */
5898   iv_ca = find_optimal_iv_set (data);
5899   if (!iv_ca)
5900     goto finish;
5901   changed = true;
5902
5903   /* Create the new induction variables (item 4, part 1).  */
5904   create_new_ivs (data, iv_ca);
5905   iv_ca_free (&iv_ca);
5906
5907   /* Rewrite the uses (item 4, part 2).  */
5908   rewrite_uses (data);
5909
5910   /* Remove the ivs that are unused after rewriting.  */
5911   remove_unused_ivs (data);
5912
5913   /* We have changed the structure of induction variables; it might happen
5914      that definitions in the scev database refer to some of them that were
5915      eliminated.  */
5916   scev_reset ();
5917
5918 finish:
5919   free_loop_data (data);
5920
5921   return changed;
5922 }
5923
5924 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5925
5926 void
5927 tree_ssa_iv_optimize (struct loops *loops)
5928 {
5929   struct loop *loop;
5930   struct ivopts_data data;
5931
5932   tree_ssa_iv_optimize_init (loops, &data);
5933
5934   /* Optimize the loops starting with the innermost ones.  */
5935   loop = loops->tree_root;
5936   while (loop->inner)
5937     loop = loop->inner;
5938
5939   /* Scan the loops, inner ones first.  */
5940   while (loop != loops->tree_root)
5941     {
5942       if (dump_file && (dump_flags & TDF_DETAILS))
5943         flow_loop_dump (loop, dump_file, NULL, 1);
5944
5945       tree_ssa_iv_optimize_loop (&data, loop);
5946
5947       if (loop->next)
5948         {
5949           loop = loop->next;
5950           while (loop->inner)
5951             loop = loop->inner;
5952         }
5953       else
5954         loop = loop->outer;
5955     }
5956
5957   /* FIXME.  IV opts introduces new aliases and call-clobbered
5958      variables, which need to be renamed.  However, when we call the
5959      renamer, not all statements will be scanned for operands.  In
5960      particular, the newly introduced aliases may appear in statements
5961      that are considered "unmodified", so the renamer will not get a
5962      chance to rename those operands.
5963
5964      Work around this problem by forcing an operand re-scan on every
5965      statement.  This will not be necessary once the new operand
5966      scanner is implemented.  */
5967   if (need_ssa_update_p ())
5968     {
5969       basic_block bb;
5970       block_stmt_iterator si;
5971       FOR_EACH_BB (bb)
5972         for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5973           update_stmt (bsi_stmt (si));
5974     }
5975
5976   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5977   tree_ssa_iv_optimize_finalize (loops, &data);
5978 }