gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold (build1 (ADDR_EXPR, ptr_type_node, base));
 795
 796     case PLUS_EXPR:
 797     case MINUS_EXPR:
 798       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 799       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 800
 801       if (!op1)
 802         return op0;
 803
 804       if (!op0)
 805         return (code == PLUS_EXPR
 806                 ? op1
 807                 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
 808
 809       return fold (build (code, ptr_type_node, op0, op1));
 810
 811     case NOP_EXPR:
 812     case CONVERT_EXPR:
 813       return determine_base_object (TREE_OPERAND (expr, 0));
 814
 815     default:
 816       return fold_convert (ptr_type_node, expr);
 817     }
 818 }
 819
 820 /* Allocates an induction variable with given initial value BASE and step STEP
 821    for loop LOOP.  */
 822
 823 static struct iv *
 824 alloc_iv (tree base, tree step)
 825 {
 826   struct iv *iv = xcalloc (1, sizeof (struct iv));
 827
 828   if (step && integer_zerop (step))
 829     step = NULL_TREE;
 830
 831   iv->base = base;
 832   iv->base_object = determine_base_object (base);
 833   iv->step = step;
 834   iv->biv_p = false;
 835   iv->have_use_for = false;
 836   iv->use_id = 0;
 837   iv->ssa_name = NULL_TREE;
 838
 839   return iv;
 840 }
 841
 842 /* Sets STEP and BASE for induction variable IV.  */
 843
 844 static void
 845 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 846 {
 847   struct version_info *info = name_info (data, iv);
 848
 849   gcc_assert (!info->iv);
 850
 851   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 852   info->iv = alloc_iv (base, step);
 853   info->iv->ssa_name = iv;
 854 }
 855
 856 /* Finds induction variable declaration for VAR.  */
 857
 858 static struct iv *
 859 get_iv (struct ivopts_data *data, tree var)
 860 {
 861   basic_block bb;
 862
 863   if (!name_info (data, var)->iv)
 864     {
 865       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 866
 867       if (!bb
 868           || !flow_bb_inside_loop_p (data->current_loop, bb))
 869         set_iv (data, var, var, NULL_TREE);
 870     }
 871
 872   return name_info (data, var)->iv;
 873 }
 874
 875 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 876    not define a simple affine biv with nonzero step.  */
 877
 878 static tree
 879 determine_biv_step (tree phi)
 880 {
 881   struct loop *loop = bb_for_stmt (phi)->loop_father;
 882   tree name = PHI_RESULT (phi), base, step;
 883
 884   if (!is_gimple_reg (name))
 885     return NULL_TREE;
 886
 887   if (!simple_iv (loop, phi, name, &base, &step, true))
 888     return NULL_TREE;
 889
 890   if (zero_p (step))
 891     return NULL_TREE;
 892
 893   return step;
 894 }
 895
 896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 897
 898 static bool
 899 abnormal_ssa_name_p (tree exp)
 900 {
 901   if (!exp)
 902     return false;
 903
 904   if (TREE_CODE (exp) != SSA_NAME)
 905     return false;
 906
 907   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 908 }
 909
 910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 911    abnormal phi node.  Callback for for_each_index.  */
 912
 913 static bool
 914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 915                                   void *data ATTRIBUTE_UNUSED)
 916 {
 917   if (TREE_CODE (base) == ARRAY_REF)
 918     {
 919       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 920         return false;
 921       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 922         return false;
 923     }
 924
 925   return !abnormal_ssa_name_p (*index);
 926 }
 927
 928 /* Returns true if EXPR contains a ssa name that occurs in an
 929    abnormal phi node.  */
 930
 931 static bool
 932 contains_abnormal_ssa_name_p (tree expr)
 933 {
 934   enum tree_code code;
 935   enum tree_code_class class;
 936
 937   if (!expr)
 938     return false;
 939
 940   code = TREE_CODE (expr);
 941   class = TREE_CODE_CLASS (code);
 942
 943   if (code == SSA_NAME)
 944     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 945
 946   if (code == INTEGER_CST
 947       || is_gimple_min_invariant (expr))
 948     return false;
 949
 950   if (code == ADDR_EXPR)
 951     return !for_each_index (&TREE_OPERAND (expr, 0),
 952                             idx_contains_abnormal_ssa_name_p,
 953                             NULL);
 954
 955   switch (class)
 956     {
 957     case tcc_binary:
 958     case tcc_comparison:
 959       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 960         return true;
 961
 962       /* Fallthru.  */
 963     case tcc_unary:
 964       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 965         return true;
 966
 967       break;
 968
 969     default:
 970       gcc_unreachable ();
 971     }
 972
 973   return false;
 974 }
 975
 976 /* Finds basic ivs.  */
 977
 978 static bool
 979 find_bivs (struct ivopts_data *data)
 980 {
 981   tree phi, step, type, base;
 982   bool found = false;
 983   struct loop *loop = data->current_loop;
 984
 985   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 986     {
 987       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 988         continue;
 989
 990       step = determine_biv_step (phi);
 991       if (!step)
 992         continue;
 993
 994       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 995       if (contains_abnormal_ssa_name_p (base)
 996           || contains_abnormal_ssa_name_p (step))
 997         continue;
 998
 999       type = TREE_TYPE (PHI_RESULT (phi));
1000       base = fold_convert (type, base);
1001       if (step)
1002         step = fold_convert (type, step);
1003
1004       set_iv (data, PHI_RESULT (phi), base, step);
1005       found = true;
1006     }
1007
1008   return found;
1009 }
1010
1011 /* Marks basic ivs.  */
1012
1013 static void
1014 mark_bivs (struct ivopts_data *data)
1015 {
1016   tree phi, var;
1017   struct iv *iv, *incr_iv;
1018   struct loop *loop = data->current_loop;
1019   basic_block incr_bb;
1020
1021   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1022     {
1023       iv = get_iv (data, PHI_RESULT (phi));
1024       if (!iv)
1025         continue;
1026
1027       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1028       incr_iv = get_iv (data, var);
1029       if (!incr_iv)
1030         continue;
1031
1032       /* If the increment is in the subloop, ignore it.  */
1033       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1034       if (incr_bb->loop_father != data->current_loop
1035           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1036         continue;
1037
1038       iv->biv_p = true;
1039       incr_iv->biv_p = true;
1040     }
1041 }
1042
1043 /* Checks whether STMT defines a linear induction variable and stores its
1044    parameters to BASE and STEP.  */
1045
1046 static bool
1047 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1048                         tree *base, tree *step)
1049 {
1050   tree lhs;
1051   struct loop *loop = data->current_loop;
1052
1053   *base = NULL_TREE;
1054   *step = NULL_TREE;
1055
1056   if (TREE_CODE (stmt) != MODIFY_EXPR)
1057     return false;
1058
1059   lhs = TREE_OPERAND (stmt, 0);
1060   if (TREE_CODE (lhs) != SSA_NAME)
1061     return false;
1062
1063   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1064     return false;
1065
1066   if (contains_abnormal_ssa_name_p (*base)
1067       || contains_abnormal_ssa_name_p (*step))
1068     return false;
1069
1070   return true;
1071 }
1072
1073 /* Finds general ivs in statement STMT.  */
1074
1075 static void
1076 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1077 {
1078   tree base, step;
1079
1080   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1081     return;
1082
1083   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1084 }
1085
1086 /* Finds general ivs in basic block BB.  */
1087
1088 static void
1089 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1090 {
1091   block_stmt_iterator bsi;
1092
1093   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1094     find_givs_in_stmt (data, bsi_stmt (bsi));
1095 }
1096
1097 /* Finds general ivs.  */
1098
1099 static void
1100 find_givs (struct ivopts_data *data)
1101 {
1102   struct loop *loop = data->current_loop;
1103   basic_block *body = get_loop_body_in_dom_order (loop);
1104   unsigned i;
1105
1106   for (i = 0; i < loop->num_nodes; i++)
1107     find_givs_in_bb (data, body[i]);
1108   free (body);
1109 }
1110
1111 /* For each ssa name defined in LOOP determines whether it is an induction
1112    variable and if so, its initial value and step.  */
1113
1114 static bool
1115 find_induction_variables (struct ivopts_data *data)
1116 {
1117   unsigned i;
1118   bitmap_iterator bi;
1119
1120   if (!find_bivs (data))
1121     return false;
1122
1123   find_givs (data);
1124   mark_bivs (data);
1125
1126   if (dump_file && (dump_flags & TDF_DETAILS))
1127     {
1128       struct tree_niter_desc *niter;
1129
1130       niter = niter_for_single_dom_exit (data);
1131
1132       if (niter)
1133         {
1134           fprintf (dump_file, "  number of iterations ");
1135           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1136           fprintf (dump_file, "\n");
1137
1138           fprintf (dump_file, "  may be zero if ");
1139           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141           fprintf (dump_file, "\n");
1142         };
1143
1144       fprintf (dump_file, "Induction variables:\n\n");
1145
1146       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147         {
1148           if (ver_info (data, i)->iv)
1149             dump_iv (dump_file, ver_info (data, i)->iv);
1150         }
1151     }
1152
1153   return true;
1154 }
1155
1156 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1157
1158 static struct iv_use *
1159 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1160             tree stmt, enum use_type use_type)
1161 {
1162   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1163
1164   use->id = n_iv_uses (data);
1165   use->type = use_type;
1166   use->iv = iv;
1167   use->stmt = stmt;
1168   use->op_p = use_p;
1169   use->related_cands = BITMAP_ALLOC (NULL);
1170
1171   /* To avoid showing ssa name in the dumps, if it was not reset by the
1172      caller.  */
1173   iv->ssa_name = NULL_TREE;
1174
1175   if (dump_file && (dump_flags & TDF_DETAILS))
1176     dump_use (dump_file, use);
1177
1178   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1179
1180   return use;
1181 }
1182
1183 /* Checks whether OP is a loop-level invariant and if so, records it.
1184    NONLINEAR_USE is true if the invariant is used in a way we do not
1185    handle specially.  */
1186
1187 static void
1188 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1189 {
1190   basic_block bb;
1191   struct version_info *info;
1192
1193   if (TREE_CODE (op) != SSA_NAME
1194       || !is_gimple_reg (op))
1195     return;
1196
1197   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1198   if (bb
1199       && flow_bb_inside_loop_p (data->current_loop, bb))
1200     return;
1201
1202   info = name_info (data, op);
1203   info->name = op;
1204   info->has_nonlin_use |= nonlinear_use;
1205   if (!info->inv_id)
1206     info->inv_id = ++data->max_inv_id;
1207   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 }
1209
1210 /* Checks whether the use OP is interesting and if so, records it
1211    as TYPE.  */
1212
1213 static struct iv_use *
1214 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1215                                        enum use_type type)
1216 {
1217   struct iv *iv;
1218   struct iv *civ;
1219   tree stmt;
1220   struct iv_use *use;
1221
1222   if (TREE_CODE (op) != SSA_NAME)
1223     return NULL;
1224
1225   iv = get_iv (data, op);
1226   if (!iv)
1227     return NULL;
1228
1229   if (iv->have_use_for)
1230     {
1231       use = iv_use (data, iv->use_id);
1232
1233       gcc_assert (use->type == USE_NONLINEAR_EXPR
1234                   || use->type == USE_OUTER);
1235
1236       if (type == USE_NONLINEAR_EXPR)
1237         use->type = USE_NONLINEAR_EXPR;
1238       return use;
1239     }
1240
1241   if (zero_p (iv->step))
1242     {
1243       record_invariant (data, op, true);
1244       return NULL;
1245     }
1246   iv->have_use_for = true;
1247
1248   civ = xmalloc (sizeof (struct iv));
1249   *civ = *iv;
1250
1251   stmt = SSA_NAME_DEF_STMT (op);
1252   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1253               || TREE_CODE (stmt) == MODIFY_EXPR);
1254
1255   use = record_use (data, NULL, civ, stmt, type);
1256   iv->use_id = use->id;
1257
1258   return use;
1259 }
1260
1261 /* Checks whether the use OP is interesting and if so, records it.  */
1262
1263 static struct iv_use *
1264 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 {
1266   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 }
1268
1269 /* Records a definition of induction variable OP that is used outside of the
1270    loop.  */
1271
1272 static struct iv_use *
1273 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 {
1275   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 }
1277
1278 /* Checks whether the condition *COND_P in STMT is interesting
1279    and if so, records it.  */
1280
1281 static void
1282 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1283 {
1284   tree *op0_p;
1285   tree *op1_p;
1286   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1287   struct iv const_iv;
1288   tree zero = integer_zero_node;
1289
1290   const_iv.step = NULL_TREE;
1291
1292   if (TREE_CODE (*cond_p) != SSA_NAME
1293       && !COMPARISON_CLASS_P (*cond_p))
1294     return;
1295
1296   if (TREE_CODE (*cond_p) == SSA_NAME)
1297     {
1298       op0_p = cond_p;
1299       op1_p = &zero;
1300     }
1301   else
1302     {
1303       op0_p = &TREE_OPERAND (*cond_p, 0);
1304       op1_p = &TREE_OPERAND (*cond_p, 1);
1305     }
1306
1307   if (TREE_CODE (*op0_p) == SSA_NAME)
1308     iv0 = get_iv (data, *op0_p);
1309   else
1310     iv0 = &const_iv;
1311
1312   if (TREE_CODE (*op1_p) == SSA_NAME)
1313     iv1 = get_iv (data, *op1_p);
1314   else
1315     iv1 = &const_iv;
1316
1317   if (/* When comparing with non-invariant value, we may not do any senseful
1318          induction variable elimination.  */
1319       (!iv0 || !iv1)
1320       /* Eliminating condition based on two ivs would be nontrivial.
1321          ??? TODO -- it is not really important to handle this case.  */
1322       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323     {
1324       find_interesting_uses_op (data, *op0_p);
1325       find_interesting_uses_op (data, *op1_p);
1326       return;
1327     }
1328
1329   if (zero_p (iv0->step) && zero_p (iv1->step))
1330     {
1331       /* If both are invariants, this is a work for unswitching.  */
1332       return;
1333     }
1334
1335   civ = xmalloc (sizeof (struct iv));
1336   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1337   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 }
1339
1340 /* Returns true if expression EXPR is obviously invariant in LOOP,
1341    i.e. if all its operands are defined outside of the LOOP.  */
1342
1343 bool
1344 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1345 {
1346   basic_block def_bb;
1347   unsigned i, len;
1348
1349   if (is_gimple_min_invariant (expr))
1350     return true;
1351
1352   if (TREE_CODE (expr) == SSA_NAME)
1353     {
1354       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1355       if (def_bb
1356           && flow_bb_inside_loop_p (loop, def_bb))
1357         return false;
1358
1359       return true;
1360     }
1361
1362   if (!EXPR_P (expr))
1363     return false;
1364
1365   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1366   for (i = 0; i < len; i++)
1367     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1368       return false;
1369
1370   return true;
1371 }
1372
1373 /* Cumulates the steps of indices into DATA and replaces their values with the
1374    initial ones.  Returns false when the value of the index cannot be determined.
1375    Callback for for_each_index.  */
1376
1377 struct ifs_ivopts_data
1378 {
1379   struct ivopts_data *ivopts_data;
1380   tree stmt;
1381   tree *step_p;
1382 };
1383
1384 static bool
1385 idx_find_step (tree base, tree *idx, void *data)
1386 {
1387   struct ifs_ivopts_data *dta = data;
1388   struct iv *iv;
1389   tree step, type, iv_type, iv_step, lbound, off;
1390   struct loop *loop = dta->ivopts_data->current_loop;
1391
1392   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1393       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1394     return false;
1395
1396   /* If base is a component ref, require that the offset of the reference
1397      be invariant.  */
1398   if (TREE_CODE (base) == COMPONENT_REF)
1399     {
1400       off = component_ref_field_offset (base);
1401       return expr_invariant_in_loop_p (loop, off);
1402     }
1403
1404   /* If base is array, first check whether we will be able to move the
1405      reference out of the loop (in order to take its address in strength
1406      reduction).  In order for this to work we need both lower bound
1407      and step to be loop invariants.  */
1408   if (TREE_CODE (base) == ARRAY_REF)
1409     {
1410       step = array_ref_element_size (base);
1411       lbound = array_ref_low_bound (base);
1412
1413       if (!expr_invariant_in_loop_p (loop, step)
1414           || !expr_invariant_in_loop_p (loop, lbound))
1415         return false;
1416     }
1417
1418   if (TREE_CODE (*idx) != SSA_NAME)
1419     return true;
1420
1421   iv = get_iv (dta->ivopts_data, *idx);
1422   if (!iv)
1423     return false;
1424
1425   *idx = iv->base;
1426
1427   if (!iv->step)
1428     return true;
1429
1430   iv_type = TREE_TYPE (iv->base);
1431   type = build_pointer_type (TREE_TYPE (base));
1432   if (TREE_CODE (base) == ARRAY_REF)
1433     {
1434       step = array_ref_element_size (base);
1435
1436       /* We only handle addresses whose step is an integer constant.  */
1437       if (TREE_CODE (step) != INTEGER_CST)
1438         return false;
1439     }
1440   else
1441     /* The step for pointer arithmetics already is 1 byte.  */
1442     step = build_int_cst (type, 1);
1443
1444   if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1445     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1446                                           type, iv->base, iv->step, dta->stmt);
1447   else
1448     iv_step = fold_convert (iv_type, iv->step);
1449
1450   if (!iv_step)
1451     {
1452       /* The index might wrap.  */
1453       return false;
1454     }
1455
1456   step = fold_build2 (MULT_EXPR, type, step, iv_step);
1457
1458   if (!*dta->step_p)
1459     *dta->step_p = step;
1460   else
1461     *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1462
1463   return true;
1464 }
1465
1466 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1467    object is passed to it in DATA.  */
1468
1469 static bool
1470 idx_record_use (tree base, tree *idx,
1471                 void *data)
1472 {
1473   find_interesting_uses_op (data, *idx);
1474   if (TREE_CODE (base) == ARRAY_REF)
1475     {
1476       find_interesting_uses_op (data, array_ref_element_size (base));
1477       find_interesting_uses_op (data, array_ref_low_bound (base));
1478     }
1479   return true;
1480 }
1481
1482 /* Returns true if memory reference REF may be unaligned.  */
1483
1484 static bool
1485 may_be_unaligned_p (tree ref)
1486 {
1487   tree base;
1488   tree base_type;
1489   HOST_WIDE_INT bitsize;
1490   HOST_WIDE_INT bitpos;
1491   tree toffset;
1492   enum machine_mode mode;
1493   int unsignedp, volatilep;
1494   unsigned base_align;
1495
1496   /* The test below is basically copy of what expr.c:normal_inner_ref
1497      does to check whether the object must be loaded by parts when
1498      STRICT_ALIGNMENT is true.  */
1499   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1500                               &unsignedp, &volatilep, true);
1501   base_type = TREE_TYPE (base);
1502   base_align = TYPE_ALIGN (base_type);
1503
1504   if (mode != BLKmode
1505       && (base_align < GET_MODE_ALIGNMENT (mode)
1506           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1507           || bitpos % BITS_PER_UNIT != 0))
1508     return true;
1509
1510   return false;
1511 }
1512
1513 /* Builds ADDR_EXPR of object OBJ.  If OBJ is an INDIRECT_REF, the indirect_ref
1514    is stripped instead.  */
1515
1516 static tree
1517 build_addr_strip_iref (tree obj)
1518 {
1519   tree type;
1520
1521   if (TREE_CODE (obj) == INDIRECT_REF)
1522     {
1523       type = build_pointer_type (TREE_TYPE (obj));
1524       obj = fold_convert (type, TREE_OPERAND (obj, 0));
1525     }
1526   else
1527     obj = build_addr (obj);
1528
1529   return obj;
1530 }
1531
1532 /* Finds addresses in *OP_P inside STMT.  */
1533
1534 static void
1535 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1536 {
1537   tree base = unshare_expr (*op_p), step = NULL;
1538   struct iv *civ;
1539   struct ifs_ivopts_data ifs_ivopts_data;
1540
1541   /* Do not play with volatile memory references.  A bit too conservative,
1542      perhaps, but safe.  */
1543   if (stmt_ann (stmt)->has_volatile_ops)
1544     goto fail;
1545
1546   /* Ignore bitfields for now.  Not really something terribly complicated
1547      to handle.  TODO.  */
1548   if (TREE_CODE (base) == COMPONENT_REF
1549       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1550     goto fail;
1551
1552   if (STRICT_ALIGNMENT
1553       && may_be_unaligned_p (base))
1554     goto fail;
1555
1556   ifs_ivopts_data.ivopts_data = data;
1557   ifs_ivopts_data.stmt = stmt;
1558   ifs_ivopts_data.step_p = &step;
1559   if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1560       || zero_p (step))
1561     goto fail;
1562
1563   gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564   gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1565
1566   base = build_addr_strip_iref (base);
1567
1568   civ = alloc_iv (base, step);
1569   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1570   return;
1571
1572 fail:
1573   for_each_index (op_p, idx_record_use, data);
1574 }
1575
1576 /* Finds and records invariants used in STMT.  */
1577
1578 static void
1579 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1580 {
1581   use_optype uses = NULL;
1582   unsigned i, n;
1583   tree op;
1584
1585   if (TREE_CODE (stmt) == PHI_NODE)
1586     n = PHI_NUM_ARGS (stmt);
1587   else
1588     {
1589       uses = STMT_USE_OPS (stmt);
1590       n = NUM_USES (uses);
1591     }
1592
1593   for (i = 0; i < n; i++)
1594     {
1595       if (TREE_CODE (stmt) == PHI_NODE)
1596         op = PHI_ARG_DEF (stmt, i);
1597       else
1598         op = USE_OP (uses, i);
1599
1600       record_invariant (data, op, false);
1601     }
1602 }
1603
1604 /* Finds interesting uses of induction variables in the statement STMT.  */
1605
1606 static void
1607 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1608 {
1609   struct iv *iv;
1610   tree op, lhs, rhs;
1611   use_optype uses = NULL;
1612   unsigned i, n;
1613
1614   find_invariants_stmt (data, stmt);
1615
1616   if (TREE_CODE (stmt) == COND_EXPR)
1617     {
1618       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1619       return;
1620     }
1621
1622   if (TREE_CODE (stmt) == MODIFY_EXPR)
1623     {
1624       lhs = TREE_OPERAND (stmt, 0);
1625       rhs = TREE_OPERAND (stmt, 1);
1626
1627       if (TREE_CODE (lhs) == SSA_NAME)
1628         {
1629           /* If the statement defines an induction variable, the uses are not
1630              interesting by themselves.  */
1631
1632           iv = get_iv (data, lhs);
1633
1634           if (iv && !zero_p (iv->step))
1635             return;
1636         }
1637
1638       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1639         {
1640         case tcc_comparison:
1641           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1642           return;
1643
1644         case tcc_reference:
1645           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1646           if (REFERENCE_CLASS_P (lhs))
1647             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1648           return;
1649
1650         default: ;
1651         }
1652
1653       if (REFERENCE_CLASS_P (lhs)
1654           && is_gimple_val (rhs))
1655         {
1656           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1657           find_interesting_uses_op (data, rhs);
1658           return;
1659         }
1660
1661       /* TODO -- we should also handle address uses of type
1662
1663          memory = call (whatever);
1664
1665          and
1666
1667          call (memory).  */
1668     }
1669
1670   if (TREE_CODE (stmt) == PHI_NODE
1671       && bb_for_stmt (stmt) == data->current_loop->header)
1672     {
1673       lhs = PHI_RESULT (stmt);
1674       iv = get_iv (data, lhs);
1675
1676       if (iv && !zero_p (iv->step))
1677         return;
1678     }
1679
1680   if (TREE_CODE (stmt) == PHI_NODE)
1681     n = PHI_NUM_ARGS (stmt);
1682   else
1683     {
1684       uses = STMT_USE_OPS (stmt);
1685       n = NUM_USES (uses);
1686     }
1687
1688   for (i = 0; i < n; i++)
1689     {
1690       if (TREE_CODE (stmt) == PHI_NODE)
1691         op = PHI_ARG_DEF (stmt, i);
1692       else
1693         op = USE_OP (uses, i);
1694
1695       if (TREE_CODE (op) != SSA_NAME)
1696         continue;
1697
1698       iv = get_iv (data, op);
1699       if (!iv)
1700         continue;
1701
1702       find_interesting_uses_op (data, op);
1703     }
1704 }
1705
1706 /* Finds interesting uses of induction variables outside of loops
1707    on loop exit edge EXIT.  */
1708
1709 static void
1710 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1711 {
1712   tree phi, def;
1713
1714   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1715     {
1716       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1717       find_interesting_uses_outer (data, def);
1718     }
1719 }
1720
1721 /* Finds uses of the induction variables that are interesting.  */
1722
1723 static void
1724 find_interesting_uses (struct ivopts_data *data)
1725 {
1726   basic_block bb;
1727   block_stmt_iterator bsi;
1728   tree phi;
1729   basic_block *body = get_loop_body (data->current_loop);
1730   unsigned i;
1731   struct version_info *info;
1732   edge e;
1733
1734   if (dump_file && (dump_flags & TDF_DETAILS))
1735     fprintf (dump_file, "Uses:\n\n");
1736
1737   for (i = 0; i < data->current_loop->num_nodes; i++)
1738     {
1739       edge_iterator ei;
1740       bb = body[i];
1741
1742       FOR_EACH_EDGE (e, ei, bb->succs)
1743         if (e->dest != EXIT_BLOCK_PTR
1744             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1745           find_interesting_uses_outside (data, e);
1746
1747       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1748         find_interesting_uses_stmt (data, phi);
1749       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1750         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1751     }
1752
1753   if (dump_file && (dump_flags & TDF_DETAILS))
1754     {
1755       bitmap_iterator bi;
1756
1757       fprintf (dump_file, "\n");
1758
1759       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1760         {
1761           info = ver_info (data, i);
1762           if (info->inv_id)
1763             {
1764               fprintf (dump_file, "  ");
1765               print_generic_expr (dump_file, info->name, TDF_SLIM);
1766               fprintf (dump_file, " is invariant (%d)%s\n",
1767                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1768             }
1769         }
1770
1771       fprintf (dump_file, "\n");
1772     }
1773
1774   free (body);
1775 }
1776
1777 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1778    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1779    we are at the top-level of the processed address.  */
1780
1781 static tree
1782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1783                 unsigned HOST_WIDE_INT *offset)
1784 {
1785   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1786   enum tree_code code;
1787   tree type, orig_type = TREE_TYPE (expr);
1788   unsigned HOST_WIDE_INT off0, off1, st;
1789   tree orig_expr = expr;
1790
1791   STRIP_NOPS (expr);
1792
1793   type = TREE_TYPE (expr);
1794   code = TREE_CODE (expr);
1795   *offset = 0;
1796
1797   switch (code)
1798     {
1799     case INTEGER_CST:
1800       if (!cst_and_fits_in_hwi (expr)
1801           || zero_p (expr))
1802         return orig_expr;
1803
1804       *offset = int_cst_value (expr);
1805       return build_int_cst_type (orig_type, 0);
1806
1807     case PLUS_EXPR:
1808     case MINUS_EXPR:
1809       op0 = TREE_OPERAND (expr, 0);
1810       op1 = TREE_OPERAND (expr, 1);
1811
1812       op0 = strip_offset_1 (op0, false, false, &off0);
1813       op1 = strip_offset_1 (op1, false, false, &off1);
1814
1815       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1816       if (op0 == TREE_OPERAND (expr, 0)
1817           && op1 == TREE_OPERAND (expr, 1))
1818         return orig_expr;
1819
1820       if (zero_p (op1))
1821         expr = op0;
1822       else if (zero_p (op0))
1823         {
1824           if (code == PLUS_EXPR)
1825             expr = op1;
1826           else
1827             expr = fold_build1 (NEGATE_EXPR, type, op1);
1828         }
1829       else
1830         expr = fold_build2 (code, type, op0, op1);
1831
1832       return fold_convert (orig_type, expr);
1833
1834     case ARRAY_REF:
1835       if (!inside_addr)
1836         return orig_expr;
1837
1838       step = array_ref_element_size (expr);
1839       if (!cst_and_fits_in_hwi (step))
1840         break;
1841
1842       st = int_cst_value (step);
1843       op1 = TREE_OPERAND (expr, 1);
1844       op1 = strip_offset_1 (op1, false, false, &off1);
1845       *offset = off1 * st;
1846
1847       if (top_compref
1848           && zero_p (op1))
1849         {
1850           /* Strip the component reference completely.  */
1851           op0 = TREE_OPERAND (expr, 0);
1852           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1853           *offset += off0;
1854           return op0;
1855         }
1856       break;
1857
1858     case COMPONENT_REF:
1859       if (!inside_addr)
1860         return orig_expr;
1861
1862       tmp = component_ref_field_offset (expr);
1863       if (top_compref
1864           && cst_and_fits_in_hwi (tmp))
1865         {
1866           /* Strip the component reference completely.  */
1867           op0 = TREE_OPERAND (expr, 0);
1868           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1869           *offset = off0 + int_cst_value (tmp);
1870           return op0;
1871         }
1872       break;
1873
1874     case ADDR_EXPR:
1875       op0 = TREE_OPERAND (expr, 0);
1876       op0 = strip_offset_1 (op0, true, true, &off0);
1877       *offset += off0;
1878
1879       if (op0 == TREE_OPERAND (expr, 0))
1880         return orig_expr;
1881
1882       expr = build_addr_strip_iref (op0);
1883       return fold_convert (orig_type, expr);
1884
1885     case INDIRECT_REF:
1886       inside_addr = false;
1887       break;
1888
1889     default:
1890       return orig_expr;
1891     }
1892
1893   /* Default handling of expressions for that we want to recurse into
1894      the first operand.  */
1895   op0 = TREE_OPERAND (expr, 0);
1896   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1897   *offset += off0;
1898
1899   if (op0 == TREE_OPERAND (expr, 0)
1900       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1901     return orig_expr;
1902
1903   expr = copy_node (expr);
1904   TREE_OPERAND (expr, 0) = op0;
1905   if (op1)
1906     TREE_OPERAND (expr, 1) = op1;
1907
1908   /* Inside address, we might strip the top level component references,
1909      thus changing type of the expresion.  Handling of ADDR_EXPR
1910      will fix that.  */
1911   expr = fold_convert (orig_type, expr);
1912
1913   return expr;
1914 }
1915
1916 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1917
1918 static tree
1919 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1920 {
1921   return strip_offset_1 (expr, false, false, offset);
1922 }
1923
1924 /* Returns variant of TYPE that can be used as base for different uses.
1925    For integer types, we return unsigned variant of the type, which
1926    avoids problems with overflows.  For pointer types, we return void *.  */
1927
1928 static tree
1929 generic_type_for (tree type)
1930 {
1931   if (POINTER_TYPE_P (type))
1932     return ptr_type_node;
1933
1934   if (TYPE_UNSIGNED (type))
1935     return type;
1936
1937   return unsigned_type_for (type);
1938 }
1939
1940 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1941    the bitmap to that we should store it.  */
1942
1943 static struct ivopts_data *fd_ivopts_data;
1944 static tree
1945 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1946 {
1947   bitmap *depends_on = data;
1948   struct version_info *info;
1949
1950   if (TREE_CODE (*expr_p) != SSA_NAME)
1951     return NULL_TREE;
1952   info = name_info (fd_ivopts_data, *expr_p);
1953
1954   if (!info->inv_id || info->has_nonlin_use)
1955     return NULL_TREE;
1956
1957   if (!*depends_on)
1958     *depends_on = BITMAP_ALLOC (NULL);
1959   bitmap_set_bit (*depends_on, info->inv_id);
1960
1961   return NULL_TREE;
1962 }
1963
1964 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1965    position to POS.  If USE is not NULL, the candidate is set as related to
1966    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1967    replacement of the final value of the iv by a direct computation.  */
1968
1969 static struct iv_cand *
1970 add_candidate_1 (struct ivopts_data *data,
1971                  tree base, tree step, bool important, enum iv_position pos,
1972                  struct iv_use *use, tree incremented_at)
1973 {
1974   unsigned i;
1975   struct iv_cand *cand = NULL;
1976   tree type, orig_type;
1977
1978   if (base)
1979     {
1980       orig_type = TREE_TYPE (base);
1981       type = generic_type_for (orig_type);
1982       if (type != orig_type)
1983         {
1984           base = fold_convert (type, base);
1985           if (step)
1986             step = fold_convert (type, step);
1987         }
1988     }
1989
1990   for (i = 0; i < n_iv_cands (data); i++)
1991     {
1992       cand = iv_cand (data, i);
1993
1994       if (cand->pos != pos)
1995         continue;
1996
1997       if (cand->incremented_at != incremented_at)
1998         continue;
1999
2000       if (!cand->iv)
2001         {
2002           if (!base && !step)
2003             break;
2004
2005           continue;
2006         }
2007
2008       if (!base && !step)
2009         continue;
2010
2011       if (!operand_equal_p (base, cand->iv->base, 0))
2012         continue;
2013
2014       if (zero_p (cand->iv->step))
2015         {
2016           if (zero_p (step))
2017             break;
2018         }
2019       else
2020         {
2021           if (step && operand_equal_p (step, cand->iv->step, 0))
2022             break;
2023         }
2024     }
2025
2026   if (i == n_iv_cands (data))
2027     {
2028       cand = xcalloc (1, sizeof (struct iv_cand));
2029       cand->id = i;
2030
2031       if (!base && !step)
2032         cand->iv = NULL;
2033       else
2034         cand->iv = alloc_iv (base, step);
2035
2036       cand->pos = pos;
2037       if (pos != IP_ORIGINAL && cand->iv)
2038         {
2039           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2040           cand->var_after = cand->var_before;
2041         }
2042       cand->important = important;
2043       cand->incremented_at = incremented_at;
2044       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2045
2046       if (step
2047           && TREE_CODE (step) != INTEGER_CST)
2048         {
2049           fd_ivopts_data = data;
2050           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2051         }
2052
2053       if (dump_file && (dump_flags & TDF_DETAILS))
2054         dump_cand (dump_file, cand);
2055     }
2056
2057   if (important && !cand->important)
2058     {
2059       cand->important = true;
2060       if (dump_file && (dump_flags & TDF_DETAILS))
2061         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2062     }
2063
2064   if (use)
2065     {
2066       bitmap_set_bit (use->related_cands, i);
2067       if (dump_file && (dump_flags & TDF_DETAILS))
2068         fprintf (dump_file, "Candidate %d is related to use %d\n",
2069                  cand->id, use->id);
2070     }
2071
2072   return cand;
2073 }
2074
2075 /* Returns true if incrementing the induction variable at the end of the LOOP
2076    is allowed.
2077
2078    The purpose is to avoid splitting latch edge with a biv increment, thus
2079    creating a jump, possibly confusing other optimization passes and leaving
2080    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2081    is not available (so we do not have a better alternative), or if the latch
2082    edge is already nonempty.  */
2083
2084 static bool
2085 allow_ip_end_pos_p (struct loop *loop)
2086 {
2087   if (!ip_normal_pos (loop))
2088     return true;
2089
2090   if (!empty_block_p (ip_end_pos (loop)))
2091     return true;
2092
2093   return false;
2094 }
2095
2096 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2097    position to POS.  If USE is not NULL, the candidate is set as related to
2098    it.  The candidate computation is scheduled on all available positions.  */
2099
2100 static void
2101 add_candidate (struct ivopts_data *data,
2102                tree base, tree step, bool important, struct iv_use *use)
2103 {
2104   if (ip_normal_pos (data->current_loop))
2105     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2106   if (ip_end_pos (data->current_loop)
2107       && allow_ip_end_pos_p (data->current_loop))
2108     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2109 }
2110
2111 /* Add a standard "0 + 1 * iteration" iv candidate for a
2112    type with SIZE bits.  */
2113
2114 static void
2115 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2116                                      unsigned int size)
2117 {
2118   tree type = lang_hooks.types.type_for_size (size, true);
2119   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2120                  true, NULL);
2121 }
2122
2123 /* Adds standard iv candidates.  */
2124
2125 static void
2126 add_standard_iv_candidates (struct ivopts_data *data)
2127 {
2128   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2129
2130   /* The same for a double-integer type if it is still fast enough.  */
2131   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2132     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2133 }
2134
2135
2136 /* Adds candidates bases on the old induction variable IV.  */
2137
2138 static void
2139 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2140 {
2141   tree phi, def;
2142   struct iv_cand *cand;
2143
2144   add_candidate (data, iv->base, iv->step, true, NULL);
2145
2146   /* The same, but with initial value zero.  */
2147   add_candidate (data,
2148                  build_int_cst (TREE_TYPE (iv->base), 0),
2149                  iv->step, true, NULL);
2150
2151   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2152   if (TREE_CODE (phi) == PHI_NODE)
2153     {
2154       /* Additionally record the possibility of leaving the original iv
2155          untouched.  */
2156       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2157       cand = add_candidate_1 (data,
2158                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2159                               SSA_NAME_DEF_STMT (def));
2160       cand->var_before = iv->ssa_name;
2161       cand->var_after = def;
2162     }
2163 }
2164
2165 /* Adds candidates based on the old induction variables.  */
2166
2167 static void
2168 add_old_ivs_candidates (struct ivopts_data *data)
2169 {
2170   unsigned i;
2171   struct iv *iv;
2172   bitmap_iterator bi;
2173
2174   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2175     {
2176       iv = ver_info (data, i)->iv;
2177       if (iv && iv->biv_p && !zero_p (iv->step))
2178         add_old_iv_candidates (data, iv);
2179     }
2180 }
2181
2182 /* Adds candidates based on the value of the induction variable IV and USE.  */
2183
2184 static void
2185 add_iv_value_candidates (struct ivopts_data *data,
2186                          struct iv *iv, struct iv_use *use)
2187 {
2188   unsigned HOST_WIDE_INT offset;
2189   tree base;
2190
2191   add_candidate (data, iv->base, iv->step, false, use);
2192
2193   /* The same, but with initial value zero.  Make such variable important,
2194      since it is generic enough so that possibly many uses may be based
2195      on it.  */
2196   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2197                  iv->step, true, use);
2198
2199   /* Third, try removing the constant offset.  */
2200   base = strip_offset (iv->base, &offset);
2201   if (offset)
2202     add_candidate (data, base, iv->step, false, use);
2203 }
2204
2205 /* Possibly adds pseudocandidate for replacing the final value of USE by
2206    a direct computation.  */
2207
2208 static void
2209 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2210 {
2211   struct tree_niter_desc *niter;
2212
2213   /* We must know where we exit the loop and how many times does it roll.  */
2214   niter = niter_for_single_dom_exit (data);
2215   if (!niter
2216       || !zero_p (niter->may_be_zero))
2217     return;
2218
2219   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2220 }
2221
2222 /* Adds candidates based on the uses.  */
2223
2224 static void
2225 add_derived_ivs_candidates (struct ivopts_data *data)
2226 {
2227   unsigned i;
2228
2229   for (i = 0; i < n_iv_uses (data); i++)
2230     {
2231       struct iv_use *use = iv_use (data, i);
2232
2233       if (!use)
2234         continue;
2235
2236       switch (use->type)
2237         {
2238         case USE_NONLINEAR_EXPR:
2239         case USE_COMPARE:
2240         case USE_ADDRESS:
2241           /* Just add the ivs based on the value of the iv used here.  */
2242           add_iv_value_candidates (data, use->iv, use);
2243           break;
2244
2245         case USE_OUTER:
2246           add_iv_value_candidates (data, use->iv, use);
2247
2248           /* Additionally, add the pseudocandidate for the possibility to
2249              replace the final value by a direct computation.  */
2250           add_iv_outer_candidates (data, use);
2251           break;
2252
2253         default:
2254           gcc_unreachable ();
2255         }
2256     }
2257 }
2258
2259 /* Record important candidates and add them to related_cands bitmaps
2260    if needed.  */
2261
2262 static void
2263 record_important_candidates (struct ivopts_data *data)
2264 {
2265   unsigned i;
2266   struct iv_use *use;
2267
2268   for (i = 0; i < n_iv_cands (data); i++)
2269     {
2270       struct iv_cand *cand = iv_cand (data, i);
2271
2272       if (cand->important)
2273         bitmap_set_bit (data->important_candidates, i);
2274     }
2275
2276   data->consider_all_candidates = (n_iv_cands (data)
2277                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2278
2279   if (data->consider_all_candidates)
2280     {
2281       /* We will not need "related_cands" bitmaps in this case,
2282          so release them to decrease peak memory consumption.  */
2283       for (i = 0; i < n_iv_uses (data); i++)
2284         {
2285           use = iv_use (data, i);
2286           BITMAP_FREE (use->related_cands);
2287         }
2288     }
2289   else
2290     {
2291       /* Add important candidates to the related_cands bitmaps.  */
2292       for (i = 0; i < n_iv_uses (data); i++)
2293         bitmap_ior_into (iv_use (data, i)->related_cands,
2294                          data->important_candidates);
2295     }
2296 }
2297
2298 /* Finds the candidates for the induction variables.  */
2299
2300 static void
2301 find_iv_candidates (struct ivopts_data *data)
2302 {
2303   /* Add commonly used ivs.  */
2304   add_standard_iv_candidates (data);
2305
2306   /* Add old induction variables.  */
2307   add_old_ivs_candidates (data);
2308
2309   /* Add induction variables derived from uses.  */
2310   add_derived_ivs_candidates (data);
2311
2312   /* Record the important candidates.  */
2313   record_important_candidates (data);
2314 }
2315
2316 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2317    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2318    we allocate a simple list to every use.  */
2319
2320 static void
2321 alloc_use_cost_map (struct ivopts_data *data)
2322 {
2323   unsigned i, size, s, j;
2324
2325   for (i = 0; i < n_iv_uses (data); i++)
2326     {
2327       struct iv_use *use = iv_use (data, i);
2328       bitmap_iterator bi;
2329
2330       if (data->consider_all_candidates)
2331         size = n_iv_cands (data);
2332       else
2333         {
2334           s = 0;
2335           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2336             {
2337               s++;
2338             }
2339
2340           /* Round up to the power of two, so that moduling by it is fast.  */
2341           for (size = 1; size < s; size <<= 1)
2342             continue;
2343         }
2344
2345       use->n_map_members = size;
2346       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2347     }
2348 }
2349
2350 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2351    on invariants DEPENDS_ON and that the value used in expressing it
2352    is VALUE.*/
2353
2354 static void
2355 set_use_iv_cost (struct ivopts_data *data,
2356                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2357                  bitmap depends_on, tree value)
2358 {
2359   unsigned i, s;
2360
2361   if (cost == INFTY)
2362     {
2363       BITMAP_FREE (depends_on);
2364       return;
2365     }
2366
2367   if (data->consider_all_candidates)
2368     {
2369       use->cost_map[cand->id].cand = cand;
2370       use->cost_map[cand->id].cost = cost;
2371       use->cost_map[cand->id].depends_on = depends_on;
2372       use->cost_map[cand->id].value = value;
2373       return;
2374     }
2375
2376   /* n_map_members is a power of two, so this computes modulo.  */
2377   s = cand->id & (use->n_map_members - 1);
2378   for (i = s; i < use->n_map_members; i++)
2379     if (!use->cost_map[i].cand)
2380       goto found;
2381   for (i = 0; i < s; i++)
2382     if (!use->cost_map[i].cand)
2383       goto found;
2384
2385   gcc_unreachable ();
2386
2387 found:
2388   use->cost_map[i].cand = cand;
2389   use->cost_map[i].cost = cost;
2390   use->cost_map[i].depends_on = depends_on;
2391   use->cost_map[i].value = value;
2392 }
2393
2394 /* Gets cost of (USE, CANDIDATE) pair.  */
2395
2396 static struct cost_pair *
2397 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2398                  struct iv_cand *cand)
2399 {
2400   unsigned i, s;
2401   struct cost_pair *ret;
2402
2403   if (!cand)
2404     return NULL;
2405
2406   if (data->consider_all_candidates)
2407     {
2408       ret = use->cost_map + cand->id;
2409       if (!ret->cand)
2410         return NULL;
2411
2412       return ret;
2413     }
2414
2415   /* n_map_members is a power of two, so this computes modulo.  */
2416   s = cand->id & (use->n_map_members - 1);
2417   for (i = s; i < use->n_map_members; i++)
2418     if (use->cost_map[i].cand == cand)
2419       return use->cost_map + i;
2420
2421   for (i = 0; i < s; i++)
2422     if (use->cost_map[i].cand == cand)
2423       return use->cost_map + i;
2424
2425   return NULL;
2426 }
2427
2428 /* Returns estimate on cost of computing SEQ.  */
2429
2430 static unsigned
2431 seq_cost (rtx seq)
2432 {
2433   unsigned cost = 0;
2434   rtx set;
2435
2436   for (; seq; seq = NEXT_INSN (seq))
2437     {
2438       set = single_set (seq);
2439       if (set)
2440         cost += rtx_cost (set, SET);
2441       else
2442         cost++;
2443     }
2444
2445   return cost;
2446 }
2447
2448 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2449 static rtx
2450 produce_memory_decl_rtl (tree obj, int *regno)
2451 {
2452   rtx x;
2453
2454   gcc_assert (obj);
2455   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2456     {
2457       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2458       x = gen_rtx_SYMBOL_REF (Pmode, name);
2459     }
2460   else
2461     x = gen_raw_REG (Pmode, (*regno)++);
2462
2463   return gen_rtx_MEM (DECL_MODE (obj), x);
2464 }
2465
2466 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2467    walk_tree.  DATA contains the actual fake register number.  */
2468
2469 static tree
2470 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2471 {
2472   tree obj = NULL_TREE;
2473   rtx x = NULL_RTX;
2474   int *regno = data;
2475
2476   switch (TREE_CODE (*expr_p))
2477     {
2478     case ADDR_EXPR:
2479       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2480            handled_component_p (*expr_p);
2481            expr_p = &TREE_OPERAND (*expr_p, 0))
2482         continue;
2483       obj = *expr_p;
2484       if (DECL_P (obj))
2485         x = produce_memory_decl_rtl (obj, regno);
2486       break;
2487
2488     case SSA_NAME:
2489       *ws = 0;
2490       obj = SSA_NAME_VAR (*expr_p);
2491       if (!DECL_RTL_SET_P (obj))
2492         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2493       break;
2494
2495     case VAR_DECL:
2496     case PARM_DECL:
2497     case RESULT_DECL:
2498       *ws = 0;
2499       obj = *expr_p;
2500
2501       if (DECL_RTL_SET_P (obj))
2502         break;
2503
2504       if (DECL_MODE (obj) == BLKmode)
2505         x = produce_memory_decl_rtl (obj, regno);
2506       else
2507         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2508
2509       break;
2510
2511     default:
2512       break;
2513     }
2514
2515   if (x)
2516     {
2517       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2518       SET_DECL_RTL (obj, x);
2519     }
2520
2521   return NULL_TREE;
2522 }
2523
2524 /* Determines cost of the computation of EXPR.  */
2525
2526 static unsigned
2527 computation_cost (tree expr)
2528 {
2529   rtx seq, rslt;
2530   tree type = TREE_TYPE (expr);
2531   unsigned cost;
2532   /* Avoid using hard regs in ways which may be unsupported.  */
2533   int regno = LAST_VIRTUAL_REGISTER + 1;
2534
2535   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2536   start_sequence ();
2537   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2538   seq = get_insns ();
2539   end_sequence ();
2540
2541   cost = seq_cost (seq);
2542   if (MEM_P (rslt))
2543     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2544
2545   return cost;
2546 }
2547
2548 /* Returns variable containing the value of candidate CAND at statement AT.  */
2549
2550 static tree
2551 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2552 {
2553   if (stmt_after_increment (loop, cand, stmt))
2554     return cand->var_after;
2555   else
2556     return cand->var_before;
2557 }
2558
2559 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2560    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2561
2562 static int
2563 tree_int_cst_sign_bit (tree t)
2564 {
2565   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2566   unsigned HOST_WIDE_INT w;
2567
2568   if (bitno < HOST_BITS_PER_WIDE_INT)
2569     w = TREE_INT_CST_LOW (t);
2570   else
2571     {
2572       w = TREE_INT_CST_HIGH (t);
2573       bitno -= HOST_BITS_PER_WIDE_INT;
2574     }
2575
2576   return (w >> bitno) & 1;
2577 }
2578
2579 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2580    return cst.  Otherwise return NULL_TREE.  */
2581
2582 static tree
2583 constant_multiple_of (tree type, tree top, tree bot)
2584 {
2585   tree res, mby, p0, p1;
2586   enum tree_code code;
2587   bool negate;
2588
2589   STRIP_NOPS (top);
2590   STRIP_NOPS (bot);
2591
2592   if (operand_equal_p (top, bot, 0))
2593     return build_int_cst (type, 1);
2594
2595   code = TREE_CODE (top);
2596   switch (code)
2597     {
2598     case MULT_EXPR:
2599       mby = TREE_OPERAND (top, 1);
2600       if (TREE_CODE (mby) != INTEGER_CST)
2601         return NULL_TREE;
2602
2603       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2604       if (!res)
2605         return NULL_TREE;
2606
2607       return fold_binary_to_constant (MULT_EXPR, type, res,
2608                                       fold_convert (type, mby));
2609
2610     case PLUS_EXPR:
2611     case MINUS_EXPR:
2612       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2613       if (!p0)
2614         return NULL_TREE;
2615       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2616       if (!p1)
2617         return NULL_TREE;
2618
2619       return fold_binary_to_constant (code, type, p0, p1);
2620
2621     case INTEGER_CST:
2622       if (TREE_CODE (bot) != INTEGER_CST)
2623         return NULL_TREE;
2624
2625       bot = fold_convert (type, bot);
2626       top = fold_convert (type, top);
2627
2628       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2629          the result afterwards.  */
2630       if (tree_int_cst_sign_bit (bot))
2631         {
2632           negate = true;
2633           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2634         }
2635       else
2636         negate = false;
2637
2638       /* Ditto for TOP.  */
2639       if (tree_int_cst_sign_bit (top))
2640         {
2641           negate = !negate;
2642           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2643         }
2644
2645       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2646         return NULL_TREE;
2647
2648       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2649       if (negate)
2650         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2651       return res;
2652
2653     default:
2654       return NULL_TREE;
2655     }
2656 }
2657
2658 /* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
2659    to make things simpler; this is sufficient in most cases.  */
2660
2661 #define MAX_AFF_ELTS 8
2662
2663 struct affine_tree_combination
2664 {
2665   /* Type of the result of the combination.  */
2666   tree type;
2667
2668   /* Mask modulo that the operations are performed.  */
2669   unsigned HOST_WIDE_INT mask;
2670
2671   /* Constant offset.  */
2672   unsigned HOST_WIDE_INT offset;
2673
2674   /* Number of elements of the combination.  */
2675   unsigned n;
2676
2677   /* Elements and their coefficients.  */
2678   tree elts[MAX_AFF_ELTS];
2679   unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2680
2681   /* Remainder of the expression.  */
2682   tree rest;
2683 };
2684
2685 /* Sets COMB to CST.  */
2686
2687 static void
2688 aff_combination_const (struct affine_tree_combination *comb, tree type,
2689                        unsigned HOST_WIDE_INT cst)
2690 {
2691   unsigned prec = TYPE_PRECISION (type);
2692
2693   comb->type = type;
2694   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2695
2696   comb->n = 0;
2697   comb->rest = NULL_TREE;
2698   comb->offset = cst & comb->mask;
2699 }
2700
2701 /* Sets COMB to single element ELT.  */
2702
2703 static void
2704 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2705 {
2706   unsigned prec = TYPE_PRECISION (type);
2707
2708   comb->type = type;
2709   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2710
2711   comb->n = 1;
2712   comb->elts[0] = elt;
2713   comb->coefs[0] = 1;
2714   comb->rest = NULL_TREE;
2715   comb->offset = 0;
2716 }
2717
2718 /* Scales COMB by SCALE.  */
2719
2720 static void
2721 aff_combination_scale (struct affine_tree_combination *comb,
2722                        unsigned HOST_WIDE_INT scale)
2723 {
2724   unsigned i, j;
2725
2726   if (scale == 1)
2727     return;
2728
2729   if (scale == 0)
2730     {
2731       aff_combination_const (comb, comb->type, 0);
2732       return;
2733     }
2734
2735   comb->offset = (scale * comb->offset) & comb->mask;
2736   for (i = 0, j = 0; i < comb->n; i++)
2737     {
2738       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2739       comb->elts[j] = comb->elts[i];
2740       if (comb->coefs[j] != 0)
2741         j++;
2742     }
2743   comb->n = j;
2744
2745   if (comb->rest)
2746     {
2747       if (comb->n < MAX_AFF_ELTS)
2748         {
2749           comb->coefs[comb->n] = scale;
2750           comb->elts[comb->n] = comb->rest;
2751           comb->rest = NULL_TREE;
2752           comb->n++;
2753         }
2754       else
2755         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2756                                   build_int_cst_type (comb->type, scale));
2757     }
2758 }
2759
2760 /* Adds ELT * SCALE to COMB.  */
2761
2762 static void
2763 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2764                          unsigned HOST_WIDE_INT scale)
2765 {
2766   unsigned i;
2767
2768   if (scale == 0)
2769     return;
2770
2771   for (i = 0; i < comb->n; i++)
2772     if (operand_equal_p (comb->elts[i], elt, 0))
2773       {
2774         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2775         if (comb->coefs[i])
2776           return;
2777
2778         comb->n--;
2779         comb->coefs[i] = comb->coefs[comb->n];
2780         comb->elts[i] = comb->elts[comb->n];
2781         return;
2782       }
2783   if (comb->n < MAX_AFF_ELTS)
2784     {
2785       comb->coefs[comb->n] = scale;
2786       comb->elts[comb->n] = elt;
2787       comb->n++;
2788       return;
2789     }
2790
2791   if (scale == 1)
2792     elt = fold_convert (comb->type, elt);
2793   else
2794     elt = fold_build2 (MULT_EXPR, comb->type,
2795                        fold_convert (comb->type, elt),
2796                        build_int_cst_type (comb->type, scale));
2797
2798   if (comb->rest)
2799     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2800   else
2801     comb->rest = elt;
2802 }
2803
2804 /* Adds COMB2 to COMB1.  */
2805
2806 static void
2807 aff_combination_add (struct affine_tree_combination *comb1,
2808                      struct affine_tree_combination *comb2)
2809 {
2810   unsigned i;
2811
2812   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2813   for (i = 0; i < comb2-> n; i++)
2814     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2815   if (comb2->rest)
2816     aff_combination_add_elt (comb1, comb2->rest, 1);
2817 }
2818
2819 /* Splits EXPR into an affine combination of parts.  */
2820
2821 static void
2822 tree_to_aff_combination (tree expr, tree type,
2823                          struct affine_tree_combination *comb)
2824 {
2825   struct affine_tree_combination tmp;
2826   enum tree_code code;
2827   tree cst, core, toffset;
2828   HOST_WIDE_INT bitpos, bitsize;
2829   enum machine_mode mode;
2830   int unsignedp, volatilep;
2831
2832   STRIP_NOPS (expr);
2833
2834   code = TREE_CODE (expr);
2835   switch (code)
2836     {
2837     case INTEGER_CST:
2838       aff_combination_const (comb, type, int_cst_value (expr));
2839       return;
2840
2841     case PLUS_EXPR:
2842     case MINUS_EXPR:
2843       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2845       if (code == MINUS_EXPR)
2846         aff_combination_scale (&tmp, -1);
2847       aff_combination_add (comb, &tmp);
2848       return;
2849
2850     case MULT_EXPR:
2851       cst = TREE_OPERAND (expr, 1);
2852       if (TREE_CODE (cst) != INTEGER_CST)
2853         break;
2854       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2855       aff_combination_scale (comb, int_cst_value (cst));
2856       return;
2857
2858     case NEGATE_EXPR:
2859       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2860       aff_combination_scale (comb, -1);
2861       return;
2862
2863     case ADDR_EXPR:
2864       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2865                                   &toffset, &mode, &unsignedp, &volatilep,
2866                                   false);
2867       if (bitpos % BITS_PER_UNIT != 0)
2868         break;
2869       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2870       core = build_addr_strip_iref (core);
2871       if (TREE_CODE (core) == ADDR_EXPR)
2872         aff_combination_add_elt (comb, core, 1);
2873       else
2874         {
2875           tree_to_aff_combination (core, type, &tmp);
2876           aff_combination_add (comb, &tmp);
2877         }
2878       if (toffset)
2879         {
2880           tree_to_aff_combination (toffset, type, &tmp);
2881           aff_combination_add (comb, &tmp);
2882         }
2883       return;
2884
2885     default:
2886       break;
2887     }
2888
2889   aff_combination_elt (comb, type, expr);
2890 }
2891
2892 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2893
2894 static tree
2895 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2896                  unsigned HOST_WIDE_INT mask)
2897 {
2898   enum tree_code code;
2899
2900   scale &= mask;
2901   elt = fold_convert (type, elt);
2902
2903   if (scale == 1)
2904     {
2905       if (!expr)
2906         return elt;
2907
2908       return fold_build2 (PLUS_EXPR, type, expr, elt);
2909     }
2910
2911   if (scale == mask)
2912     {
2913       if (!expr)
2914         return fold_build1 (NEGATE_EXPR, type, elt);
2915
2916       return fold_build2 (MINUS_EXPR, type, expr, elt);
2917     }
2918
2919   if (!expr)
2920     return fold_build2 (MULT_EXPR, type, elt,
2921                         build_int_cst_type (type, scale));
2922
2923   if ((scale | (mask >> 1)) == mask)
2924     {
2925       /* Scale is negative.  */
2926       code = MINUS_EXPR;
2927       scale = (-scale) & mask;
2928     }
2929   else
2930     code = PLUS_EXPR;
2931
2932   elt = fold_build2 (MULT_EXPR, type, elt,
2933                      build_int_cst_type (type, scale));
2934   return fold_build2 (code, type, expr, elt);
2935 }
2936
2937 /* Makes tree from the affine combination COMB.  */
2938
2939 static tree
2940 aff_combination_to_tree (struct affine_tree_combination *comb)
2941 {
2942   tree type = comb->type;
2943   tree expr = comb->rest;
2944   unsigned i;
2945   unsigned HOST_WIDE_INT off, sgn;
2946
2947   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2948
2949   for (i = 0; i < comb->n; i++)
2950     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2951                             comb->mask);
2952
2953   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2954     {
2955       /* Offset is negative.  */
2956       off = (-comb->offset) & comb->mask;
2957       sgn = comb->mask;
2958     }
2959   else
2960     {
2961       off = comb->offset;
2962       sgn = 1;
2963     }
2964   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2965                           comb->mask);
2966 }
2967
2968 /* Folds X + RATIO * Y in TYPE.  */
2969
2970 static tree
2971 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2972 {
2973   enum tree_code code;
2974   tree cst;
2975   struct affine_tree_combination cx, cy;
2976
2977   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2978     {
2979       if (ratio == 1)
2980         return fold_build2 (PLUS_EXPR, type, x, y);
2981       if (ratio == -1)
2982         return fold_build2 (MINUS_EXPR, type, x, y);
2983
2984       if (ratio < 0)
2985         {
2986           code = MINUS_EXPR;
2987           ratio = -ratio;
2988         }
2989       else
2990         code = PLUS_EXPR;
2991
2992       cst = build_int_cst_type (type, ratio);
2993       y = fold_build2 (MULT_EXPR, type, y, cst);
2994       return fold_build2 (code, type, x, y);
2995     }
2996
2997   tree_to_aff_combination (x, type, &cx);
2998   tree_to_aff_combination (y, type, &cy);
2999   aff_combination_scale (&cy, ratio);
3000   aff_combination_add (&cx, &cy);
3001
3002   return aff_combination_to_tree (&cx);
3003 }
3004
3005 /* Determines the expression by that USE is expressed from induction variable
3006    CAND at statement AT in LOOP.  */
3007
3008 static tree
3009 get_computation_at (struct loop *loop,
3010                     struct iv_use *use, struct iv_cand *cand, tree at)
3011 {
3012   tree ubase = use->iv->base;
3013   tree ustep = use->iv->step;
3014   tree cbase = cand->iv->base;
3015   tree cstep = cand->iv->step;
3016   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3017   tree uutype;
3018   tree expr, delta;
3019   tree ratio;
3020   unsigned HOST_WIDE_INT ustepi, cstepi;
3021   HOST_WIDE_INT ratioi;
3022
3023   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3024     {
3025       /* We do not have a precision to express the values of use.  */
3026       return NULL_TREE;
3027     }
3028
3029   expr = var_at_stmt (loop, cand, at);
3030
3031   if (TREE_TYPE (expr) != ctype)
3032     {
3033       /* This may happen with the original ivs.  */
3034       expr = fold_convert (ctype, expr);
3035     }
3036
3037   if (TYPE_UNSIGNED (utype))
3038     uutype = utype;
3039   else
3040     {
3041       uutype = unsigned_type_for (utype);
3042       ubase = fold_convert (uutype, ubase);
3043       ustep = fold_convert (uutype, ustep);
3044     }
3045
3046   if (uutype != ctype)
3047     {
3048       expr = fold_convert (uutype, expr);
3049       cbase = fold_convert (uutype, cbase);
3050       cstep = fold_convert (uutype, cstep);
3051     }
3052
3053   if (cst_and_fits_in_hwi (cstep)
3054       && cst_and_fits_in_hwi (ustep))
3055     {
3056       ustepi = int_cst_value (ustep);
3057       cstepi = int_cst_value (cstep);
3058
3059       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3060         {
3061           /* TODO maybe consider case when ustep divides cstep and the ratio is
3062              a power of 2 (so that the division is fast to execute)?  We would
3063              need to be much more careful with overflows etc. then.  */
3064           return NULL_TREE;
3065         }
3066
3067       ratio = build_int_cst_type (uutype, ratioi);
3068     }
3069   else
3070     {
3071       ratio = constant_multiple_of (uutype, ustep, cstep);
3072       if (!ratio)
3073         return NULL_TREE;
3074
3075       /* Ratioi is only used to detect special cases when the multiplicative
3076          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3077          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3078          to integer_onep/integer_all_onesp, since the former ignores
3079          TREE_OVERFLOW.  */
3080       if (cst_and_fits_in_hwi (ratio))
3081         ratioi = int_cst_value (ratio);
3082       else if (integer_onep (ratio))
3083         ratioi = 1;
3084       else if (integer_all_onesp (ratio))
3085         ratioi = -1;
3086       else
3087         ratioi = 0;
3088     }
3089
3090   /* We may need to shift the value if we are after the increment.  */
3091   if (stmt_after_increment (loop, cand, at))
3092     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3093
3094   /* use = ubase - ratio * cbase + ratio * var.
3095
3096      In general case ubase + ratio * (var - cbase) could be better (one less
3097      multiplication), but often it is possible to eliminate redundant parts
3098      of computations from (ubase - ratio * cbase) term, and if it does not
3099      happen, fold is able to apply the distributive law to obtain this form
3100      anyway.  */
3101
3102   if (ratioi == 1)
3103     {
3104       delta = fold_affine_sum (uutype, ubase, cbase, -1);
3105       expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3106     }
3107   else if (ratioi == -1)
3108     {
3109       delta = fold_affine_sum (uutype, ubase, cbase, 1);
3110       expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3111     }
3112   else
3113     {
3114       if (ratioi)
3115         delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3116       else
3117         {
3118           delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3119           delta = fold_affine_sum (uutype, ubase, delta, -1);
3120         }
3121       expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3122       expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3123     }
3124
3125   return fold_convert (utype, expr);
3126 }
3127
3128 /* Determines the expression by that USE is expressed from induction variable
3129    CAND in LOOP.  */
3130
3131 static tree
3132 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3133 {
3134   return get_computation_at (loop, use, cand, use->stmt);
3135 }
3136
3137 /* Returns cost of addition in MODE.  */
3138
3139 static unsigned
3140 add_cost (enum machine_mode mode)
3141 {
3142   static unsigned costs[NUM_MACHINE_MODES];
3143   rtx seq;
3144   unsigned cost;
3145
3146   if (costs[mode])
3147     return costs[mode];
3148
3149   start_sequence ();
3150   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3151                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3152                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3153                  NULL_RTX);
3154   seq = get_insns ();
3155   end_sequence ();
3156
3157   cost = seq_cost (seq);
3158   if (!cost)
3159     cost = 1;
3160
3161   costs[mode] = cost;
3162
3163   if (dump_file && (dump_flags & TDF_DETAILS))
3164     fprintf (dump_file, "Addition in %s costs %d\n",
3165              GET_MODE_NAME (mode), cost);
3166   return cost;
3167 }
3168
3169 /* Entry in a hashtable of already known costs for multiplication.  */
3170 struct mbc_entry
3171 {
3172   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3173   enum machine_mode mode;       /* In mode.  */
3174   unsigned cost;                /* The cost.  */
3175 };
3176
3177 /* Counts hash value for the ENTRY.  */
3178
3179 static hashval_t
3180 mbc_entry_hash (const void *entry)
3181 {
3182   const struct mbc_entry *e = entry;
3183
3184   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3185 }
3186
3187 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3188
3189 static int
3190 mbc_entry_eq (const void *entry1, const void *entry2)
3191 {
3192   const struct mbc_entry *e1 = entry1;
3193   const struct mbc_entry *e2 = entry2;
3194
3195   return (e1->mode == e2->mode
3196           && e1->cst == e2->cst);
3197 }
3198
3199 /* Returns cost of multiplication by constant CST in MODE.  */
3200
3201 static unsigned
3202 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3203 {
3204   static htab_t costs;
3205   struct mbc_entry **cached, act;
3206   rtx seq;
3207   unsigned cost;
3208
3209   if (!costs)
3210     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3211
3212   act.mode = mode;
3213   act.cst = cst;
3214   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3215   if (*cached)
3216     return (*cached)->cost;
3217
3218   *cached = xmalloc (sizeof (struct mbc_entry));
3219   (*cached)->mode = mode;
3220   (*cached)->cst = cst;
3221
3222   start_sequence ();
3223   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3224                NULL_RTX, 0);
3225   seq = get_insns ();
3226   end_sequence ();
3227
3228   cost = seq_cost (seq);
3229
3230   if (dump_file && (dump_flags & TDF_DETAILS))
3231     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3232              (int) cst, GET_MODE_NAME (mode), cost);
3233
3234   (*cached)->cost = cost;
3235
3236   return cost;
3237 }
3238
3239 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3240    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3241    variable is omitted.  The created memory accesses MODE.
3242
3243    TODO -- there must be some better way.  This all is quite crude.  */
3244
3245 static unsigned
3246 get_address_cost (bool symbol_present, bool var_present,
3247                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3248 {
3249 #define MAX_RATIO 128
3250   static sbitmap valid_mult;
3251   static HOST_WIDE_INT rat, off;
3252   static HOST_WIDE_INT min_offset, max_offset;
3253   static unsigned costs[2][2][2][2];
3254   unsigned cost, acost;
3255   rtx seq, addr, base;
3256   bool offset_p, ratio_p;
3257   rtx reg1;
3258   HOST_WIDE_INT s_offset;
3259   unsigned HOST_WIDE_INT mask;
3260   unsigned bits;
3261
3262   if (!valid_mult)
3263     {
3264       HOST_WIDE_INT i;
3265
3266       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3267
3268       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3269       for (i = 1; i <= 1 << 20; i <<= 1)
3270         {
3271           XEXP (addr, 1) = GEN_INT (i);
3272           if (!memory_address_p (Pmode, addr))
3273             break;
3274         }
3275       max_offset = i >> 1;
3276       off = max_offset;
3277
3278       for (i = 1; i <= 1 << 20; i <<= 1)
3279         {
3280           XEXP (addr, 1) = GEN_INT (-i);
3281           if (!memory_address_p (Pmode, addr))
3282             break;
3283         }
3284       min_offset = -(i >> 1);
3285
3286       if (dump_file && (dump_flags & TDF_DETAILS))
3287         {
3288           fprintf (dump_file, "get_address_cost:\n");
3289           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3290           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3291         }
3292
3293       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3294       sbitmap_zero (valid_mult);
3295       rat = 1;
3296       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3297       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3298         {
3299           XEXP (addr, 1) = GEN_INT (i);
3300           if (memory_address_p (Pmode, addr))
3301             {
3302               SET_BIT (valid_mult, i + MAX_RATIO);
3303               rat = i;
3304             }
3305         }
3306
3307       if (dump_file && (dump_flags & TDF_DETAILS))
3308         {
3309           fprintf (dump_file, "  allowed multipliers:");
3310           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3311             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3312               fprintf (dump_file, " %d", (int) i);
3313           fprintf (dump_file, "\n");
3314           fprintf (dump_file, "\n");
3315         }
3316     }
3317
3318   bits = GET_MODE_BITSIZE (Pmode);
3319   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3320   offset &= mask;
3321   if ((offset >> (bits - 1) & 1))
3322     offset |= ~mask;
3323   s_offset = offset;
3324
3325   cost = 0;
3326   offset_p = (s_offset != 0
3327               && min_offset <= s_offset && s_offset <= max_offset);
3328   ratio_p = (ratio != 1
3329              && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3330              && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3331
3332   if (ratio != 1 && !ratio_p)
3333     cost += multiply_by_cost (ratio, Pmode);
3334
3335   if (s_offset && !offset_p && !symbol_present)
3336     {
3337       cost += add_cost (Pmode);
3338       var_present = true;
3339     }
3340
3341   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3342   if (!acost)
3343     {
3344       acost = 0;
3345
3346       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3347       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3348       if (ratio_p)
3349         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3350
3351       if (var_present)
3352         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3353
3354       if (symbol_present)
3355         {
3356           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3357           if (offset_p)
3358             base = gen_rtx_fmt_e (CONST, Pmode,
3359                                   gen_rtx_fmt_ee (PLUS, Pmode,
3360                                                   base,
3361                                                   GEN_INT (off)));
3362         }
3363       else if (offset_p)
3364         base = GEN_INT (off);
3365       else
3366         base = NULL_RTX;
3367
3368       if (base)
3369         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3370
3371       start_sequence ();
3372       addr = memory_address (Pmode, addr);
3373       seq = get_insns ();
3374       end_sequence ();
3375
3376       acost = seq_cost (seq);
3377       acost += address_cost (addr, Pmode);
3378
3379       if (!acost)
3380         acost = 1;
3381       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3382     }
3383
3384   return cost + acost;
3385 }
3386 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3387    invariants the computation depends on.  */
3388
3389 static unsigned
3390 force_var_cost (struct ivopts_data *data,
3391                 tree expr, bitmap *depends_on)
3392 {
3393   static bool costs_initialized = false;
3394   static unsigned integer_cost;
3395   static unsigned symbol_cost;
3396   static unsigned address_cost;
3397   tree op0, op1;
3398   unsigned cost0, cost1, cost;
3399   enum machine_mode mode;
3400
3401   if (!costs_initialized)
3402     {
3403       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3404       rtx x = gen_rtx_MEM (DECL_MODE (var),
3405                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3406       tree addr;
3407       tree type = build_pointer_type (integer_type_node);
3408
3409       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3410                                                            2000));
3411
3412       SET_DECL_RTL (var, x);
3413       TREE_STATIC (var) = 1;
3414       addr = build1 (ADDR_EXPR, type, var);
3415       symbol_cost = computation_cost (addr) + 1;
3416
3417       address_cost
3418         = computation_cost (build2 (PLUS_EXPR, type,
3419                                     addr,
3420                                     build_int_cst_type (type, 2000))) + 1;
3421       if (dump_file && (dump_flags & TDF_DETAILS))
3422         {
3423           fprintf (dump_file, "force_var_cost:\n");
3424           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3425           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3426           fprintf (dump_file, "  address %d\n", (int) address_cost);
3427           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3428           fprintf (dump_file, "\n");
3429         }
3430
3431       costs_initialized = true;
3432     }
3433
3434   STRIP_NOPS (expr);
3435
3436   if (depends_on)
3437     {
3438       fd_ivopts_data = data;
3439       walk_tree (&expr, find_depends, depends_on, NULL);
3440     }
3441
3442   if (SSA_VAR_P (expr))
3443     return 0;
3444
3445   if (TREE_INVARIANT (expr))
3446     {
3447       if (TREE_CODE (expr) == INTEGER_CST)
3448         return integer_cost;
3449
3450       if (TREE_CODE (expr) == ADDR_EXPR)
3451         {
3452           tree obj = TREE_OPERAND (expr, 0);
3453
3454           if (TREE_CODE (obj) == VAR_DECL
3455               || TREE_CODE (obj) == PARM_DECL
3456               || TREE_CODE (obj) == RESULT_DECL)
3457             return symbol_cost;
3458         }
3459
3460       return address_cost;
3461     }
3462
3463   switch (TREE_CODE (expr))
3464     {
3465     case PLUS_EXPR:
3466     case MINUS_EXPR:
3467     case MULT_EXPR:
3468       op0 = TREE_OPERAND (expr, 0);
3469       op1 = TREE_OPERAND (expr, 1);
3470       STRIP_NOPS (op0);
3471       STRIP_NOPS (op1);
3472
3473       if (is_gimple_val (op0))
3474         cost0 = 0;
3475       else
3476         cost0 = force_var_cost (data, op0, NULL);
3477
3478       if (is_gimple_val (op1))
3479         cost1 = 0;
3480       else
3481         cost1 = force_var_cost (data, op1, NULL);
3482
3483       break;
3484
3485     default:
3486       /* Just an arbitrary value, FIXME.  */
3487       return target_spill_cost;
3488     }
3489
3490   mode = TYPE_MODE (TREE_TYPE (expr));
3491   switch (TREE_CODE (expr))
3492     {
3493     case PLUS_EXPR:
3494     case MINUS_EXPR:
3495       cost = add_cost (mode);
3496       break;
3497
3498     case MULT_EXPR:
3499       if (cst_and_fits_in_hwi (op0))
3500         cost = multiply_by_cost (int_cst_value (op0), mode);
3501       else if (cst_and_fits_in_hwi (op1))
3502         cost = multiply_by_cost (int_cst_value (op1), mode);
3503       else
3504         return target_spill_cost;
3505       break;
3506
3507     default:
3508       gcc_unreachable ();
3509     }
3510
3511   cost += cost0;
3512   cost += cost1;
3513
3514   /* Bound the cost by target_spill_cost.  The parts of complicated
3515      computations often are either loop invariant or at least can
3516      be shared between several iv uses, so letting this grow without
3517      limits would not give reasonable results.  */
3518   return cost < target_spill_cost ? cost : target_spill_cost;
3519 }
3520
3521 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3522    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3523    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3524    invariants the computation depends on.  */
3525
3526 static unsigned
3527 split_address_cost (struct ivopts_data *data,
3528                     tree addr, bool *symbol_present, bool *var_present,
3529                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3530 {
3531   tree core;
3532   HOST_WIDE_INT bitsize;
3533   HOST_WIDE_INT bitpos;
3534   tree toffset;
3535   enum machine_mode mode;
3536   int unsignedp, volatilep;
3537
3538   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3539                               &unsignedp, &volatilep, false);
3540
3541   if (toffset != 0
3542       || bitpos % BITS_PER_UNIT != 0
3543       || TREE_CODE (core) != VAR_DECL)
3544     {
3545       *symbol_present = false;
3546       *var_present = true;
3547       fd_ivopts_data = data;
3548       walk_tree (&addr, find_depends, depends_on, NULL);
3549       return target_spill_cost;
3550     }
3551
3552   *offset += bitpos / BITS_PER_UNIT;
3553   if (TREE_STATIC (core)
3554       || DECL_EXTERNAL (core))
3555     {
3556       *symbol_present = true;
3557       *var_present = false;
3558       return 0;
3559     }
3560
3561   *symbol_present = false;
3562   *var_present = true;
3563   return 0;
3564 }
3565
3566 /* Estimates cost of expressing difference of addresses E1 - E2 as
3567    var + symbol + offset.  The value of offset is added to OFFSET,
3568    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3569    part is missing.  DEPENDS_ON is a set of the invariants the computation
3570    depends on.  */
3571
3572 static unsigned
3573 ptr_difference_cost (struct ivopts_data *data,
3574                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3575                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3576 {
3577   HOST_WIDE_INT diff = 0;
3578   unsigned cost;
3579
3580   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3581
3582   if (ptr_difference_const (e1, e2, &diff))
3583     {
3584       *offset += diff;
3585       *symbol_present = false;
3586       *var_present = false;
3587       return 0;
3588     }
3589
3590   if (e2 == integer_zero_node)
3591     return split_address_cost (data, TREE_OPERAND (e1, 0),
3592                                symbol_present, var_present, offset, depends_on);
3593
3594   *symbol_present = false;
3595   *var_present = true;
3596
3597   cost = force_var_cost (data, e1, depends_on);
3598   cost += force_var_cost (data, e2, depends_on);
3599   cost += add_cost (Pmode);
3600
3601   return cost;
3602 }
3603
3604 /* Estimates cost of expressing difference E1 - E2 as
3605    var + symbol + offset.  The value of offset is added to OFFSET,
3606    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3607    part is missing.  DEPENDS_ON is a set of the invariants the computation
3608    depends on.  */
3609
3610 static unsigned
3611 difference_cost (struct ivopts_data *data,
3612                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3613                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3614 {
3615   unsigned cost;
3616   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3617   unsigned HOST_WIDE_INT off1, off2;
3618
3619   e1 = strip_offset (e1, &off1);
3620   e2 = strip_offset (e2, &off2);
3621   *offset += off1 - off2;
3622
3623   STRIP_NOPS (e1);
3624   STRIP_NOPS (e2);
3625
3626   if (TREE_CODE (e1) == ADDR_EXPR)
3627     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3628                                 depends_on);
3629   *symbol_present = false;
3630
3631   if (operand_equal_p (e1, e2, 0))
3632     {
3633       *var_present = false;
3634       return 0;
3635     }
3636   *var_present = true;
3637   if (zero_p (e2))
3638     return force_var_cost (data, e1, depends_on);
3639
3640   if (zero_p (e1))
3641     {
3642       cost = force_var_cost (data, e2, depends_on);
3643       cost += multiply_by_cost (-1, mode);
3644
3645       return cost;
3646     }
3647
3648   cost = force_var_cost (data, e1, depends_on);
3649   cost += force_var_cost (data, e2, depends_on);
3650   cost += add_cost (mode);
3651
3652   return cost;
3653 }
3654
3655 /* Determines the cost of the computation by that USE is expressed
3656    from induction variable CAND.  If ADDRESS_P is true, we just need
3657    to create an address from it, otherwise we want to get it into
3658    register.  A set of invariants we depend on is stored in
3659    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3660
3661 static unsigned
3662 get_computation_cost_at (struct ivopts_data *data,
3663                          struct iv_use *use, struct iv_cand *cand,
3664                          bool address_p, bitmap *depends_on, tree at)
3665 {
3666   tree ubase = use->iv->base, ustep = use->iv->step;
3667   tree cbase, cstep;
3668   tree utype = TREE_TYPE (ubase), ctype;
3669   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3670   HOST_WIDE_INT ratio, aratio;
3671   bool var_present, symbol_present;
3672   unsigned cost = 0, n_sums;
3673
3674   *depends_on = NULL;
3675
3676   /* Only consider real candidates.  */
3677   if (!cand->iv)
3678     return INFTY;
3679
3680   cbase = cand->iv->base;
3681   cstep = cand->iv->step;
3682   ctype = TREE_TYPE (cbase);
3683
3684   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3685     {
3686       /* We do not have a precision to express the values of use.  */
3687       return INFTY;
3688     }
3689
3690   if (address_p)
3691     {
3692       /* Do not try to express address of an object with computation based
3693          on address of a different object.  This may cause problems in rtl
3694          level alias analysis (that does not expect this to be happening,
3695          as this is illegal in C), and would be unlikely to be useful
3696          anyway.  */
3697       if (use->iv->base_object
3698           && cand->iv->base_object
3699           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3700         return INFTY;
3701     }
3702
3703   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3704     {
3705       /* TODO -- add direct handling of this case.  */
3706       goto fallback;
3707     }
3708
3709   /* CSTEPI is removed from the offset in case statement is after the
3710      increment.  If the step is not constant, we use zero instead.
3711      This is a bit imprecise (there is the extra addition), but
3712      redundancy elimination is likely to transform the code so that
3713      it uses value of the variable before increment anyway,
3714      so it is not that much unrealistic.  */
3715   if (cst_and_fits_in_hwi (cstep))
3716     cstepi = int_cst_value (cstep);
3717   else
3718     cstepi = 0;
3719
3720   if (cst_and_fits_in_hwi (ustep)
3721       && cst_and_fits_in_hwi (cstep))
3722     {
3723       ustepi = int_cst_value (ustep);
3724
3725       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3726         return INFTY;
3727     }
3728   else
3729     {
3730       tree rat;
3731
3732       rat = constant_multiple_of (utype, ustep, cstep);
3733
3734       if (!rat)
3735         return INFTY;
3736
3737       if (cst_and_fits_in_hwi (rat))
3738         ratio = int_cst_value (rat);
3739       else if (integer_onep (rat))
3740         ratio = 1;
3741       else if (integer_all_onesp (rat))
3742         ratio = -1;
3743       else
3744         return INFTY;
3745     }
3746
3747   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3748      or ratio == 1, it is better to handle this like
3749
3750      ubase - ratio * cbase + ratio * var
3751
3752      (also holds in the case ratio == -1, TODO.  */
3753
3754   if (cst_and_fits_in_hwi (cbase))
3755     {
3756       offset = - ratio * int_cst_value (cbase);
3757       cost += difference_cost (data,
3758                                ubase, integer_zero_node,
3759                                &symbol_present, &var_present, &offset,
3760                                depends_on);
3761     }
3762   else if (ratio == 1)
3763     {
3764       cost += difference_cost (data,
3765                                ubase, cbase,
3766                                &symbol_present, &var_present, &offset,
3767                                depends_on);
3768     }
3769   else
3770     {
3771       cost += force_var_cost (data, cbase, depends_on);
3772       cost += add_cost (TYPE_MODE (ctype));
3773       cost += difference_cost (data,
3774                                ubase, integer_zero_node,
3775                                &symbol_present, &var_present, &offset,
3776                                depends_on);
3777     }
3778
3779   /* If we are after the increment, the value of the candidate is higher by
3780      one iteration.  */
3781   if (stmt_after_increment (data->current_loop, cand, at))
3782     offset -= ratio * cstepi;
3783
3784   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3785      (symbol/var/const parts may be omitted).  If we are looking for an address,
3786      find the cost of addressing this.  */
3787   if (address_p)
3788     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3789
3790   /* Otherwise estimate the costs for computing the expression.  */
3791   aratio = ratio > 0 ? ratio : -ratio;
3792   if (!symbol_present && !var_present && !offset)
3793     {
3794       if (ratio != 1)
3795         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3796
3797       return cost;
3798     }
3799
3800   if (aratio != 1)
3801     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3802
3803   n_sums = 1;
3804   if (var_present
3805       /* Symbol + offset should be compile-time computable.  */
3806       && (symbol_present || offset))
3807     n_sums++;
3808
3809   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3810
3811 fallback:
3812   {
3813     /* Just get the expression, expand it and measure the cost.  */
3814     tree comp = get_computation_at (data->current_loop, use, cand, at);
3815
3816     if (!comp)
3817       return INFTY;
3818
3819     if (address_p)
3820       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3821
3822     return computation_cost (comp);
3823   }
3824 }
3825
3826 /* Determines the cost of the computation by that USE is expressed
3827    from induction variable CAND.  If ADDRESS_P is true, we just need
3828    to create an address from it, otherwise we want to get it into
3829    register.  A set of invariants we depend on is stored in
3830    DEPENDS_ON.  */
3831
3832 static unsigned
3833 get_computation_cost (struct ivopts_data *data,
3834                       struct iv_use *use, struct iv_cand *cand,
3835                       bool address_p, bitmap *depends_on)
3836 {
3837   return get_computation_cost_at (data,
3838                                   use, cand, address_p, depends_on, use->stmt);
3839 }
3840
3841 /* Determines cost of basing replacement of USE on CAND in a generic
3842    expression.  */
3843
3844 static bool
3845 determine_use_iv_cost_generic (struct ivopts_data *data,
3846                                struct iv_use *use, struct iv_cand *cand)
3847 {
3848   bitmap depends_on;
3849   unsigned cost;
3850
3851   /* The simple case first -- if we need to express value of the preserved
3852      original biv, the cost is 0.  This also prevents us from counting the
3853      cost of increment twice -- once at this use and once in the cost of
3854      the candidate.  */
3855   if (cand->pos == IP_ORIGINAL
3856       && cand->incremented_at == use->stmt)
3857     {
3858       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3859       return true;
3860     }
3861
3862   cost = get_computation_cost (data, use, cand, false, &depends_on);
3863   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3864
3865   return cost != INFTY;
3866 }
3867
3868 /* Determines cost of basing replacement of USE on CAND in an address.  */
3869
3870 static bool
3871 determine_use_iv_cost_address (struct ivopts_data *data,
3872                                struct iv_use *use, struct iv_cand *cand)
3873 {
3874   bitmap depends_on;
3875   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3876
3877   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3878
3879   return cost != INFTY;
3880 }
3881
3882 /* Computes value of induction variable IV in iteration NITER.  */
3883
3884 static tree
3885 iv_value (struct iv *iv, tree niter)
3886 {
3887   tree val;
3888   tree type = TREE_TYPE (iv->base);
3889
3890   niter = fold_convert (type, niter);
3891   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3892
3893   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3894 }
3895
3896 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3897
3898 static tree
3899 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3900 {
3901   tree val = iv_value (cand->iv, niter);
3902   tree type = TREE_TYPE (cand->iv->base);
3903
3904   if (stmt_after_increment (loop, cand, at))
3905     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3906
3907   return val;
3908 }
3909
3910 /* Returns period of induction variable iv.  */
3911
3912 static tree
3913 iv_period (struct iv *iv)
3914 {
3915   tree step = iv->step, period, type;
3916   tree pow2div;
3917
3918   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3919
3920   /* Period of the iv is gcd (step, type range).  Since type range is power
3921      of two, it suffices to determine the maximum power of two that divides
3922      step.  */
3923   pow2div = num_ending_zeros (step);
3924   type = unsigned_type_for (TREE_TYPE (step));
3925
3926   period = build_low_bits_mask (type,
3927                                 (TYPE_PRECISION (type)
3928                                  - tree_low_cst (pow2div, 1)));
3929
3930   return period;
3931 }
3932
3933 /* Returns the comparison operator used when eliminating the iv USE.  */
3934
3935 static enum tree_code
3936 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3937 {
3938   struct loop *loop = data->current_loop;
3939   basic_block ex_bb;
3940   edge exit;
3941
3942   ex_bb = bb_for_stmt (use->stmt);
3943   exit = EDGE_SUCC (ex_bb, 0);
3944   if (flow_bb_inside_loop_p (loop, exit->dest))
3945     exit = EDGE_SUCC (ex_bb, 1);
3946
3947   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3948 }
3949
3950 /* Check whether it is possible to express the condition in USE by comparison
3951    of candidate CAND.  If so, store the value compared with to BOUND.  */
3952
3953 static bool
3954 may_eliminate_iv (struct ivopts_data *data,
3955                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3956 {
3957   basic_block ex_bb;
3958   edge exit;
3959   struct tree_niter_desc *niter;
3960   tree nit, nit_type;
3961   tree wider_type, period, per_type;
3962   struct loop *loop = data->current_loop;
3963
3964   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3965     return false;
3966
3967   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3968      for other conditions inside loop body.  */
3969   ex_bb = bb_for_stmt (use->stmt);
3970   if (use->stmt != last_stmt (ex_bb)
3971       || TREE_CODE (use->stmt) != COND_EXPR)
3972     return false;
3973   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3974     return false;
3975
3976   exit = EDGE_SUCC (ex_bb, 0);
3977   if (flow_bb_inside_loop_p (loop, exit->dest))
3978     exit = EDGE_SUCC (ex_bb, 1);
3979   if (flow_bb_inside_loop_p (loop, exit->dest))
3980     return false;
3981
3982   niter = niter_for_exit (data, exit);
3983   if (!niter
3984       || !zero_p (niter->may_be_zero))
3985     return false;
3986
3987   nit = niter->niter;
3988   nit_type = TREE_TYPE (nit);
3989
3990   /* Determine whether we may use the variable to test whether niter iterations
3991      elapsed.  This is the case iff the period of the induction variable is
3992      greater than the number of iterations.  */
3993   period = iv_period (cand->iv);
3994   if (!period)
3995     return false;
3996   per_type = TREE_TYPE (period);
3997
3998   wider_type = TREE_TYPE (period);
3999   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4000     wider_type = per_type;
4001   else
4002     wider_type = nit_type;
4003
4004   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
4005                                        fold_convert (wider_type, period),
4006                                        fold_convert (wider_type, nit)))))
4007     return false;
4008
4009   *bound = cand_value_at (loop, cand, use->stmt, nit);
4010   return true;
4011 }
4012
4013 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4014
4015 static bool
4016 determine_use_iv_cost_condition (struct ivopts_data *data,
4017                                  struct iv_use *use, struct iv_cand *cand)
4018 {
4019   tree bound = NULL_TREE, op, cond;
4020   bitmap depends_on = NULL;
4021   unsigned cost;
4022
4023   /* Only consider real candidates.  */
4024   if (!cand->iv)
4025     {
4026       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4027       return false;
4028     }
4029
4030   if (may_eliminate_iv (data, use, cand, &bound))
4031     {
4032       cost = force_var_cost (data, bound, &depends_on);
4033
4034       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4035       return cost != INFTY;
4036     }
4037
4038   /* The induction variable elimination failed; just express the original
4039      giv.  If it is compared with an invariant, note that we cannot get
4040      rid of it.  */
4041   cost = get_computation_cost (data, use, cand, false, &depends_on);
4042
4043   cond = *use->op_p;
4044   if (TREE_CODE (cond) != SSA_NAME)
4045     {
4046       op = TREE_OPERAND (cond, 0);
4047       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4048         op = TREE_OPERAND (cond, 1);
4049       if (TREE_CODE (op) == SSA_NAME)
4050         {
4051           op = get_iv (data, op)->base;
4052           fd_ivopts_data = data;
4053           walk_tree (&op, find_depends, &depends_on, NULL);
4054         }
4055     }
4056
4057   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4058   return cost != INFTY;
4059 }
4060
4061 /* Checks whether it is possible to replace the final value of USE by
4062    a direct computation.  If so, the formula is stored to *VALUE.  */
4063
4064 static bool
4065 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4066                          tree *value)
4067 {
4068   struct loop *loop = data->current_loop;
4069   edge exit;
4070   struct tree_niter_desc *niter;
4071
4072   exit = single_dom_exit (loop);
4073   if (!exit)
4074     return false;
4075
4076   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4077                               bb_for_stmt (use->stmt)));
4078
4079   niter = niter_for_single_dom_exit (data);
4080   if (!niter
4081       || !zero_p (niter->may_be_zero))
4082     return false;
4083
4084   *value = iv_value (use->iv, niter->niter);
4085
4086   return true;
4087 }
4088
4089 /* Determines cost of replacing final value of USE using CAND.  */
4090
4091 static bool
4092 determine_use_iv_cost_outer (struct ivopts_data *data,
4093                              struct iv_use *use, struct iv_cand *cand)
4094 {
4095   bitmap depends_on;
4096   unsigned cost;
4097   edge exit;
4098   tree value = NULL_TREE;
4099   struct loop *loop = data->current_loop;
4100
4101   /* The simple case first -- if we need to express value of the preserved
4102      original biv, the cost is 0.  This also prevents us from counting the
4103      cost of increment twice -- once at this use and once in the cost of
4104      the candidate.  */
4105   if (cand->pos == IP_ORIGINAL
4106       && cand->incremented_at == use->stmt)
4107     {
4108       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4109       return true;
4110     }
4111
4112   if (!cand->iv)
4113     {
4114       if (!may_replace_final_value (data, use, &value))
4115         {
4116           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4117           return false;
4118         }
4119
4120       depends_on = NULL;
4121       cost = force_var_cost (data, value, &depends_on);
4122
4123       cost /= AVG_LOOP_NITER (loop);
4124
4125       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4126       return cost != INFTY;
4127     }
4128
4129   exit = single_dom_exit (loop);
4130   if (exit)
4131     {
4132       /* If there is just a single exit, we may use value of the candidate
4133          after we take it to determine the value of use.  */
4134       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4135                                       last_stmt (exit->src));
4136       if (cost != INFTY)
4137         cost /= AVG_LOOP_NITER (loop);
4138     }
4139   else
4140     {
4141       /* Otherwise we just need to compute the iv.  */
4142       cost = get_computation_cost (data, use, cand, false, &depends_on);
4143     }
4144
4145   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4146
4147   return cost != INFTY;
4148 }
4149
4150 /* Determines cost of basing replacement of USE on CAND.  Returns false
4151    if USE cannot be based on CAND.  */
4152
4153 static bool
4154 determine_use_iv_cost (struct ivopts_data *data,
4155                        struct iv_use *use, struct iv_cand *cand)
4156 {
4157   switch (use->type)
4158     {
4159     case USE_NONLINEAR_EXPR:
4160       return determine_use_iv_cost_generic (data, use, cand);
4161
4162     case USE_OUTER:
4163       return determine_use_iv_cost_outer (data, use, cand);
4164
4165     case USE_ADDRESS:
4166       return determine_use_iv_cost_address (data, use, cand);
4167
4168     case USE_COMPARE:
4169       return determine_use_iv_cost_condition (data, use, cand);
4170
4171     default:
4172       gcc_unreachable ();
4173     }
4174 }
4175
4176 /* Determines costs of basing the use of the iv on an iv candidate.  */
4177
4178 static void
4179 determine_use_iv_costs (struct ivopts_data *data)
4180 {
4181   unsigned i, j;
4182   struct iv_use *use;
4183   struct iv_cand *cand;
4184   bitmap to_clear = BITMAP_ALLOC (NULL);
4185
4186   alloc_use_cost_map (data);
4187
4188   for (i = 0; i < n_iv_uses (data); i++)
4189     {
4190       use = iv_use (data, i);
4191
4192       if (data->consider_all_candidates)
4193         {
4194           for (j = 0; j < n_iv_cands (data); j++)
4195             {
4196               cand = iv_cand (data, j);
4197               determine_use_iv_cost (data, use, cand);
4198             }
4199         }
4200       else
4201         {
4202           bitmap_iterator bi;
4203
4204           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4205             {
4206               cand = iv_cand (data, j);
4207               if (!determine_use_iv_cost (data, use, cand))
4208                 bitmap_set_bit (to_clear, j);
4209             }
4210
4211           /* Remove the candidates for that the cost is infinite from
4212              the list of related candidates.  */
4213           bitmap_and_compl_into (use->related_cands, to_clear);
4214           bitmap_clear (to_clear);
4215         }
4216     }
4217
4218   BITMAP_FREE (to_clear);
4219
4220   if (dump_file && (dump_flags & TDF_DETAILS))
4221     {
4222       fprintf (dump_file, "Use-candidate costs:\n");
4223
4224       for (i = 0; i < n_iv_uses (data); i++)
4225         {
4226           use = iv_use (data, i);
4227
4228           fprintf (dump_file, "Use %d:\n", i);
4229           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4230           for (j = 0; j < use->n_map_members; j++)
4231             {
4232               if (!use->cost_map[j].cand
4233                   || use->cost_map[j].cost == INFTY)
4234                 continue;
4235
4236               fprintf (dump_file, "  %d\t%d\t",
4237                        use->cost_map[j].cand->id,
4238                        use->cost_map[j].cost);
4239               if (use->cost_map[j].depends_on)
4240                 bitmap_print (dump_file,
4241                               use->cost_map[j].depends_on, "","");
4242               fprintf (dump_file, "\n");
4243             }
4244
4245           fprintf (dump_file, "\n");
4246         }
4247       fprintf (dump_file, "\n");
4248     }
4249 }
4250
4251 /* Determines cost of the candidate CAND.  */
4252
4253 static void
4254 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4255 {
4256   unsigned cost_base, cost_step;
4257   tree base;
4258
4259   if (!cand->iv)
4260     {
4261       cand->cost = 0;
4262       return;
4263     }
4264
4265   /* There are two costs associated with the candidate -- its increment
4266      and its initialization.  The second is almost negligible for any loop
4267      that rolls enough, so we take it just very little into account.  */
4268
4269   base = cand->iv->base;
4270   cost_base = force_var_cost (data, base, NULL);
4271   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4272
4273   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4274
4275   /* Prefer the original iv unless we may gain something by replacing it;
4276      this is not really relevant for artificial ivs created by other
4277      passes.  */
4278   if (cand->pos == IP_ORIGINAL
4279       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4280     cand->cost--;
4281
4282   /* Prefer not to insert statements into latch unless there are some
4283      already (so that we do not create unnecessary jumps).  */
4284   if (cand->pos == IP_END
4285       && empty_block_p (ip_end_pos (data->current_loop)))
4286     cand->cost++;
4287 }
4288
4289 /* Determines costs of computation of the candidates.  */
4290
4291 static void
4292 determine_iv_costs (struct ivopts_data *data)
4293 {
4294   unsigned i;
4295
4296   if (dump_file && (dump_flags & TDF_DETAILS))
4297     {
4298       fprintf (dump_file, "Candidate costs:\n");
4299       fprintf (dump_file, "  cand\tcost\n");
4300     }
4301
4302   for (i = 0; i < n_iv_cands (data); i++)
4303     {
4304       struct iv_cand *cand = iv_cand (data, i);
4305
4306       determine_iv_cost (data, cand);
4307
4308       if (dump_file && (dump_flags & TDF_DETAILS))
4309         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4310     }
4311
4312 if (dump_file && (dump_flags & TDF_DETAILS))
4313       fprintf (dump_file, "\n");
4314 }
4315
4316 /* Calculates cost for having SIZE induction variables.  */
4317
4318 static unsigned
4319 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4320 {
4321   return global_cost_for_size (size,
4322                                loop_data (data->current_loop)->regs_used,
4323                                n_iv_uses (data));
4324 }
4325
4326 /* For each size of the induction variable set determine the penalty.  */
4327
4328 static void
4329 determine_set_costs (struct ivopts_data *data)
4330 {
4331   unsigned j, n;
4332   tree phi, op;
4333   struct loop *loop = data->current_loop;
4334   bitmap_iterator bi;
4335
4336   /* We use the following model (definitely improvable, especially the
4337      cost function -- TODO):
4338
4339      We estimate the number of registers available (using MD data), name it A.
4340
4341      We estimate the number of registers used by the loop, name it U.  This
4342      number is obtained as the number of loop phi nodes (not counting virtual
4343      registers and bivs) + the number of variables from outside of the loop.
4344
4345      We set a reserve R (free regs that are used for temporary computations,
4346      etc.).  For now the reserve is a constant 3.
4347
4348      Let I be the number of induction variables.
4349
4350      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4351         make a lot of ivs without a reason).
4352      -- if A - R < U + I <= A, the cost is I * PRES_COST
4353      -- if U + I > A, the cost is I * PRES_COST and
4354         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4355
4356   if (dump_file && (dump_flags & TDF_DETAILS))
4357     {
4358       fprintf (dump_file, "Global costs:\n");
4359       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4360       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4361       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4362       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4363     }
4364
4365   n = 0;
4366   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4367     {
4368       op = PHI_RESULT (phi);
4369
4370       if (!is_gimple_reg (op))
4371         continue;
4372
4373       if (get_iv (data, op))
4374         continue;
4375
4376       n++;
4377     }
4378
4379   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4380     {
4381       struct version_info *info = ver_info (data, j);
4382
4383       if (info->inv_id && info->has_nonlin_use)
4384         n++;
4385     }
4386
4387   loop_data (loop)->regs_used = n;
4388   if (dump_file && (dump_flags & TDF_DETAILS))
4389     fprintf (dump_file, "  regs_used %d\n", n);
4390
4391   if (dump_file && (dump_flags & TDF_DETAILS))
4392     {
4393       fprintf (dump_file, "  cost for size:\n");
4394       fprintf (dump_file, "  ivs\tcost\n");
4395       for (j = 0; j <= 2 * target_avail_regs; j++)
4396         fprintf (dump_file, "  %d\t%d\n", j,
4397                  ivopts_global_cost_for_size (data, j));
4398       fprintf (dump_file, "\n");
4399     }
4400 }
4401
4402 /* Returns true if A is a cheaper cost pair than B.  */
4403
4404 static bool
4405 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4406 {
4407   if (!a)
4408     return false;
4409
4410   if (!b)
4411     return true;
4412
4413   if (a->cost < b->cost)
4414     return true;
4415
4416   if (a->cost > b->cost)
4417     return false;
4418
4419   /* In case the costs are the same, prefer the cheaper candidate.  */
4420   if (a->cand->cost < b->cand->cost)
4421     return true;
4422
4423   return false;
4424 }
4425
4426 /* Computes the cost field of IVS structure.  */
4427
4428 static void
4429 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4430 {
4431   unsigned cost = 0;
4432
4433   cost += ivs->cand_use_cost;
4434   cost += ivs->cand_cost;
4435   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4436
4437   ivs->cost = cost;
4438 }
4439
4440 /* Remove invariants in set INVS to set IVS.  */
4441
4442 static void
4443 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4444 {
4445   bitmap_iterator bi;
4446   unsigned iid;
4447
4448   if (!invs)
4449     return;
4450
4451   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4452     {
4453       ivs->n_invariant_uses[iid]--;
4454       if (ivs->n_invariant_uses[iid] == 0)
4455         ivs->n_regs--;
4456     }
4457 }
4458
4459 /* Set USE not to be expressed by any candidate in IVS.  */
4460
4461 static void
4462 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4463                  struct iv_use *use)
4464 {
4465   unsigned uid = use->id, cid;
4466   struct cost_pair *cp;
4467
4468   cp = ivs->cand_for_use[uid];
4469   if (!cp)
4470     return;
4471   cid = cp->cand->id;
4472
4473   ivs->bad_uses++;
4474   ivs->cand_for_use[uid] = NULL;
4475   ivs->n_cand_uses[cid]--;
4476
4477   if (ivs->n_cand_uses[cid] == 0)
4478     {
4479       bitmap_clear_bit (ivs->cands, cid);
4480       /* Do not count the pseudocandidates.  */
4481       if (cp->cand->iv)
4482         ivs->n_regs--;
4483       ivs->n_cands--;
4484       ivs->cand_cost -= cp->cand->cost;
4485
4486       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4487     }
4488
4489   ivs->cand_use_cost -= cp->cost;
4490
4491   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4492   iv_ca_recount_cost (data, ivs);
4493 }
4494
4495 /* Add invariants in set INVS to set IVS.  */
4496
4497 static void
4498 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4499 {
4500   bitmap_iterator bi;
4501   unsigned iid;
4502
4503   if (!invs)
4504     return;
4505
4506   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4507     {
4508       ivs->n_invariant_uses[iid]++;
4509       if (ivs->n_invariant_uses[iid] == 1)
4510         ivs->n_regs++;
4511     }
4512 }
4513
4514 /* Set cost pair for USE in set IVS to CP.  */
4515
4516 static void
4517 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4518               struct iv_use *use, struct cost_pair *cp)
4519 {
4520   unsigned uid = use->id, cid;
4521
4522   if (ivs->cand_for_use[uid] == cp)
4523     return;
4524
4525   if (ivs->cand_for_use[uid])
4526     iv_ca_set_no_cp (data, ivs, use);
4527
4528   if (cp)
4529     {
4530       cid = cp->cand->id;
4531
4532       ivs->bad_uses--;
4533       ivs->cand_for_use[uid] = cp;
4534       ivs->n_cand_uses[cid]++;
4535       if (ivs->n_cand_uses[cid] == 1)
4536         {
4537           bitmap_set_bit (ivs->cands, cid);
4538           /* Do not count the pseudocandidates.  */
4539           if (cp->cand->iv)
4540             ivs->n_regs++;
4541           ivs->n_cands++;
4542           ivs->cand_cost += cp->cand->cost;
4543
4544           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4545         }
4546
4547       ivs->cand_use_cost += cp->cost;
4548       iv_ca_set_add_invariants (ivs, cp->depends_on);
4549       iv_ca_recount_cost (data, ivs);
4550     }
4551 }
4552
4553 /* Extend set IVS by expressing USE by some of the candidates in it
4554    if possible.  */
4555
4556 static void
4557 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4558                struct iv_use *use)
4559 {
4560   struct cost_pair *best_cp = NULL, *cp;
4561   bitmap_iterator bi;
4562   unsigned i;
4563
4564   gcc_assert (ivs->upto >= use->id);
4565
4566   if (ivs->upto == use->id)
4567     {
4568       ivs->upto++;
4569       ivs->bad_uses++;
4570     }
4571
4572   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4573     {
4574       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4575
4576       if (cheaper_cost_pair (cp, best_cp))
4577         best_cp = cp;
4578     }
4579
4580   iv_ca_set_cp (data, ivs, use, best_cp);
4581 }
4582
4583 /* Get cost for assignment IVS.  */
4584
4585 static unsigned
4586 iv_ca_cost (struct iv_ca *ivs)
4587 {
4588   return (ivs->bad_uses ? INFTY : ivs->cost);
4589 }
4590
4591 /* Returns true if all dependences of CP are among invariants in IVS.  */
4592
4593 static bool
4594 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4595 {
4596   unsigned i;
4597   bitmap_iterator bi;
4598
4599   if (!cp->depends_on)
4600     return true;
4601
4602   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4603     {
4604       if (ivs->n_invariant_uses[i] == 0)
4605         return false;
4606     }
4607
4608   return true;
4609 }
4610
4611 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4612    it before NEXT_CHANGE.  */
4613
4614 static struct iv_ca_delta *
4615 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4616                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4617 {
4618   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4619
4620   change->use = use;
4621   change->old_cp = old_cp;
4622   change->new_cp = new_cp;
4623   change->next_change = next_change;
4624
4625   return change;
4626 }
4627
4628 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4629    are rewritten.  */
4630
4631 static struct iv_ca_delta *
4632 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4633 {
4634   struct iv_ca_delta *last;
4635
4636   if (!l2)
4637     return l1;
4638
4639   if (!l1)
4640     return l2;
4641
4642   for (last = l1; last->next_change; last = last->next_change)
4643     continue;
4644   last->next_change = l2;
4645
4646   return l1;
4647 }
4648
4649 /* Returns candidate by that USE is expressed in IVS.  */
4650
4651 static struct cost_pair *
4652 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4653 {
4654   return ivs->cand_for_use[use->id];
4655 }
4656
4657 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4658
4659 static struct iv_ca_delta *
4660 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4661 {
4662   struct iv_ca_delta *act, *next, *prev = NULL;
4663   struct cost_pair *tmp;
4664
4665   for (act = delta; act; act = next)
4666     {
4667       next = act->next_change;
4668       act->next_change = prev;
4669       prev = act;
4670
4671       tmp = act->old_cp;
4672       act->old_cp = act->new_cp;
4673       act->new_cp = tmp;
4674     }
4675
4676   return prev;
4677 }
4678
4679 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4680    reverted instead.  */
4681
4682 static void
4683 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4684                     struct iv_ca_delta *delta, bool forward)
4685 {
4686   struct cost_pair *from, *to;
4687   struct iv_ca_delta *act;
4688
4689   if (!forward)
4690     delta = iv_ca_delta_reverse (delta);
4691
4692   for (act = delta; act; act = act->next_change)
4693     {
4694       from = act->old_cp;
4695       to = act->new_cp;
4696       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4697       iv_ca_set_cp (data, ivs, act->use, to);
4698     }
4699
4700   if (!forward)
4701     iv_ca_delta_reverse (delta);
4702 }
4703
4704 /* Returns true if CAND is used in IVS.  */
4705
4706 static bool
4707 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4708 {
4709   return ivs->n_cand_uses[cand->id] > 0;
4710 }
4711
4712 /* Returns number of induction variable candidates in the set IVS.  */
4713
4714 static unsigned
4715 iv_ca_n_cands (struct iv_ca *ivs)
4716 {
4717   return ivs->n_cands;
4718 }
4719
4720 /* Free the list of changes DELTA.  */
4721
4722 static void
4723 iv_ca_delta_free (struct iv_ca_delta **delta)
4724 {
4725   struct iv_ca_delta *act, *next;
4726
4727   for (act = *delta; act; act = next)
4728     {
4729       next = act->next_change;
4730       free (act);
4731     }
4732
4733   *delta = NULL;
4734 }
4735
4736 /* Allocates new iv candidates assignment.  */
4737
4738 static struct iv_ca *
4739 iv_ca_new (struct ivopts_data *data)
4740 {
4741   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4742
4743   nw->upto = 0;
4744   nw->bad_uses = 0;
4745   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4746   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4747   nw->cands = BITMAP_ALLOC (NULL);
4748   nw->n_cands = 0;
4749   nw->n_regs = 0;
4750   nw->cand_use_cost = 0;
4751   nw->cand_cost = 0;
4752   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4753   nw->cost = 0;
4754
4755   return nw;
4756 }
4757
4758 /* Free memory occupied by the set IVS.  */
4759
4760 static void
4761 iv_ca_free (struct iv_ca **ivs)
4762 {
4763   free ((*ivs)->cand_for_use);
4764   free ((*ivs)->n_cand_uses);
4765   BITMAP_FREE ((*ivs)->cands);
4766   free ((*ivs)->n_invariant_uses);
4767   free (*ivs);
4768   *ivs = NULL;
4769 }
4770
4771 /* Dumps IVS to FILE.  */
4772
4773 static void
4774 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4775 {
4776   const char *pref = "  invariants ";
4777   unsigned i;
4778
4779   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4780   bitmap_print (file, ivs->cands, "  candidates ","\n");
4781
4782   for (i = 1; i <= data->max_inv_id; i++)
4783     if (ivs->n_invariant_uses[i])
4784       {
4785         fprintf (file, "%s%d", pref, i);
4786         pref = ", ";
4787       }
4788   fprintf (file, "\n");
4789 }
4790
4791 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4792    new set, and store differences in DELTA.  Number of induction variables
4793    in the new set is stored to N_IVS.  */
4794
4795 static unsigned
4796 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4797               struct iv_cand *cand, struct iv_ca_delta **delta,
4798               unsigned *n_ivs)
4799 {
4800   unsigned i, cost;
4801   struct iv_use *use;
4802   struct cost_pair *old_cp, *new_cp;
4803
4804   *delta = NULL;
4805   for (i = 0; i < ivs->upto; i++)
4806     {
4807       use = iv_use (data, i);
4808       old_cp = iv_ca_cand_for_use (ivs, use);
4809
4810       if (old_cp
4811           && old_cp->cand == cand)
4812         continue;
4813
4814       new_cp = get_use_iv_cost (data, use, cand);
4815       if (!new_cp)
4816         continue;
4817
4818       if (!iv_ca_has_deps (ivs, new_cp))
4819         continue;
4820
4821       if (!cheaper_cost_pair (new_cp, old_cp))
4822         continue;
4823
4824       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4825     }
4826
4827   iv_ca_delta_commit (data, ivs, *delta, true);
4828   cost = iv_ca_cost (ivs);
4829   if (n_ivs)
4830     *n_ivs = iv_ca_n_cands (ivs);
4831   iv_ca_delta_commit (data, ivs, *delta, false);
4832
4833   return cost;
4834 }
4835
4836 /* Try narrowing set IVS by removing CAND.  Return the cost of
4837    the new set and store the differences in DELTA.  */
4838
4839 static unsigned
4840 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4841               struct iv_cand *cand, struct iv_ca_delta **delta)
4842 {
4843   unsigned i, ci;
4844   struct iv_use *use;
4845   struct cost_pair *old_cp, *new_cp, *cp;
4846   bitmap_iterator bi;
4847   struct iv_cand *cnd;
4848   unsigned cost;
4849
4850   *delta = NULL;
4851   for (i = 0; i < n_iv_uses (data); i++)
4852     {
4853       use = iv_use (data, i);
4854
4855       old_cp = iv_ca_cand_for_use (ivs, use);
4856       if (old_cp->cand != cand)
4857         continue;
4858
4859       new_cp = NULL;
4860
4861       if (data->consider_all_candidates)
4862         {
4863           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4864             {
4865               if (ci == cand->id)
4866                 continue;
4867
4868               cnd = iv_cand (data, ci);
4869
4870               cp = get_use_iv_cost (data, use, cnd);
4871               if (!cp)
4872                 continue;
4873               if (!iv_ca_has_deps (ivs, cp))
4874                 continue;
4875
4876               if (!cheaper_cost_pair (cp, new_cp))
4877                 continue;
4878
4879               new_cp = cp;
4880             }
4881         }
4882       else
4883         {
4884           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4885             {
4886               if (ci == cand->id)
4887                 continue;
4888
4889               cnd = iv_cand (data, ci);
4890
4891               cp = get_use_iv_cost (data, use, cnd);
4892               if (!cp)
4893                 continue;
4894               if (!iv_ca_has_deps (ivs, cp))
4895                 continue;
4896
4897               if (!cheaper_cost_pair (cp, new_cp))
4898                 continue;
4899
4900               new_cp = cp;
4901             }
4902         }
4903
4904       if (!new_cp)
4905         {
4906           iv_ca_delta_free (delta);
4907           return INFTY;
4908         }
4909
4910       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4911     }
4912
4913   iv_ca_delta_commit (data, ivs, *delta, true);
4914   cost = iv_ca_cost (ivs);
4915   iv_ca_delta_commit (data, ivs, *delta, false);
4916
4917   return cost;
4918 }
4919
4920 /* Try optimizing the set of candidates IVS by removing candidates different
4921    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4922    differences in DELTA.  */
4923
4924 static unsigned
4925 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4926              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4927 {
4928   bitmap_iterator bi;
4929   struct iv_ca_delta *act_delta, *best_delta;
4930   unsigned i, best_cost, acost;
4931   struct iv_cand *cand;
4932
4933   best_delta = NULL;
4934   best_cost = iv_ca_cost (ivs);
4935
4936   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4937     {
4938       cand = iv_cand (data, i);
4939
4940       if (cand == except_cand)
4941         continue;
4942
4943       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4944
4945       if (acost < best_cost)
4946         {
4947           best_cost = acost;
4948           iv_ca_delta_free (&best_delta);
4949           best_delta = act_delta;
4950         }
4951       else
4952         iv_ca_delta_free (&act_delta);
4953     }
4954
4955   if (!best_delta)
4956     {
4957       *delta = NULL;
4958       return best_cost;
4959     }
4960
4961   /* Recurse to possibly remove other unnecessary ivs.  */
4962   iv_ca_delta_commit (data, ivs, best_delta, true);
4963   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4964   iv_ca_delta_commit (data, ivs, best_delta, false);
4965   *delta = iv_ca_delta_join (best_delta, *delta);
4966   return best_cost;
4967 }
4968
4969 /* Tries to extend the sets IVS in the best possible way in order
4970    to express the USE.  */
4971
4972 static bool
4973 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4974                   struct iv_use *use)
4975 {
4976   unsigned best_cost, act_cost;
4977   unsigned i;
4978   bitmap_iterator bi;
4979   struct iv_cand *cand;
4980   struct iv_ca_delta *best_delta = NULL, *act_delta;
4981   struct cost_pair *cp;
4982
4983   iv_ca_add_use (data, ivs, use);
4984   best_cost = iv_ca_cost (ivs);
4985
4986   cp = iv_ca_cand_for_use (ivs, use);
4987   if (cp)
4988     {
4989       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4990       iv_ca_set_no_cp (data, ivs, use);
4991     }
4992
4993   /* First try important candidates.  Only if it fails, try the specific ones.
4994      Rationale -- in loops with many variables the best choice often is to use
4995      just one generic biv.  If we added here many ivs specific to the uses,
4996      the optimization algorithm later would be likely to get stuck in a local
4997      minimum, thus causing us to create too many ivs.  The approach from
4998      few ivs to more seems more likely to be successful -- starting from few
4999      ivs, replacing an expensive use by a specific iv should always be a
5000      win.  */
5001   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5002     {
5003       cand = iv_cand (data, i);
5004
5005       if (iv_ca_cand_used_p (ivs, cand))
5006         continue;
5007
5008       cp = get_use_iv_cost (data, use, cand);
5009       if (!cp)
5010         continue;
5011
5012       iv_ca_set_cp (data, ivs, use, cp);
5013       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5014       iv_ca_set_no_cp (data, ivs, use);
5015       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5016
5017       if (act_cost < best_cost)
5018         {
5019           best_cost = act_cost;
5020
5021           iv_ca_delta_free (&best_delta);
5022           best_delta = act_delta;
5023         }
5024       else
5025         iv_ca_delta_free (&act_delta);
5026     }
5027
5028   if (best_cost == INFTY)
5029     {
5030       for (i = 0; i < use->n_map_members; i++)
5031         {
5032           cp = use->cost_map + i;
5033           cand = cp->cand;
5034           if (!cand)
5035             continue;
5036
5037           /* Already tried this.  */
5038           if (cand->important)
5039             continue;
5040
5041           if (iv_ca_cand_used_p (ivs, cand))
5042             continue;
5043
5044           act_delta = NULL;
5045           iv_ca_set_cp (data, ivs, use, cp);
5046           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5047           iv_ca_set_no_cp (data, ivs, use);
5048           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5049                                        cp, act_delta);
5050
5051           if (act_cost < best_cost)
5052             {
5053               best_cost = act_cost;
5054
5055               if (best_delta)
5056                 iv_ca_delta_free (&best_delta);
5057               best_delta = act_delta;
5058             }
5059           else
5060             iv_ca_delta_free (&act_delta);
5061         }
5062     }
5063
5064   iv_ca_delta_commit (data, ivs, best_delta, true);
5065   iv_ca_delta_free (&best_delta);
5066
5067   return (best_cost != INFTY);
5068 }
5069
5070 /* Finds an initial assignment of candidates to uses.  */
5071
5072 static struct iv_ca *
5073 get_initial_solution (struct ivopts_data *data)
5074 {
5075   struct iv_ca *ivs = iv_ca_new (data);
5076   unsigned i;
5077
5078   for (i = 0; i < n_iv_uses (data); i++)
5079     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5080       {
5081         iv_ca_free (&ivs);
5082         return NULL;
5083       }
5084
5085   return ivs;
5086 }
5087
5088 /* Tries to improve set of induction variables IVS.  */
5089
5090 static bool
5091 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5092 {
5093   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5094   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5095   struct iv_cand *cand;
5096
5097   /* Try extending the set of induction variables by one.  */
5098   for (i = 0; i < n_iv_cands (data); i++)
5099     {
5100       cand = iv_cand (data, i);
5101
5102       if (iv_ca_cand_used_p (ivs, cand))
5103         continue;
5104
5105       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5106       if (!act_delta)
5107         continue;
5108
5109       /* If we successfully added the candidate and the set is small enough,
5110          try optimizing it by removing other candidates.  */
5111       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5112         {
5113           iv_ca_delta_commit (data, ivs, act_delta, true);
5114           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5115           iv_ca_delta_commit (data, ivs, act_delta, false);
5116           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5117         }
5118
5119       if (acost < best_cost)
5120         {
5121           best_cost = acost;
5122           iv_ca_delta_free (&best_delta);
5123           best_delta = act_delta;
5124         }
5125       else
5126         iv_ca_delta_free (&act_delta);
5127     }
5128
5129   if (!best_delta)
5130     {
5131       /* Try removing the candidates from the set instead.  */
5132       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5133
5134       /* Nothing more we can do.  */
5135       if (!best_delta)
5136         return false;
5137     }
5138
5139   iv_ca_delta_commit (data, ivs, best_delta, true);
5140   gcc_assert (best_cost == iv_ca_cost (ivs));
5141   iv_ca_delta_free (&best_delta);
5142   return true;
5143 }
5144
5145 /* Attempts to find the optimal set of induction variables.  We do simple
5146    greedy heuristic -- we try to replace at most one candidate in the selected
5147    solution and remove the unused ivs while this improves the cost.  */
5148
5149 static struct iv_ca *
5150 find_optimal_iv_set (struct ivopts_data *data)
5151 {
5152   unsigned i;
5153   struct iv_ca *set;
5154   struct iv_use *use;
5155
5156   /* Get the initial solution.  */
5157   set = get_initial_solution (data);
5158   if (!set)
5159     {
5160       if (dump_file && (dump_flags & TDF_DETAILS))
5161         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5162       return NULL;
5163     }
5164
5165   if (dump_file && (dump_flags & TDF_DETAILS))
5166     {
5167       fprintf (dump_file, "Initial set of candidates:\n");
5168       iv_ca_dump (data, dump_file, set);
5169     }
5170
5171   while (try_improve_iv_set (data, set))
5172     {
5173       if (dump_file && (dump_flags & TDF_DETAILS))
5174         {
5175           fprintf (dump_file, "Improved to:\n");
5176           iv_ca_dump (data, dump_file, set);
5177         }
5178     }
5179
5180   if (dump_file && (dump_flags & TDF_DETAILS))
5181     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5182
5183   for (i = 0; i < n_iv_uses (data); i++)
5184     {
5185       use = iv_use (data, i);
5186       use->selected = iv_ca_cand_for_use (set, use)->cand;
5187     }
5188
5189   return set;
5190 }
5191
5192 /* Creates a new induction variable corresponding to CAND.  */
5193
5194 static void
5195 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5196 {
5197   block_stmt_iterator incr_pos;
5198   tree base;
5199   bool after = false;
5200
5201   if (!cand->iv)
5202     return;
5203
5204   switch (cand->pos)
5205     {
5206     case IP_NORMAL:
5207       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5208       break;
5209
5210     case IP_END:
5211       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5212       after = true;
5213       break;
5214
5215     case IP_ORIGINAL:
5216       /* Mark that the iv is preserved.  */
5217       name_info (data, cand->var_before)->preserve_biv = true;
5218       name_info (data, cand->var_after)->preserve_biv = true;
5219
5220       /* Rewrite the increment so that it uses var_before directly.  */
5221       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5222
5223       return;
5224     }
5225
5226   gimple_add_tmp_var (cand->var_before);
5227   add_referenced_tmp_var (cand->var_before);
5228
5229   base = unshare_expr (cand->iv->base);
5230
5231   create_iv (base, unshare_expr (cand->iv->step),
5232              cand->var_before, data->current_loop,
5233              &incr_pos, after, &cand->var_before, &cand->var_after);
5234 }
5235
5236 /* Creates new induction variables described in SET.  */
5237
5238 static void
5239 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5240 {
5241   unsigned i;
5242   struct iv_cand *cand;
5243   bitmap_iterator bi;
5244
5245   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5246     {
5247       cand = iv_cand (data, i);
5248       create_new_iv (data, cand);
5249     }
5250 }
5251
5252 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5253    is true, remove also the ssa name defined by the statement.  */
5254
5255 static void
5256 remove_statement (tree stmt, bool including_defined_name)
5257 {
5258   if (TREE_CODE (stmt) == PHI_NODE)
5259     {
5260       if (!including_defined_name)
5261         {
5262           /* Prevent the ssa name defined by the statement from being removed.  */
5263           SET_PHI_RESULT (stmt, NULL);
5264         }
5265       remove_phi_node (stmt, NULL_TREE);
5266     }
5267   else
5268     {
5269       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5270
5271       bsi_remove (&bsi);
5272     }
5273 }
5274
5275 /* Rewrites USE (definition of iv used in a nonlinear expression)
5276    using candidate CAND.  */
5277
5278 static void
5279 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5280                             struct iv_use *use, struct iv_cand *cand)
5281 {
5282   tree comp;
5283   tree op, stmts, tgt, ass;
5284   block_stmt_iterator bsi, pbsi;
5285
5286   /* An important special case -- if we are asked to express value of
5287      the original iv by itself, just exit; there is no need to
5288      introduce a new computation (that might also need casting the
5289      variable to unsigned and back).  */
5290   if (cand->pos == IP_ORIGINAL
5291       && TREE_CODE (use->stmt) == MODIFY_EXPR
5292       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5293     {
5294       op = TREE_OPERAND (use->stmt, 1);
5295
5296       /* Be a bit careful.  In case variable is expressed in some
5297          complicated way, rewrite it so that we may get rid of this
5298          complicated expression.  */
5299       if ((TREE_CODE (op) == PLUS_EXPR
5300            || TREE_CODE (op) == MINUS_EXPR)
5301           && TREE_OPERAND (op, 0) == cand->var_before
5302           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5303         return;
5304     }
5305
5306   comp = unshare_expr (get_computation (data->current_loop,
5307                                         use, cand));
5308   switch (TREE_CODE (use->stmt))
5309     {
5310     case PHI_NODE:
5311       tgt = PHI_RESULT (use->stmt);
5312
5313       /* If we should keep the biv, do not replace it.  */
5314       if (name_info (data, tgt)->preserve_biv)
5315         return;
5316
5317       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5318       while (!bsi_end_p (pbsi)
5319              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5320         {
5321           bsi = pbsi;
5322           bsi_next (&pbsi);
5323         }
5324       break;
5325
5326     case MODIFY_EXPR:
5327       tgt = TREE_OPERAND (use->stmt, 0);
5328       bsi = bsi_for_stmt (use->stmt);
5329       break;
5330
5331     default:
5332       gcc_unreachable ();
5333     }
5334
5335   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5336
5337   if (TREE_CODE (use->stmt) == PHI_NODE)
5338     {
5339       if (stmts)
5340         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5341       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5342       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5343       remove_statement (use->stmt, false);
5344       SSA_NAME_DEF_STMT (tgt) = ass;
5345     }
5346   else
5347     {
5348       if (stmts)
5349         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5350       TREE_OPERAND (use->stmt, 1) = op;
5351     }
5352 }
5353
5354 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5355    for_each_index.  */
5356
5357 static bool
5358 idx_remove_ssa_names (tree base, tree *idx,
5359                       void *data ATTRIBUTE_UNUSED)
5360 {
5361   tree *op;
5362
5363   if (TREE_CODE (*idx) == SSA_NAME)
5364     *idx = SSA_NAME_VAR (*idx);
5365
5366   if (TREE_CODE (base) == ARRAY_REF)
5367     {
5368       op = &TREE_OPERAND (base, 2);
5369       if (*op
5370           && TREE_CODE (*op) == SSA_NAME)
5371         *op = SSA_NAME_VAR (*op);
5372       op = &TREE_OPERAND (base, 3);
5373       if (*op
5374           && TREE_CODE (*op) == SSA_NAME)
5375         *op = SSA_NAME_VAR (*op);
5376     }
5377
5378   return true;
5379 }
5380
5381 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5382
5383 static tree
5384 unshare_and_remove_ssa_names (tree ref)
5385 {
5386   ref = unshare_expr (ref);
5387   for_each_index (&ref, idx_remove_ssa_names, NULL);
5388
5389   return ref;
5390 }
5391
5392 /* Rewrites base of memory access OP with expression WITH in statement
5393    pointed to by BSI.  */
5394
5395 static void
5396 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5397 {
5398   tree bvar, var, new_name, copy, name;
5399   tree orig;
5400
5401   var = bvar = get_base_address (*op);
5402
5403   if (!var || TREE_CODE (with) != SSA_NAME)
5404     goto do_rewrite;
5405
5406   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5407   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5408   if (TREE_CODE (var) == INDIRECT_REF)
5409     var = TREE_OPERAND (var, 0);
5410   if (TREE_CODE (var) == SSA_NAME)
5411     {
5412       name = var;
5413       var = SSA_NAME_VAR (var);
5414     }
5415   else if (DECL_P (var))
5416     name = NULL_TREE;
5417   else
5418     goto do_rewrite;
5419
5420   /* We need to add a memory tag for the variable.  But we do not want
5421      to add it to the temporary used for the computations, since this leads
5422      to problems in redundancy elimination when there are common parts
5423      in two computations referring to the different arrays.  So we copy
5424      the variable to a new temporary.  */
5425   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5426
5427   if (name)
5428     new_name = duplicate_ssa_name (name, copy);
5429   else
5430     {
5431       tree tag = var_ann (var)->type_mem_tag;
5432       tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5433       add_referenced_tmp_var (new_ptr);
5434       if (tag)
5435         var_ann (new_ptr)->type_mem_tag = tag;
5436       else
5437         add_type_alias (new_ptr, var);
5438       new_name = make_ssa_name (new_ptr, copy);
5439     }
5440
5441   TREE_OPERAND (copy, 0) = new_name;
5442   update_stmt (copy);
5443   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5444   with = new_name;
5445
5446 do_rewrite:
5447
5448   orig = NULL_TREE;
5449   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5450   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5451
5452   if (TREE_CODE (*op) == INDIRECT_REF)
5453     orig = REF_ORIGINAL (*op);
5454   if (!orig)
5455     orig = unshare_and_remove_ssa_names (*op);
5456
5457   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5458
5459   /* Record the original reference, for purposes of alias analysis.  */
5460   REF_ORIGINAL (*op) = orig;
5461
5462   /* Virtual operands in the original statement may have to be renamed
5463      because of the replacement.  */
5464   mark_new_vars_to_rename (bsi_stmt (*bsi));
5465 }
5466
5467 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5468
5469 static void
5470 rewrite_use_address (struct ivopts_data *data,
5471                      struct iv_use *use, struct iv_cand *cand)
5472 {
5473   tree comp = unshare_expr (get_computation (data->current_loop,
5474                                              use, cand));
5475   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5476   tree stmts;
5477   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5478
5479   if (stmts)
5480     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5481
5482   rewrite_address_base (&bsi, use->op_p, op);
5483 }
5484
5485 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5486    candidate CAND.  */
5487
5488 static void
5489 rewrite_use_compare (struct ivopts_data *data,
5490                      struct iv_use *use, struct iv_cand *cand)
5491 {
5492   tree comp;
5493   tree *op_p, cond, op, stmts, bound;
5494   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5495   enum tree_code compare;
5496   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5497
5498   bound = cp->value;
5499   if (bound)
5500     {
5501       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5502       tree var_type = TREE_TYPE (var);
5503
5504       compare = iv_elimination_compare (data, use);
5505       bound = fold_convert (var_type, bound);
5506       op = force_gimple_operand (unshare_expr (bound), &stmts,
5507                                  true, NULL_TREE);
5508
5509       if (stmts)
5510         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5511
5512       *use->op_p = build2 (compare, boolean_type_node, var, op);
5513       update_stmt (use->stmt);
5514       return;
5515     }
5516
5517   /* The induction variable elimination failed; just express the original
5518      giv.  */
5519   comp = unshare_expr (get_computation (data->current_loop, use, cand));
5520
5521   cond = *use->op_p;
5522   op_p = &TREE_OPERAND (cond, 0);
5523   if (TREE_CODE (*op_p) != SSA_NAME
5524       || zero_p (get_iv (data, *op_p)->step))
5525     op_p = &TREE_OPERAND (cond, 1);
5526
5527   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5528   if (stmts)
5529     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5530
5531   *op_p = op;
5532 }
5533
5534 /* Ensure that operand *OP_P may be used at the end of EXIT without
5535    violating loop closed ssa form.  */
5536
5537 static void
5538 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5539 {
5540   basic_block def_bb;
5541   struct loop *def_loop;
5542   tree phi, use;
5543
5544   use = USE_FROM_PTR (op_p);
5545   if (TREE_CODE (use) != SSA_NAME)
5546     return;
5547
5548   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5549   if (!def_bb)
5550     return;
5551
5552   def_loop = def_bb->loop_father;
5553   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5554     return;
5555
5556   /* Try finding a phi node that copies the value out of the loop.  */
5557   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5558     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5559       break;
5560
5561   if (!phi)
5562     {
5563       /* Create such a phi node.  */
5564       tree new_name = duplicate_ssa_name (use, NULL);
5565
5566       phi = create_phi_node (new_name, exit->dest);
5567       SSA_NAME_DEF_STMT (new_name) = phi;
5568       add_phi_arg (phi, use, exit);
5569     }
5570
5571   SET_USE (op_p, PHI_RESULT (phi));
5572 }
5573
5574 /* Ensure that operands of STMT may be used at the end of EXIT without
5575    violating loop closed ssa form.  */
5576
5577 static void
5578 protect_loop_closed_ssa_form (edge exit, tree stmt)
5579 {
5580   use_optype uses;
5581   vuse_optype vuses;
5582   v_may_def_optype v_may_defs;
5583   unsigned i;
5584
5585   uses = STMT_USE_OPS (stmt);
5586   for (i = 0; i < NUM_USES (uses); i++)
5587     protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5588
5589   vuses = STMT_VUSE_OPS (stmt);
5590   for (i = 0; i < NUM_VUSES (vuses); i++)
5591     protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5592
5593   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5594   for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5595     protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5596 }
5597
5598 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5599    so that they are emitted on the correct place, and so that the loop closed
5600    ssa form is preserved.  */
5601
5602 static void
5603 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5604 {
5605   tree_stmt_iterator tsi;
5606   block_stmt_iterator bsi;
5607   tree phi, stmt, def, next;
5608
5609   if (!single_pred_p (exit->dest))
5610     split_loop_exit_edge (exit);
5611
5612   /* Ensure there is label in exit->dest, so that we can
5613      insert after it.  */
5614   tree_block_label (exit->dest);
5615   bsi = bsi_after_labels (exit->dest);
5616
5617   if (TREE_CODE (stmts) == STATEMENT_LIST)
5618     {
5619       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5620         {
5621           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5622           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5623         }
5624     }
5625   else
5626     {
5627       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5628       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5629     }
5630
5631   if (!op)
5632     return;
5633
5634   for (phi = phi_nodes (exit->dest); phi; phi = next)
5635     {
5636       next = PHI_CHAIN (phi);
5637
5638       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5639         {
5640           def = PHI_RESULT (phi);
5641           remove_statement (phi, false);
5642           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5643                         def, op);
5644           SSA_NAME_DEF_STMT (def) = stmt;
5645           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5646         }
5647     }
5648 }
5649
5650 /* Rewrites the final value of USE (that is only needed outside of the loop)
5651    using candidate CAND.  */
5652
5653 static void
5654 rewrite_use_outer (struct ivopts_data *data,
5655                    struct iv_use *use, struct iv_cand *cand)
5656 {
5657   edge exit;
5658   tree value, op, stmts, tgt;
5659   tree phi;
5660
5661   switch (TREE_CODE (use->stmt))
5662     {
5663     case PHI_NODE:
5664       tgt = PHI_RESULT (use->stmt);
5665       break;
5666     case MODIFY_EXPR:
5667       tgt = TREE_OPERAND (use->stmt, 0);
5668       break;
5669     default:
5670       gcc_unreachable ();
5671     }
5672
5673   exit = single_dom_exit (data->current_loop);
5674
5675   if (exit)
5676     {
5677       if (!cand->iv)
5678         {
5679           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5680           value = cp->value;
5681         }
5682       else
5683         value = get_computation_at (data->current_loop,
5684                                     use, cand, last_stmt (exit->src));
5685
5686       value = unshare_expr (value);
5687       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5688
5689       /* If we will preserve the iv anyway and we would need to perform
5690          some computation to replace the final value, do nothing.  */
5691       if (stmts && name_info (data, tgt)->preserve_biv)
5692         return;
5693
5694       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5695         {
5696           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5697
5698           if (USE_FROM_PTR (use_p) == tgt)
5699             SET_USE (use_p, op);
5700         }
5701
5702       if (stmts)
5703         compute_phi_arg_on_exit (exit, stmts, op);
5704
5705       /* Enable removal of the statement.  We cannot remove it directly,
5706          since we may still need the aliasing information attached to the
5707          ssa name defined by it.  */
5708       name_info (data, tgt)->iv->have_use_for = false;
5709       return;
5710     }
5711
5712   /* If the variable is going to be preserved anyway, there is nothing to
5713      do.  */
5714   if (name_info (data, tgt)->preserve_biv)
5715     return;
5716
5717   /* Otherwise we just need to compute the iv.  */
5718   rewrite_use_nonlinear_expr (data, use, cand);
5719 }
5720
5721 /* Rewrites USE using candidate CAND.  */
5722
5723 static void
5724 rewrite_use (struct ivopts_data *data,
5725              struct iv_use *use, struct iv_cand *cand)
5726 {
5727   switch (use->type)
5728     {
5729       case USE_NONLINEAR_EXPR:
5730         rewrite_use_nonlinear_expr (data, use, cand);
5731         break;
5732
5733       case USE_OUTER:
5734         rewrite_use_outer (data, use, cand);
5735         break;
5736
5737       case USE_ADDRESS:
5738         rewrite_use_address (data, use, cand);
5739         break;
5740
5741       case USE_COMPARE:
5742         rewrite_use_compare (data, use, cand);
5743         break;
5744
5745       default:
5746         gcc_unreachable ();
5747     }
5748   update_stmt (use->stmt);
5749 }
5750
5751 /* Rewrite the uses using the selected induction variables.  */
5752
5753 static void
5754 rewrite_uses (struct ivopts_data *data)
5755 {
5756   unsigned i;
5757   struct iv_cand *cand;
5758   struct iv_use *use;
5759
5760   for (i = 0; i < n_iv_uses (data); i++)
5761     {
5762       use = iv_use (data, i);
5763       cand = use->selected;
5764       gcc_assert (cand);
5765
5766       rewrite_use (data, use, cand);
5767     }
5768 }
5769
5770 /* Removes the ivs that are not used after rewriting.  */
5771
5772 static void
5773 remove_unused_ivs (struct ivopts_data *data)
5774 {
5775   unsigned j;
5776   bitmap_iterator bi;
5777
5778   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5779     {
5780       struct version_info *info;
5781
5782       info = ver_info (data, j);
5783       if (info->iv
5784           && !zero_p (info->iv->step)
5785           && !info->inv_id
5786           && !info->iv->have_use_for
5787           && !info->preserve_biv)
5788         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5789     }
5790 }
5791
5792 /* Frees data allocated by the optimization of a single loop.  */
5793
5794 static void
5795 free_loop_data (struct ivopts_data *data)
5796 {
5797   unsigned i, j;
5798   bitmap_iterator bi;
5799   tree obj;
5800
5801   htab_empty (data->niters);
5802
5803   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5804     {
5805       struct version_info *info;
5806
5807       info = ver_info (data, i);
5808       if (info->iv)
5809         free (info->iv);
5810       info->iv = NULL;
5811       info->has_nonlin_use = false;
5812       info->preserve_biv = false;
5813       info->inv_id = 0;
5814     }
5815   bitmap_clear (data->relevant);
5816   bitmap_clear (data->important_candidates);
5817
5818   for (i = 0; i < n_iv_uses (data); i++)
5819     {
5820       struct iv_use *use = iv_use (data, i);
5821
5822       free (use->iv);
5823       BITMAP_FREE (use->related_cands);
5824       for (j = 0; j < use->n_map_members; j++)
5825         if (use->cost_map[j].depends_on)
5826           BITMAP_FREE (use->cost_map[j].depends_on);
5827       free (use->cost_map);
5828       free (use);
5829     }
5830   VEC_truncate (iv_use_p, data->iv_uses, 0);
5831
5832   for (i = 0; i < n_iv_cands (data); i++)
5833     {
5834       struct iv_cand *cand = iv_cand (data, i);
5835
5836       if (cand->iv)
5837         free (cand->iv);
5838       if (cand->depends_on)
5839         BITMAP_FREE (cand->depends_on);
5840       free (cand);
5841     }
5842   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5843
5844   if (data->version_info_size < num_ssa_names)
5845     {
5846       data->version_info_size = 2 * num_ssa_names;
5847       free (data->version_info);
5848       data->version_info = xcalloc (data->version_info_size,
5849                                     sizeof (struct version_info));
5850     }
5851
5852   data->max_inv_id = 0;
5853
5854   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5855     SET_DECL_RTL (obj, NULL_RTX);
5856
5857   VEC_truncate (tree, decl_rtl_to_reset, 0);
5858 }
5859
5860 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5861    loop tree.  */
5862
5863 static void
5864 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5865 {
5866   unsigned i;
5867
5868   for (i = 1; i < loops->num; i++)
5869     if (loops->parray[i])
5870       {
5871         free (loops->parray[i]->aux);
5872         loops->parray[i]->aux = NULL;
5873       }
5874
5875   free_loop_data (data);
5876   free (data->version_info);
5877   BITMAP_FREE (data->relevant);
5878   BITMAP_FREE (data->important_candidates);
5879   htab_delete (data->niters);
5880
5881   VEC_free (tree, heap, decl_rtl_to_reset);
5882   VEC_free (iv_use_p, heap, data->iv_uses);
5883   VEC_free (iv_cand_p, heap, data->iv_candidates);
5884 }
5885
5886 /* Optimizes the LOOP.  Returns true if anything changed.  */
5887
5888 static bool
5889 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5890 {
5891   bool changed = false;
5892   struct iv_ca *iv_ca;
5893   edge exit;
5894
5895   data->current_loop = loop;
5896
5897   if (dump_file && (dump_flags & TDF_DETAILS))
5898     {
5899       fprintf (dump_file, "Processing loop %d\n", loop->num);
5900
5901       exit = single_dom_exit (loop);
5902       if (exit)
5903         {
5904           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5905                    exit->src->index, exit->dest->index);
5906           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5907           fprintf (dump_file, "\n");
5908         }
5909
5910       fprintf (dump_file, "\n");
5911     }
5912
5913   /* For each ssa name determines whether it behaves as an induction variable
5914      in some loop.  */
5915   if (!find_induction_variables (data))
5916     goto finish;
5917
5918   /* Finds interesting uses (item 1).  */
5919   find_interesting_uses (data);
5920   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5921     goto finish;
5922
5923   /* Finds candidates for the induction variables (item 2).  */
5924   find_iv_candidates (data);
5925
5926   /* Calculates the costs (item 3, part 1).  */
5927   determine_use_iv_costs (data);
5928   determine_iv_costs (data);
5929   determine_set_costs (data);
5930
5931   /* Find the optimal set of induction variables (item 3, part 2).  */
5932   iv_ca = find_optimal_iv_set (data);
5933   if (!iv_ca)
5934     goto finish;
5935   changed = true;
5936
5937   /* Create the new induction variables (item 4, part 1).  */
5938   create_new_ivs (data, iv_ca);
5939   iv_ca_free (&iv_ca);
5940
5941   /* Rewrite the uses (item 4, part 2).  */
5942   rewrite_uses (data);
5943
5944   /* Remove the ivs that are unused after rewriting.  */
5945   remove_unused_ivs (data);
5946
5947   /* We have changed the structure of induction variables; it might happen
5948      that definitions in the scev database refer to some of them that were
5949      eliminated.  */
5950   scev_reset ();
5951
5952 finish:
5953   free_loop_data (data);
5954
5955   return changed;
5956 }
5957
5958 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5959
5960 void
5961 tree_ssa_iv_optimize (struct loops *loops)
5962 {
5963   struct loop *loop;
5964   struct ivopts_data data;
5965
5966   tree_ssa_iv_optimize_init (loops, &data);
5967
5968   /* Optimize the loops starting with the innermost ones.  */
5969   loop = loops->tree_root;
5970   while (loop->inner)
5971     loop = loop->inner;
5972
5973   /* Scan the loops, inner ones first.  */
5974   while (loop != loops->tree_root)
5975     {
5976       if (dump_file && (dump_flags & TDF_DETAILS))
5977         flow_loop_dump (loop, dump_file, NULL, 1);
5978
5979       tree_ssa_iv_optimize_loop (&data, loop);
5980
5981       if (loop->next)
5982         {
5983           loop = loop->next;
5984           while (loop->inner)
5985             loop = loop->inner;
5986         }
5987       else
5988         loop = loop->outer;
5989     }
5990
5991   /* FIXME.  IV opts introduces new aliases and call-clobbered
5992      variables, which need to be renamed.  However, when we call the
5993      renamer, not all statements will be scanned for operands.  In
5994      particular, the newly introduced aliases may appear in statements
5995      that are considered "unmodified", so the renamer will not get a
5996      chance to rename those operands.
5997
5998      Work around this problem by forcing an operand re-scan on every
5999      statement.  This will not be necessary once the new operand
6000      scanner is implemented.  */
6001   if (need_ssa_update_p ())
6002     {
6003       basic_block bb;
6004       block_stmt_iterator si;
6005       FOR_EACH_BB (bb)
6006         for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
6007           update_stmt (bsi_stmt (si));
6008     }
6009
6010   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
6011   tree_ssa_iv_optimize_finalize (loops, &data);
6012 }