gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
   40    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
   46          the time spent on modifying the induction variable's value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold_convert (ptr_type_node,
 795                            build_fold_addr_expr (base));
 796
 797     case PLUS_EXPR:
 798     case MINUS_EXPR:
 799       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 800       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 801
 802       if (!op1)
 803         return op0;
 804
 805       if (!op0)
 806         return (code == PLUS_EXPR
 807                 ? op1
 808                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 809
 810       return fold_build2 (code, ptr_type_node, op0, op1);
 811
 812     case NOP_EXPR:
 813     case CONVERT_EXPR:
 814       return determine_base_object (TREE_OPERAND (expr, 0));
 815
 816     default:
 817       return fold_convert (ptr_type_node, expr);
 818     }
 819 }
 820
 821 /* Allocates an induction variable with given initial value BASE and step STEP
 822    for loop LOOP.  */
 823
 824 static struct iv *
 825 alloc_iv (tree base, tree step)
 826 {
 827   struct iv *iv = xcalloc (1, sizeof (struct iv));
 828
 829   if (step && integer_zerop (step))
 830     step = NULL_TREE;
 831
 832   iv->base = base;
 833   iv->base_object = determine_base_object (base);
 834   iv->step = step;
 835   iv->biv_p = false;
 836   iv->have_use_for = false;
 837   iv->use_id = 0;
 838   iv->ssa_name = NULL_TREE;
 839
 840   return iv;
 841 }
 842
 843 /* Sets STEP and BASE for induction variable IV.  */
 844
 845 static void
 846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 847 {
 848   struct version_info *info = name_info (data, iv);
 849
 850   gcc_assert (!info->iv);
 851
 852   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 853   info->iv = alloc_iv (base, step);
 854   info->iv->ssa_name = iv;
 855 }
 856
 857 /* Finds induction variable declaration for VAR.  */
 858
 859 static struct iv *
 860 get_iv (struct ivopts_data *data, tree var)
 861 {
 862   basic_block bb;
 863
 864   if (!name_info (data, var)->iv)
 865     {
 866       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 867
 868       if (!bb
 869           || !flow_bb_inside_loop_p (data->current_loop, bb))
 870         set_iv (data, var, var, NULL_TREE);
 871     }
 872
 873   return name_info (data, var)->iv;
 874 }
 875
 876 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 877    not define a simple affine biv with nonzero step.  */
 878
 879 static tree
 880 determine_biv_step (tree phi)
 881 {
 882   struct loop *loop = bb_for_stmt (phi)->loop_father;
 883   tree name = PHI_RESULT (phi), base, step;
 884
 885   if (!is_gimple_reg (name))
 886     return NULL_TREE;
 887
 888   if (!simple_iv (loop, phi, name, &base, &step, true))
 889     return NULL_TREE;
 890
 891   if (zero_p (step))
 892     return NULL_TREE;
 893
 894   return step;
 895 }
 896
 897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 898
 899 static bool
 900 abnormal_ssa_name_p (tree exp)
 901 {
 902   if (!exp)
 903     return false;
 904
 905   if (TREE_CODE (exp) != SSA_NAME)
 906     return false;
 907
 908   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 909 }
 910
 911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 912    abnormal phi node.  Callback for for_each_index.  */
 913
 914 static bool
 915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 916                                   void *data ATTRIBUTE_UNUSED)
 917 {
 918   if (TREE_CODE (base) == ARRAY_REF)
 919     {
 920       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 921         return false;
 922       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 923         return false;
 924     }
 925
 926   return !abnormal_ssa_name_p (*index);
 927 }
 928
 929 /* Returns true if EXPR contains a ssa name that occurs in an
 930    abnormal phi node.  */
 931
 932 static bool
 933 contains_abnormal_ssa_name_p (tree expr)
 934 {
 935   enum tree_code code;
 936   enum tree_code_class class;
 937
 938   if (!expr)
 939     return false;
 940
 941   code = TREE_CODE (expr);
 942   class = TREE_CODE_CLASS (code);
 943
 944   if (code == SSA_NAME)
 945     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 946
 947   if (code == INTEGER_CST
 948       || is_gimple_min_invariant (expr))
 949     return false;
 950
 951   if (code == ADDR_EXPR)
 952     return !for_each_index (&TREE_OPERAND (expr, 0),
 953                             idx_contains_abnormal_ssa_name_p,
 954                             NULL);
 955
 956   switch (class)
 957     {
 958     case tcc_binary:
 959     case tcc_comparison:
 960       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 961         return true;
 962
 963       /* Fallthru.  */
 964     case tcc_unary:
 965       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 966         return true;
 967
 968       break;
 969
 970     default:
 971       gcc_unreachable ();
 972     }
 973
 974   return false;
 975 }
 976
 977 /* Finds basic ivs.  */
 978
 979 static bool
 980 find_bivs (struct ivopts_data *data)
 981 {
 982   tree phi, step, type, base;
 983   bool found = false;
 984   struct loop *loop = data->current_loop;
 985
 986   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 987     {
 988       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 989         continue;
 990
 991       step = determine_biv_step (phi);
 992       if (!step)
 993         continue;
 994
 995       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 996       base = expand_simple_operations (base);
 997       if (contains_abnormal_ssa_name_p (base)
 998           || contains_abnormal_ssa_name_p (step))
 999         continue;
1000
1001       type = TREE_TYPE (PHI_RESULT (phi));
1002       base = fold_convert (type, base);
1003       if (step)
1004         step = fold_convert (type, step);
1005
1006       set_iv (data, PHI_RESULT (phi), base, step);
1007       found = true;
1008     }
1009
1010   return found;
1011 }
1012
1013 /* Marks basic ivs.  */
1014
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1017 {
1018   tree phi, var;
1019   struct iv *iv, *incr_iv;
1020   struct loop *loop = data->current_loop;
1021   basic_block incr_bb;
1022
1023   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024     {
1025       iv = get_iv (data, PHI_RESULT (phi));
1026       if (!iv)
1027         continue;
1028
1029       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030       incr_iv = get_iv (data, var);
1031       if (!incr_iv)
1032         continue;
1033
1034       /* If the increment is in the subloop, ignore it.  */
1035       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036       if (incr_bb->loop_father != data->current_loop
1037           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038         continue;
1039
1040       iv->biv_p = true;
1041       incr_iv->biv_p = true;
1042     }
1043 }
1044
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046    parameters to BASE and STEP.  */
1047
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050                         tree *base, tree *step)
1051 {
1052   tree lhs;
1053   struct loop *loop = data->current_loop;
1054
1055   *base = NULL_TREE;
1056   *step = NULL_TREE;
1057
1058   if (TREE_CODE (stmt) != MODIFY_EXPR)
1059     return false;
1060
1061   lhs = TREE_OPERAND (stmt, 0);
1062   if (TREE_CODE (lhs) != SSA_NAME)
1063     return false;
1064
1065   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066     return false;
1067   *base = expand_simple_operations (*base);
1068
1069   if (contains_abnormal_ssa_name_p (*base)
1070       || contains_abnormal_ssa_name_p (*step))
1071     return false;
1072
1073   return true;
1074 }
1075
1076 /* Finds general ivs in statement STMT.  */
1077
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 {
1081   tree base, step;
1082
1083   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084     return;
1085
1086   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087 }
1088
1089 /* Finds general ivs in basic block BB.  */
1090
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 {
1094   block_stmt_iterator bsi;
1095
1096   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097     find_givs_in_stmt (data, bsi_stmt (bsi));
1098 }
1099
1100 /* Finds general ivs.  */
1101
1102 static void
1103 find_givs (struct ivopts_data *data)
1104 {
1105   struct loop *loop = data->current_loop;
1106   basic_block *body = get_loop_body_in_dom_order (loop);
1107   unsigned i;
1108
1109   for (i = 0; i < loop->num_nodes; i++)
1110     find_givs_in_bb (data, body[i]);
1111   free (body);
1112 }
1113
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115    variable and if so, its initial value and step.  */
1116
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1119 {
1120   unsigned i;
1121   bitmap_iterator bi;
1122
1123   if (!find_bivs (data))
1124     return false;
1125
1126   find_givs (data);
1127   mark_bivs (data);
1128
1129   if (dump_file && (dump_flags & TDF_DETAILS))
1130     {
1131       struct tree_niter_desc *niter;
1132
1133       niter = niter_for_single_dom_exit (data);
1134
1135       if (niter)
1136         {
1137           fprintf (dump_file, "  number of iterations ");
1138           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139           fprintf (dump_file, "\n");
1140
1141           fprintf (dump_file, "  may be zero if ");
1142           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143           fprintf (dump_file, "\n");
1144           fprintf (dump_file, "\n");
1145         };
1146
1147       fprintf (dump_file, "Induction variables:\n\n");
1148
1149       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150         {
1151           if (ver_info (data, i)->iv)
1152             dump_iv (dump_file, ver_info (data, i)->iv);
1153         }
1154     }
1155
1156   return true;
1157 }
1158
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1160
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163             tree stmt, enum use_type use_type)
1164 {
1165   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167   use->id = n_iv_uses (data);
1168   use->type = use_type;
1169   use->iv = iv;
1170   use->stmt = stmt;
1171   use->op_p = use_p;
1172   use->related_cands = BITMAP_ALLOC (NULL);
1173
1174   /* To avoid showing ssa name in the dumps, if it was not reset by the
1175      caller.  */
1176   iv->ssa_name = NULL_TREE;
1177
1178   if (dump_file && (dump_flags & TDF_DETAILS))
1179     dump_use (dump_file, use);
1180
1181   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182
1183   return use;
1184 }
1185
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187    NONLINEAR_USE is true if the invariant is used in a way we do not
1188    handle specially.  */
1189
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192 {
1193   basic_block bb;
1194   struct version_info *info;
1195
1196   if (TREE_CODE (op) != SSA_NAME
1197       || !is_gimple_reg (op))
1198     return;
1199
1200   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201   if (bb
1202       && flow_bb_inside_loop_p (data->current_loop, bb))
1203     return;
1204
1205   info = name_info (data, op);
1206   info->name = op;
1207   info->has_nonlin_use |= nonlinear_use;
1208   if (!info->inv_id)
1209     info->inv_id = ++data->max_inv_id;
1210   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 }
1212
1213 /* Checks whether the use OP is interesting and if so, records it
1214    as TYPE.  */
1215
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218                                        enum use_type type)
1219 {
1220   struct iv *iv;
1221   struct iv *civ;
1222   tree stmt;
1223   struct iv_use *use;
1224
1225   if (TREE_CODE (op) != SSA_NAME)
1226     return NULL;
1227
1228   iv = get_iv (data, op);
1229   if (!iv)
1230     return NULL;
1231
1232   if (iv->have_use_for)
1233     {
1234       use = iv_use (data, iv->use_id);
1235
1236       gcc_assert (use->type == USE_NONLINEAR_EXPR
1237                   || use->type == USE_OUTER);
1238
1239       if (type == USE_NONLINEAR_EXPR)
1240         use->type = USE_NONLINEAR_EXPR;
1241       return use;
1242     }
1243
1244   if (zero_p (iv->step))
1245     {
1246       record_invariant (data, op, true);
1247       return NULL;
1248     }
1249   iv->have_use_for = true;
1250
1251   civ = xmalloc (sizeof (struct iv));
1252   *civ = *iv;
1253
1254   stmt = SSA_NAME_DEF_STMT (op);
1255   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256               || TREE_CODE (stmt) == MODIFY_EXPR);
1257
1258   use = record_use (data, NULL, civ, stmt, type);
1259   iv->use_id = use->id;
1260
1261   return use;
1262 }
1263
1264 /* Checks whether the use OP is interesting and if so, records it.  */
1265
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 {
1269   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 }
1271
1272 /* Records a definition of induction variable OP that is used outside of the
1273    loop.  */
1274
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 {
1278   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 }
1280
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282    and if so, records it.  */
1283
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286 {
1287   tree *op0_p;
1288   tree *op1_p;
1289   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290   struct iv const_iv;
1291   tree zero = integer_zero_node;
1292
1293   const_iv.step = NULL_TREE;
1294
1295   if (TREE_CODE (*cond_p) != SSA_NAME
1296       && !COMPARISON_CLASS_P (*cond_p))
1297     return;
1298
1299   if (TREE_CODE (*cond_p) == SSA_NAME)
1300     {
1301       op0_p = cond_p;
1302       op1_p = &zero;
1303     }
1304   else
1305     {
1306       op0_p = &TREE_OPERAND (*cond_p, 0);
1307       op1_p = &TREE_OPERAND (*cond_p, 1);
1308     }
1309
1310   if (TREE_CODE (*op0_p) == SSA_NAME)
1311     iv0 = get_iv (data, *op0_p);
1312   else
1313     iv0 = &const_iv;
1314
1315   if (TREE_CODE (*op1_p) == SSA_NAME)
1316     iv1 = get_iv (data, *op1_p);
1317   else
1318     iv1 = &const_iv;
1319
1320   if (/* When comparing with non-invariant value, we may not do any senseful
1321          induction variable elimination.  */
1322       (!iv0 || !iv1)
1323       /* Eliminating condition based on two ivs would be nontrivial.
1324          ??? TODO -- it is not really important to handle this case.  */
1325       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326     {
1327       find_interesting_uses_op (data, *op0_p);
1328       find_interesting_uses_op (data, *op1_p);
1329       return;
1330     }
1331
1332   if (zero_p (iv0->step) && zero_p (iv1->step))
1333     {
1334       /* If both are invariants, this is a work for unswitching.  */
1335       return;
1336     }
1337
1338   civ = xmalloc (sizeof (struct iv));
1339   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341 }
1342
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344    i.e. if all its operands are defined outside of the LOOP.  */
1345
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 {
1349   basic_block def_bb;
1350   unsigned i, len;
1351
1352   if (is_gimple_min_invariant (expr))
1353     return true;
1354
1355   if (TREE_CODE (expr) == SSA_NAME)
1356     {
1357       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358       if (def_bb
1359           && flow_bb_inside_loop_p (loop, def_bb))
1360         return false;
1361
1362       return true;
1363     }
1364
1365   if (!EXPR_P (expr))
1366     return false;
1367
1368   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369   for (i = 0; i < len; i++)
1370     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371       return false;
1372
1373   return true;
1374 }
1375
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377    initial ones.  Returns false when the value of the index cannot be determined.
1378    Callback for for_each_index.  */
1379
1380 struct ifs_ivopts_data
1381 {
1382   struct ivopts_data *ivopts_data;
1383   tree stmt;
1384   tree *step_p;
1385 };
1386
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1389 {
1390   struct ifs_ivopts_data *dta = data;
1391   struct iv *iv;
1392   tree step, iv_step, lbound, off;
1393   struct loop *loop = dta->ivopts_data->current_loop;
1394
1395   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397     return false;
1398
1399   /* If base is a component ref, require that the offset of the reference
1400      be invariant.  */
1401   if (TREE_CODE (base) == COMPONENT_REF)
1402     {
1403       off = component_ref_field_offset (base);
1404       return expr_invariant_in_loop_p (loop, off);
1405     }
1406
1407   /* If base is array, first check whether we will be able to move the
1408      reference out of the loop (in order to take its address in strength
1409      reduction).  In order for this to work we need both lower bound
1410      and step to be loop invariants.  */
1411   if (TREE_CODE (base) == ARRAY_REF)
1412     {
1413       step = array_ref_element_size (base);
1414       lbound = array_ref_low_bound (base);
1415
1416       if (!expr_invariant_in_loop_p (loop, step)
1417           || !expr_invariant_in_loop_p (loop, lbound))
1418         return false;
1419     }
1420
1421   if (TREE_CODE (*idx) != SSA_NAME)
1422     return true;
1423
1424   iv = get_iv (dta->ivopts_data, *idx);
1425   if (!iv)
1426     return false;
1427
1428   *idx = iv->base;
1429
1430   if (!iv->step)
1431     return true;
1432
1433   if (TREE_CODE (base) == ARRAY_REF)
1434     {
1435       step = array_ref_element_size (base);
1436
1437       /* We only handle addresses whose step is an integer constant.  */
1438       if (TREE_CODE (step) != INTEGER_CST)
1439         return false;
1440     }
1441   else
1442     /* The step for pointer arithmetics already is 1 byte.  */
1443     step = build_int_cst (sizetype, 1);
1444
1445   iv_step = convert_step (dta->ivopts_data->current_loop,
1446                           sizetype, iv->base, iv->step, dta->stmt);
1447
1448   if (!iv_step)
1449     {
1450       /* The index might wrap.  */
1451       return false;
1452     }
1453
1454   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1455
1456   if (!*dta->step_p)
1457     *dta->step_p = step;
1458   else
1459     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1460
1461   return true;
1462 }
1463
1464 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1465    object is passed to it in DATA.  */
1466
1467 static bool
1468 idx_record_use (tree base, tree *idx,
1469                 void *data)
1470 {
1471   find_interesting_uses_op (data, *idx);
1472   if (TREE_CODE (base) == ARRAY_REF)
1473     {
1474       find_interesting_uses_op (data, array_ref_element_size (base));
1475       find_interesting_uses_op (data, array_ref_low_bound (base));
1476     }
1477   return true;
1478 }
1479
1480 /* Returns true if memory reference REF may be unaligned.  */
1481
1482 static bool
1483 may_be_unaligned_p (tree ref)
1484 {
1485   tree base;
1486   tree base_type;
1487   HOST_WIDE_INT bitsize;
1488   HOST_WIDE_INT bitpos;
1489   tree toffset;
1490   enum machine_mode mode;
1491   int unsignedp, volatilep;
1492   unsigned base_align;
1493
1494   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1495      thus they are not misaligned.  */
1496   if (TREE_CODE (ref) == TARGET_MEM_REF)
1497     return false;
1498
1499   /* The test below is basically copy of what expr.c:normal_inner_ref
1500      does to check whether the object must be loaded by parts when
1501      STRICT_ALIGNMENT is true.  */
1502   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503                               &unsignedp, &volatilep, true);
1504   base_type = TREE_TYPE (base);
1505   base_align = TYPE_ALIGN (base_type);
1506
1507   if (mode != BLKmode
1508       && (base_align < GET_MODE_ALIGNMENT (mode)
1509           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510           || bitpos % BITS_PER_UNIT != 0))
1511     return true;
1512
1513   return false;
1514 }
1515
1516 /* Finds addresses in *OP_P inside STMT.  */
1517
1518 static void
1519 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1520 {
1521   tree base = *op_p, step = NULL;
1522   struct iv *civ;
1523   struct ifs_ivopts_data ifs_ivopts_data;
1524
1525   /* Do not play with volatile memory references.  A bit too conservative,
1526      perhaps, but safe.  */
1527   if (stmt_ann (stmt)->has_volatile_ops)
1528     goto fail;
1529
1530   /* Ignore bitfields for now.  Not really something terribly complicated
1531      to handle.  TODO.  */
1532   if (TREE_CODE (base) == COMPONENT_REF
1533       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1534     goto fail;
1535
1536   if (STRICT_ALIGNMENT
1537       && may_be_unaligned_p (base))
1538     goto fail;
1539
1540   base = unshare_expr (base);
1541
1542   if (TREE_CODE (base) == TARGET_MEM_REF)
1543     {
1544       tree type = build_pointer_type (TREE_TYPE (base));
1545       tree astep;
1546
1547       if (TMR_BASE (base)
1548           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1549         {
1550           civ = get_iv (data, TMR_BASE (base));
1551           if (!civ)
1552             goto fail;
1553
1554           TMR_BASE (base) = civ->base;
1555           step = civ->step;
1556         }
1557       if (TMR_INDEX (base)
1558           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1559         {
1560           civ = get_iv (data, TMR_INDEX (base));
1561           if (!civ)
1562             goto fail;
1563
1564           TMR_INDEX (base) = civ->base;
1565           astep = civ->step;
1566
1567           if (astep)
1568             {
1569               if (TMR_STEP (base))
1570                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1571
1572               if (step)
1573                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1574               else
1575                 step = astep;
1576             }
1577         }
1578
1579       if (zero_p (step))
1580         goto fail;
1581       base = tree_mem_ref_addr (type, base);
1582     }
1583   else
1584     {
1585       ifs_ivopts_data.ivopts_data = data;
1586       ifs_ivopts_data.stmt = stmt;
1587       ifs_ivopts_data.step_p = &step;
1588       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1589           || zero_p (step))
1590         goto fail;
1591
1592       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1593       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1594
1595       base = build_fold_addr_expr (base);
1596     }
1597
1598   civ = alloc_iv (base, step);
1599   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1600   return;
1601
1602 fail:
1603   for_each_index (op_p, idx_record_use, data);
1604 }
1605
1606 /* Finds and records invariants used in STMT.  */
1607
1608 static void
1609 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1610 {
1611   ssa_op_iter iter;
1612   use_operand_p use_p;
1613   tree op;
1614
1615   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1616     {
1617       op = USE_FROM_PTR (use_p);
1618       record_invariant (data, op, false);
1619     }
1620 }
1621
1622 /* Finds interesting uses of induction variables in the statement STMT.  */
1623
1624 static void
1625 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1626 {
1627   struct iv *iv;
1628   tree op, lhs, rhs;
1629   ssa_op_iter iter;
1630   use_operand_p use_p;
1631
1632   find_invariants_stmt (data, stmt);
1633
1634   if (TREE_CODE (stmt) == COND_EXPR)
1635     {
1636       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1637       return;
1638     }
1639
1640   if (TREE_CODE (stmt) == MODIFY_EXPR)
1641     {
1642       lhs = TREE_OPERAND (stmt, 0);
1643       rhs = TREE_OPERAND (stmt, 1);
1644
1645       if (TREE_CODE (lhs) == SSA_NAME)
1646         {
1647           /* If the statement defines an induction variable, the uses are not
1648              interesting by themselves.  */
1649
1650           iv = get_iv (data, lhs);
1651
1652           if (iv && !zero_p (iv->step))
1653             return;
1654         }
1655
1656       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1657         {
1658         case tcc_comparison:
1659           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1660           return;
1661
1662         case tcc_reference:
1663           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1664           if (REFERENCE_CLASS_P (lhs))
1665             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1666           return;
1667
1668         default: ;
1669         }
1670
1671       if (REFERENCE_CLASS_P (lhs)
1672           && is_gimple_val (rhs))
1673         {
1674           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1675           find_interesting_uses_op (data, rhs);
1676           return;
1677         }
1678
1679       /* TODO -- we should also handle address uses of type
1680
1681          memory = call (whatever);
1682
1683          and
1684
1685          call (memory).  */
1686     }
1687
1688   if (TREE_CODE (stmt) == PHI_NODE
1689       && bb_for_stmt (stmt) == data->current_loop->header)
1690     {
1691       lhs = PHI_RESULT (stmt);
1692       iv = get_iv (data, lhs);
1693
1694       if (iv && !zero_p (iv->step))
1695         return;
1696     }
1697
1698   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1699     {
1700       op = USE_FROM_PTR (use_p);
1701
1702       if (TREE_CODE (op) != SSA_NAME)
1703         continue;
1704
1705       iv = get_iv (data, op);
1706       if (!iv)
1707         continue;
1708
1709       find_interesting_uses_op (data, op);
1710     }
1711 }
1712
1713 /* Finds interesting uses of induction variables outside of loops
1714    on loop exit edge EXIT.  */
1715
1716 static void
1717 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1718 {
1719   tree phi, def;
1720
1721   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1722     {
1723       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1724       find_interesting_uses_outer (data, def);
1725     }
1726 }
1727
1728 /* Finds uses of the induction variables that are interesting.  */
1729
1730 static void
1731 find_interesting_uses (struct ivopts_data *data)
1732 {
1733   basic_block bb;
1734   block_stmt_iterator bsi;
1735   tree phi;
1736   basic_block *body = get_loop_body (data->current_loop);
1737   unsigned i;
1738   struct version_info *info;
1739   edge e;
1740
1741   if (dump_file && (dump_flags & TDF_DETAILS))
1742     fprintf (dump_file, "Uses:\n\n");
1743
1744   for (i = 0; i < data->current_loop->num_nodes; i++)
1745     {
1746       edge_iterator ei;
1747       bb = body[i];
1748
1749       FOR_EACH_EDGE (e, ei, bb->succs)
1750         if (e->dest != EXIT_BLOCK_PTR
1751             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1752           find_interesting_uses_outside (data, e);
1753
1754       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1755         find_interesting_uses_stmt (data, phi);
1756       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1757         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1758     }
1759
1760   if (dump_file && (dump_flags & TDF_DETAILS))
1761     {
1762       bitmap_iterator bi;
1763
1764       fprintf (dump_file, "\n");
1765
1766       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1767         {
1768           info = ver_info (data, i);
1769           if (info->inv_id)
1770             {
1771               fprintf (dump_file, "  ");
1772               print_generic_expr (dump_file, info->name, TDF_SLIM);
1773               fprintf (dump_file, " is invariant (%d)%s\n",
1774                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1775             }
1776         }
1777
1778       fprintf (dump_file, "\n");
1779     }
1780
1781   free (body);
1782 }
1783
1784 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1785    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1786    we are at the top-level of the processed address.  */
1787
1788 static tree
1789 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1790                 unsigned HOST_WIDE_INT *offset)
1791 {
1792   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1793   enum tree_code code;
1794   tree type, orig_type = TREE_TYPE (expr);
1795   unsigned HOST_WIDE_INT off0, off1, st;
1796   tree orig_expr = expr;
1797
1798   STRIP_NOPS (expr);
1799
1800   type = TREE_TYPE (expr);
1801   code = TREE_CODE (expr);
1802   *offset = 0;
1803
1804   switch (code)
1805     {
1806     case INTEGER_CST:
1807       if (!cst_and_fits_in_hwi (expr)
1808           || zero_p (expr))
1809         return orig_expr;
1810
1811       *offset = int_cst_value (expr);
1812       return build_int_cst_type (orig_type, 0);
1813
1814     case PLUS_EXPR:
1815     case MINUS_EXPR:
1816       op0 = TREE_OPERAND (expr, 0);
1817       op1 = TREE_OPERAND (expr, 1);
1818
1819       op0 = strip_offset_1 (op0, false, false, &off0);
1820       op1 = strip_offset_1 (op1, false, false, &off1);
1821
1822       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1823       if (op0 == TREE_OPERAND (expr, 0)
1824           && op1 == TREE_OPERAND (expr, 1))
1825         return orig_expr;
1826
1827       if (zero_p (op1))
1828         expr = op0;
1829       else if (zero_p (op0))
1830         {
1831           if (code == PLUS_EXPR)
1832             expr = op1;
1833           else
1834             expr = fold_build1 (NEGATE_EXPR, type, op1);
1835         }
1836       else
1837         expr = fold_build2 (code, type, op0, op1);
1838
1839       return fold_convert (orig_type, expr);
1840
1841     case ARRAY_REF:
1842       if (!inside_addr)
1843         return orig_expr;
1844
1845       step = array_ref_element_size (expr);
1846       if (!cst_and_fits_in_hwi (step))
1847         break;
1848
1849       st = int_cst_value (step);
1850       op1 = TREE_OPERAND (expr, 1);
1851       op1 = strip_offset_1 (op1, false, false, &off1);
1852       *offset = off1 * st;
1853
1854       if (top_compref
1855           && zero_p (op1))
1856         {
1857           /* Strip the component reference completely.  */
1858           op0 = TREE_OPERAND (expr, 0);
1859           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1860           *offset += off0;
1861           return op0;
1862         }
1863       break;
1864
1865     case COMPONENT_REF:
1866       if (!inside_addr)
1867         return orig_expr;
1868
1869       tmp = component_ref_field_offset (expr);
1870       if (top_compref
1871           && cst_and_fits_in_hwi (tmp))
1872         {
1873           /* Strip the component reference completely.  */
1874           op0 = TREE_OPERAND (expr, 0);
1875           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1876           *offset = off0 + int_cst_value (tmp);
1877           return op0;
1878         }
1879       break;
1880
1881     case ADDR_EXPR:
1882       op0 = TREE_OPERAND (expr, 0);
1883       op0 = strip_offset_1 (op0, true, true, &off0);
1884       *offset += off0;
1885
1886       if (op0 == TREE_OPERAND (expr, 0))
1887         return orig_expr;
1888
1889       expr = build_fold_addr_expr (op0);
1890       return fold_convert (orig_type, expr);
1891
1892     case INDIRECT_REF:
1893       inside_addr = false;
1894       break;
1895
1896     default:
1897       return orig_expr;
1898     }
1899
1900   /* Default handling of expressions for that we want to recurse into
1901      the first operand.  */
1902   op0 = TREE_OPERAND (expr, 0);
1903   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1904   *offset += off0;
1905
1906   if (op0 == TREE_OPERAND (expr, 0)
1907       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1908     return orig_expr;
1909
1910   expr = copy_node (expr);
1911   TREE_OPERAND (expr, 0) = op0;
1912   if (op1)
1913     TREE_OPERAND (expr, 1) = op1;
1914
1915   /* Inside address, we might strip the top level component references,
1916      thus changing type of the expression.  Handling of ADDR_EXPR
1917      will fix that.  */
1918   expr = fold_convert (orig_type, expr);
1919
1920   return expr;
1921 }
1922
1923 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1924
1925 static tree
1926 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1927 {
1928   return strip_offset_1 (expr, false, false, offset);
1929 }
1930
1931 /* Returns variant of TYPE that can be used as base for different uses.
1932    For integer types, we return unsigned variant of the type, which
1933    avoids problems with overflows.  For pointer types, we return void *.  */
1934
1935 static tree
1936 generic_type_for (tree type)
1937 {
1938   if (POINTER_TYPE_P (type))
1939     return ptr_type_node;
1940
1941   if (TYPE_UNSIGNED (type))
1942     return type;
1943
1944   return unsigned_type_for (type);
1945 }
1946
1947 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1948    the bitmap to that we should store it.  */
1949
1950 static struct ivopts_data *fd_ivopts_data;
1951 static tree
1952 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1953 {
1954   bitmap *depends_on = data;
1955   struct version_info *info;
1956
1957   if (TREE_CODE (*expr_p) != SSA_NAME)
1958     return NULL_TREE;
1959   info = name_info (fd_ivopts_data, *expr_p);
1960
1961   if (!info->inv_id || info->has_nonlin_use)
1962     return NULL_TREE;
1963
1964   if (!*depends_on)
1965     *depends_on = BITMAP_ALLOC (NULL);
1966   bitmap_set_bit (*depends_on, info->inv_id);
1967
1968   return NULL_TREE;
1969 }
1970
1971 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1972    position to POS.  If USE is not NULL, the candidate is set as related to
1973    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1974    replacement of the final value of the iv by a direct computation.  */
1975
1976 static struct iv_cand *
1977 add_candidate_1 (struct ivopts_data *data,
1978                  tree base, tree step, bool important, enum iv_position pos,
1979                  struct iv_use *use, tree incremented_at)
1980 {
1981   unsigned i;
1982   struct iv_cand *cand = NULL;
1983   tree type, orig_type;
1984
1985   if (base)
1986     {
1987       orig_type = TREE_TYPE (base);
1988       type = generic_type_for (orig_type);
1989       if (type != orig_type)
1990         {
1991           base = fold_convert (type, base);
1992           if (step)
1993             step = fold_convert (type, step);
1994         }
1995     }
1996
1997   for (i = 0; i < n_iv_cands (data); i++)
1998     {
1999       cand = iv_cand (data, i);
2000
2001       if (cand->pos != pos)
2002         continue;
2003
2004       if (cand->incremented_at != incremented_at)
2005         continue;
2006
2007       if (!cand->iv)
2008         {
2009           if (!base && !step)
2010             break;
2011
2012           continue;
2013         }
2014
2015       if (!base && !step)
2016         continue;
2017
2018       if (!operand_equal_p (base, cand->iv->base, 0))
2019         continue;
2020
2021       if (zero_p (cand->iv->step))
2022         {
2023           if (zero_p (step))
2024             break;
2025         }
2026       else
2027         {
2028           if (step && operand_equal_p (step, cand->iv->step, 0))
2029             break;
2030         }
2031     }
2032
2033   if (i == n_iv_cands (data))
2034     {
2035       cand = xcalloc (1, sizeof (struct iv_cand));
2036       cand->id = i;
2037
2038       if (!base && !step)
2039         cand->iv = NULL;
2040       else
2041         cand->iv = alloc_iv (base, step);
2042
2043       cand->pos = pos;
2044       if (pos != IP_ORIGINAL && cand->iv)
2045         {
2046           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2047           cand->var_after = cand->var_before;
2048         }
2049       cand->important = important;
2050       cand->incremented_at = incremented_at;
2051       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2052
2053       if (step
2054           && TREE_CODE (step) != INTEGER_CST)
2055         {
2056           fd_ivopts_data = data;
2057           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2058         }
2059
2060       if (dump_file && (dump_flags & TDF_DETAILS))
2061         dump_cand (dump_file, cand);
2062     }
2063
2064   if (important && !cand->important)
2065     {
2066       cand->important = true;
2067       if (dump_file && (dump_flags & TDF_DETAILS))
2068         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2069     }
2070
2071   if (use)
2072     {
2073       bitmap_set_bit (use->related_cands, i);
2074       if (dump_file && (dump_flags & TDF_DETAILS))
2075         fprintf (dump_file, "Candidate %d is related to use %d\n",
2076                  cand->id, use->id);
2077     }
2078
2079   return cand;
2080 }
2081
2082 /* Returns true if incrementing the induction variable at the end of the LOOP
2083    is allowed.
2084
2085    The purpose is to avoid splitting latch edge with a biv increment, thus
2086    creating a jump, possibly confusing other optimization passes and leaving
2087    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2088    is not available (so we do not have a better alternative), or if the latch
2089    edge is already nonempty.  */
2090
2091 static bool
2092 allow_ip_end_pos_p (struct loop *loop)
2093 {
2094   if (!ip_normal_pos (loop))
2095     return true;
2096
2097   if (!empty_block_p (ip_end_pos (loop)))
2098     return true;
2099
2100   return false;
2101 }
2102
2103 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2104    position to POS.  If USE is not NULL, the candidate is set as related to
2105    it.  The candidate computation is scheduled on all available positions.  */
2106
2107 static void
2108 add_candidate (struct ivopts_data *data,
2109                tree base, tree step, bool important, struct iv_use *use)
2110 {
2111   if (ip_normal_pos (data->current_loop))
2112     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2113   if (ip_end_pos (data->current_loop)
2114       && allow_ip_end_pos_p (data->current_loop))
2115     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2116 }
2117
2118 /* Add a standard "0 + 1 * iteration" iv candidate for a
2119    type with SIZE bits.  */
2120
2121 static void
2122 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2123                                      unsigned int size)
2124 {
2125   tree type = lang_hooks.types.type_for_size (size, true);
2126   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2127                  true, NULL);
2128 }
2129
2130 /* Adds standard iv candidates.  */
2131
2132 static void
2133 add_standard_iv_candidates (struct ivopts_data *data)
2134 {
2135   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2136
2137   /* The same for a double-integer type if it is still fast enough.  */
2138   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2139     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2140 }
2141
2142
2143 /* Adds candidates bases on the old induction variable IV.  */
2144
2145 static void
2146 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2147 {
2148   tree phi, def;
2149   struct iv_cand *cand;
2150
2151   add_candidate (data, iv->base, iv->step, true, NULL);
2152
2153   /* The same, but with initial value zero.  */
2154   add_candidate (data,
2155                  build_int_cst (TREE_TYPE (iv->base), 0),
2156                  iv->step, true, NULL);
2157
2158   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2159   if (TREE_CODE (phi) == PHI_NODE)
2160     {
2161       /* Additionally record the possibility of leaving the original iv
2162          untouched.  */
2163       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2164       cand = add_candidate_1 (data,
2165                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2166                               SSA_NAME_DEF_STMT (def));
2167       cand->var_before = iv->ssa_name;
2168       cand->var_after = def;
2169     }
2170 }
2171
2172 /* Adds candidates based on the old induction variables.  */
2173
2174 static void
2175 add_old_ivs_candidates (struct ivopts_data *data)
2176 {
2177   unsigned i;
2178   struct iv *iv;
2179   bitmap_iterator bi;
2180
2181   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2182     {
2183       iv = ver_info (data, i)->iv;
2184       if (iv && iv->biv_p && !zero_p (iv->step))
2185         add_old_iv_candidates (data, iv);
2186     }
2187 }
2188
2189 /* Adds candidates based on the value of the induction variable IV and USE.  */
2190
2191 static void
2192 add_iv_value_candidates (struct ivopts_data *data,
2193                          struct iv *iv, struct iv_use *use)
2194 {
2195   unsigned HOST_WIDE_INT offset;
2196   tree base;
2197
2198   add_candidate (data, iv->base, iv->step, false, use);
2199
2200   /* The same, but with initial value zero.  Make such variable important,
2201      since it is generic enough so that possibly many uses may be based
2202      on it.  */
2203   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2204                  iv->step, true, use);
2205
2206   /* Third, try removing the constant offset.  */
2207   base = strip_offset (iv->base, &offset);
2208   if (offset)
2209     add_candidate (data, base, iv->step, false, use);
2210 }
2211
2212 /* Possibly adds pseudocandidate for replacing the final value of USE by
2213    a direct computation.  */
2214
2215 static void
2216 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2217 {
2218   struct tree_niter_desc *niter;
2219
2220   /* We must know where we exit the loop and how many times does it roll.  */
2221   niter = niter_for_single_dom_exit (data);
2222   if (!niter
2223       || !zero_p (niter->may_be_zero))
2224     return;
2225
2226   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2227 }
2228
2229 /* Adds candidates based on the uses.  */
2230
2231 static void
2232 add_derived_ivs_candidates (struct ivopts_data *data)
2233 {
2234   unsigned i;
2235
2236   for (i = 0; i < n_iv_uses (data); i++)
2237     {
2238       struct iv_use *use = iv_use (data, i);
2239
2240       if (!use)
2241         continue;
2242
2243       switch (use->type)
2244         {
2245         case USE_NONLINEAR_EXPR:
2246         case USE_COMPARE:
2247         case USE_ADDRESS:
2248           /* Just add the ivs based on the value of the iv used here.  */
2249           add_iv_value_candidates (data, use->iv, use);
2250           break;
2251
2252         case USE_OUTER:
2253           add_iv_value_candidates (data, use->iv, use);
2254
2255           /* Additionally, add the pseudocandidate for the possibility to
2256              replace the final value by a direct computation.  */
2257           add_iv_outer_candidates (data, use);
2258           break;
2259
2260         default:
2261           gcc_unreachable ();
2262         }
2263     }
2264 }
2265
2266 /* Record important candidates and add them to related_cands bitmaps
2267    if needed.  */
2268
2269 static void
2270 record_important_candidates (struct ivopts_data *data)
2271 {
2272   unsigned i;
2273   struct iv_use *use;
2274
2275   for (i = 0; i < n_iv_cands (data); i++)
2276     {
2277       struct iv_cand *cand = iv_cand (data, i);
2278
2279       if (cand->important)
2280         bitmap_set_bit (data->important_candidates, i);
2281     }
2282
2283   data->consider_all_candidates = (n_iv_cands (data)
2284                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2285
2286   if (data->consider_all_candidates)
2287     {
2288       /* We will not need "related_cands" bitmaps in this case,
2289          so release them to decrease peak memory consumption.  */
2290       for (i = 0; i < n_iv_uses (data); i++)
2291         {
2292           use = iv_use (data, i);
2293           BITMAP_FREE (use->related_cands);
2294         }
2295     }
2296   else
2297     {
2298       /* Add important candidates to the related_cands bitmaps.  */
2299       for (i = 0; i < n_iv_uses (data); i++)
2300         bitmap_ior_into (iv_use (data, i)->related_cands,
2301                          data->important_candidates);
2302     }
2303 }
2304
/* Finds the candidates for the induction variables.  The candidates are
   gathered from three sources in turn, and the important ones are then
   recorded.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Induction variables that already exist.  */
  add_old_ivs_candidates (data);

  /* Induction variables derived from the uses.  */
  add_derived_ivs_candidates (data);

  /* Note which of the candidates are important.  */
  record_important_candidates (data);
}
2322
2323 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2324    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2325    we allocate a simple list to every use.  */
2326
2327 static void
2328 alloc_use_cost_map (struct ivopts_data *data)
2329 {
2330   unsigned i, size, s, j;
2331
2332   for (i = 0; i < n_iv_uses (data); i++)
2333     {
2334       struct iv_use *use = iv_use (data, i);
2335       bitmap_iterator bi;
2336
2337       if (data->consider_all_candidates)
2338         size = n_iv_cands (data);
2339       else
2340         {
2341           s = 0;
2342           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2343             {
2344               s++;
2345             }
2346
2347           /* Round up to the power of two, so that moduling by it is fast.  */
2348           for (size = 1; size < s; size <<= 1)
2349             continue;
2350         }
2351
2352       use->n_map_members = size;
2353       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2354     }
2355 }
2356
2357 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2358    on invariants DEPENDS_ON and that the value used in expressing it
2359    is VALUE.*/
2360
2361 static void
2362 set_use_iv_cost (struct ivopts_data *data,
2363                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2364                  bitmap depends_on, tree value)
2365 {
2366   unsigned i, s;
2367
2368   if (cost == INFTY)
2369     {
2370       BITMAP_FREE (depends_on);
2371       return;
2372     }
2373
2374   if (data->consider_all_candidates)
2375     {
2376       use->cost_map[cand->id].cand = cand;
2377       use->cost_map[cand->id].cost = cost;
2378       use->cost_map[cand->id].depends_on = depends_on;
2379       use->cost_map[cand->id].value = value;
2380       return;
2381     }
2382
2383   /* n_map_members is a power of two, so this computes modulo.  */
2384   s = cand->id & (use->n_map_members - 1);
2385   for (i = s; i < use->n_map_members; i++)
2386     if (!use->cost_map[i].cand)
2387       goto found;
2388   for (i = 0; i < s; i++)
2389     if (!use->cost_map[i].cand)
2390       goto found;
2391
2392   gcc_unreachable ();
2393
2394 found:
2395   use->cost_map[i].cand = cand;
2396   use->cost_map[i].cost = cost;
2397   use->cost_map[i].depends_on = depends_on;
2398   use->cost_map[i].value = value;
2399 }
2400
2401 /* Gets cost of (USE, CANDIDATE) pair.  */
2402
2403 static struct cost_pair *
2404 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2405                  struct iv_cand *cand)
2406 {
2407   unsigned i, s;
2408   struct cost_pair *ret;
2409
2410   if (!cand)
2411     return NULL;
2412
2413   if (data->consider_all_candidates)
2414     {
2415       ret = use->cost_map + cand->id;
2416       if (!ret->cand)
2417         return NULL;
2418
2419       return ret;
2420     }
2421
2422   /* n_map_members is a power of two, so this computes modulo.  */
2423   s = cand->id & (use->n_map_members - 1);
2424   for (i = s; i < use->n_map_members; i++)
2425     if (use->cost_map[i].cand == cand)
2426       return use->cost_map + i;
2427
2428   for (i = 0; i < s; i++)
2429     if (use->cost_map[i].cand == cand)
2430       return use->cost_map + i;
2431
2432   return NULL;
2433 }
2434
2435 /* Returns estimate on cost of computing SEQ.  */
2436
2437 static unsigned
2438 seq_cost (rtx seq)
2439 {
2440   unsigned cost = 0;
2441   rtx set;
2442
2443   for (; seq; seq = NEXT_INSN (seq))
2444     {
2445       set = single_set (seq);
2446       if (set)
2447         cost += rtx_cost (set, SET);
2448       else
2449         cost++;
2450     }
2451
2452   return cost;
2453 }
2454
2455 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2456 static rtx
2457 produce_memory_decl_rtl (tree obj, int *regno)
2458 {
2459   rtx x;
2460
2461   gcc_assert (obj);
2462   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2463     {
2464       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2465       x = gen_rtx_SYMBOL_REF (Pmode, name);
2466     }
2467   else
2468     x = gen_raw_REG (Pmode, (*regno)++);
2469
2470   return gen_rtx_MEM (DECL_MODE (obj), x);
2471 }
2472
2473 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2474    walk_tree.  DATA contains the actual fake register number.  */
2475
2476 static tree
2477 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2478 {
2479   tree obj = NULL_TREE;
2480   rtx x = NULL_RTX;
2481   int *regno = data;
2482
2483   switch (TREE_CODE (*expr_p))
2484     {
2485     case ADDR_EXPR:
2486       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2487            handled_component_p (*expr_p);
2488            expr_p = &TREE_OPERAND (*expr_p, 0))
2489         continue;
2490       obj = *expr_p;
2491       if (DECL_P (obj))
2492         x = produce_memory_decl_rtl (obj, regno);
2493       break;
2494
2495     case SSA_NAME:
2496       *ws = 0;
2497       obj = SSA_NAME_VAR (*expr_p);
2498       if (!DECL_RTL_SET_P (obj))
2499         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2500       break;
2501
2502     case VAR_DECL:
2503     case PARM_DECL:
2504     case RESULT_DECL:
2505       *ws = 0;
2506       obj = *expr_p;
2507
2508       if (DECL_RTL_SET_P (obj))
2509         break;
2510
2511       if (DECL_MODE (obj) == BLKmode)
2512         x = produce_memory_decl_rtl (obj, regno);
2513       else
2514         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2515
2516       break;
2517
2518     default:
2519       break;
2520     }
2521
2522   if (x)
2523     {
2524       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2525       SET_DECL_RTL (obj, x);
2526     }
2527
2528   return NULL_TREE;
2529 }
2530
2531 /* Determines cost of the computation of EXPR.  */
2532
2533 static unsigned
2534 computation_cost (tree expr)
2535 {
2536   rtx seq, rslt;
2537   tree type = TREE_TYPE (expr);
2538   unsigned cost;
2539   /* Avoid using hard regs in ways which may be unsupported.  */
2540   int regno = LAST_VIRTUAL_REGISTER + 1;
2541
2542   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2543   start_sequence ();
2544   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2545   seq = get_insns ();
2546   end_sequence ();
2547
2548   cost = seq_cost (seq);
2549   if (MEM_P (rslt))
2550     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2551
2552   return cost;
2553 }
2554
2555 /* Returns variable containing the value of candidate CAND at statement AT.  */
2556
2557 static tree
2558 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2559 {
2560   if (stmt_after_increment (loop, cand, stmt))
2561     return cand->var_after;
2562   else
2563     return cand->var_before;
2564 }
2565
2566 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2567    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2568
2569 int
2570 tree_int_cst_sign_bit (tree t)
2571 {
2572   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2573   unsigned HOST_WIDE_INT w;
2574
2575   if (bitno < HOST_BITS_PER_WIDE_INT)
2576     w = TREE_INT_CST_LOW (t);
2577   else
2578     {
2579       w = TREE_INT_CST_HIGH (t);
2580       bitno -= HOST_BITS_PER_WIDE_INT;
2581     }
2582
2583   return (w >> bitno) & 1;
2584 }
2585
2586 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2587    return cst.  Otherwise return NULL_TREE.  */
2588
2589 static tree
2590 constant_multiple_of (tree type, tree top, tree bot)
2591 {
2592   tree res, mby, p0, p1;
2593   enum tree_code code;
2594   bool negate;
2595
2596   STRIP_NOPS (top);
2597   STRIP_NOPS (bot);
2598
2599   if (operand_equal_p (top, bot, 0))
2600     return build_int_cst (type, 1);
2601
2602   code = TREE_CODE (top);
2603   switch (code)
2604     {
2605     case MULT_EXPR:
2606       mby = TREE_OPERAND (top, 1);
2607       if (TREE_CODE (mby) != INTEGER_CST)
2608         return NULL_TREE;
2609
2610       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2611       if (!res)
2612         return NULL_TREE;
2613
2614       return fold_binary_to_constant (MULT_EXPR, type, res,
2615                                       fold_convert (type, mby));
2616
2617     case PLUS_EXPR:
2618     case MINUS_EXPR:
2619       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2620       if (!p0)
2621         return NULL_TREE;
2622       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2623       if (!p1)
2624         return NULL_TREE;
2625
2626       return fold_binary_to_constant (code, type, p0, p1);
2627
2628     case INTEGER_CST:
2629       if (TREE_CODE (bot) != INTEGER_CST)
2630         return NULL_TREE;
2631
2632       bot = fold_convert (type, bot);
2633       top = fold_convert (type, top);
2634
2635       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2636          the result afterwards.  */
2637       if (tree_int_cst_sign_bit (bot))
2638         {
2639           negate = true;
2640           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2641         }
2642       else
2643         negate = false;
2644
2645       /* Ditto for TOP.  */
2646       if (tree_int_cst_sign_bit (top))
2647         {
2648           negate = !negate;
2649           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2650         }
2651
2652       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2653         return NULL_TREE;
2654
2655       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2656       if (negate)
2657         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2658       return res;
2659
2660     default:
2661       return NULL_TREE;
2662     }
2663 }
2664
2665 /* Sets COMB to CST.  */
2666
2667 static void
2668 aff_combination_const (struct affine_tree_combination *comb, tree type,
2669                        unsigned HOST_WIDE_INT cst)
2670 {
2671   unsigned prec = TYPE_PRECISION (type);
2672
2673   comb->type = type;
2674   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2675
2676   comb->n = 0;
2677   comb->rest = NULL_TREE;
2678   comb->offset = cst & comb->mask;
2679 }
2680
2681 /* Sets COMB to single element ELT.  */
2682
2683 static void
2684 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2685 {
2686   unsigned prec = TYPE_PRECISION (type);
2687
2688   comb->type = type;
2689   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2690
2691   comb->n = 1;
2692   comb->elts[0] = elt;
2693   comb->coefs[0] = 1;
2694   comb->rest = NULL_TREE;
2695   comb->offset = 0;
2696 }
2697
2698 /* Scales COMB by SCALE.  */
2699
2700 static void
2701 aff_combination_scale (struct affine_tree_combination *comb,
2702                        unsigned HOST_WIDE_INT scale)
2703 {
2704   unsigned i, j;
2705
2706   if (scale == 1)
2707     return;
2708
2709   if (scale == 0)
2710     {
2711       aff_combination_const (comb, comb->type, 0);
2712       return;
2713     }
2714
2715   comb->offset = (scale * comb->offset) & comb->mask;
2716   for (i = 0, j = 0; i < comb->n; i++)
2717     {
2718       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2719       comb->elts[j] = comb->elts[i];
2720       if (comb->coefs[j] != 0)
2721         j++;
2722     }
2723   comb->n = j;
2724
2725   if (comb->rest)
2726     {
2727       if (comb->n < MAX_AFF_ELTS)
2728         {
2729           comb->coefs[comb->n] = scale;
2730           comb->elts[comb->n] = comb->rest;
2731           comb->rest = NULL_TREE;
2732           comb->n++;
2733         }
2734       else
2735         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2736                                   build_int_cst_type (comb->type, scale));
2737     }
2738 }
2739
2740 /* Adds ELT * SCALE to COMB.  */
2741
2742 static void
2743 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2744                          unsigned HOST_WIDE_INT scale)
2745 {
2746   unsigned i;
2747
2748   if (scale == 0)
2749     return;
2750
2751   for (i = 0; i < comb->n; i++)
2752     if (operand_equal_p (comb->elts[i], elt, 0))
2753       {
2754         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2755         if (comb->coefs[i])
2756           return;
2757
2758         comb->n--;
2759         comb->coefs[i] = comb->coefs[comb->n];
2760         comb->elts[i] = comb->elts[comb->n];
2761         return;
2762       }
2763   if (comb->n < MAX_AFF_ELTS)
2764     {
2765       comb->coefs[comb->n] = scale;
2766       comb->elts[comb->n] = elt;
2767       comb->n++;
2768       return;
2769     }
2770
2771   if (scale == 1)
2772     elt = fold_convert (comb->type, elt);
2773   else
2774     elt = fold_build2 (MULT_EXPR, comb->type,
2775                        fold_convert (comb->type, elt),
2776                        build_int_cst_type (comb->type, scale));
2777
2778   if (comb->rest)
2779     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2780   else
2781     comb->rest = elt;
2782 }
2783
2784 /* Adds COMB2 to COMB1.  */
2785
2786 static void
2787 aff_combination_add (struct affine_tree_combination *comb1,
2788                      struct affine_tree_combination *comb2)
2789 {
2790   unsigned i;
2791
2792   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2793   for (i = 0; i < comb2-> n; i++)
2794     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2795   if (comb2->rest)
2796     aff_combination_add_elt (comb1, comb2->rest, 1);
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929
2930 /* Makes tree from the affine combination COMB.  */
2931
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939
2940   /* Handle the special case produced by get_computation_aff when
2941      the type does not fit in HOST_WIDE_INT.  */
2942   if (comb->n == 0 && comb->offset == 0)
2943     return fold_convert (type, expr);
2944
2945   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2946
2947   for (i = 0; i < comb->n; i++)
2948     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2949                             comb->mask);
2950
2951   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2952     {
2953       /* Offset is negative.  */
2954       off = (-comb->offset) & comb->mask;
2955       sgn = comb->mask;
2956     }
2957   else
2958     {
2959       off = comb->offset;
2960       sgn = 1;
2961     }
2962   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2963                           comb->mask);
2964 }
2965
2966 /* Determines the expression by that USE is expressed from induction variable
2967    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2968    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2969
2970 static bool
2971 get_computation_aff (struct loop *loop,
2972                      struct iv_use *use, struct iv_cand *cand, tree at,
2973                      struct affine_tree_combination *aff)
2974 {
2975   tree ubase = use->iv->base;
2976   tree ustep = use->iv->step;
2977   tree cbase = cand->iv->base;
2978   tree cstep = cand->iv->step;
2979   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2980   tree uutype;
2981   tree expr, delta;
2982   tree ratio;
2983   unsigned HOST_WIDE_INT ustepi, cstepi;
2984   HOST_WIDE_INT ratioi;
2985   struct affine_tree_combination cbase_aff, expr_aff;
2986
2987   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2988     {
2989       /* We do not have a precision to express the values of use.  */
2990       return false;
2991     }
2992
2993   expr = var_at_stmt (loop, cand, at);
2994
2995   if (TREE_TYPE (expr) != ctype)
2996     {
2997       /* This may happen with the original ivs.  */
2998       expr = fold_convert (ctype, expr);
2999     }
3000
3001   if (TYPE_UNSIGNED (utype))
3002     uutype = utype;
3003   else
3004     {
3005       uutype = unsigned_type_for (utype);
3006       ubase = fold_convert (uutype, ubase);
3007       ustep = fold_convert (uutype, ustep);
3008     }
3009
3010   if (uutype != ctype)
3011     {
3012       expr = fold_convert (uutype, expr);
3013       cbase = fold_convert (uutype, cbase);
3014       cstep = fold_convert (uutype, cstep);
3015     }
3016
3017   if (cst_and_fits_in_hwi (cstep)
3018       && cst_and_fits_in_hwi (ustep))
3019     {
3020       ustepi = int_cst_value (ustep);
3021       cstepi = int_cst_value (cstep);
3022
3023       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3024         {
3025           /* TODO maybe consider case when ustep divides cstep and the ratio is
3026              a power of 2 (so that the division is fast to execute)?  We would
3027              need to be much more careful with overflows etc. then.  */
3028           return false;
3029         }
3030
3031       ratio = build_int_cst_type (uutype, ratioi);
3032     }
3033   else
3034     {
3035       ratio = constant_multiple_of (uutype, ustep, cstep);
3036       if (!ratio)
3037         return false;
3038
3039       /* Ratioi is only used to detect special cases when the multiplicative
3040          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3041          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3042          to integer_onep/integer_all_onesp, since the former ignores
3043          TREE_OVERFLOW.  */
3044       if (cst_and_fits_in_hwi (ratio))
3045         ratioi = int_cst_value (ratio);
3046       else if (integer_onep (ratio))
3047         ratioi = 1;
3048       else if (integer_all_onesp (ratio))
3049         ratioi = -1;
3050       else
3051         ratioi = 0;
3052     }
3053
3054   /* We may need to shift the value if we are after the increment.  */
3055   if (stmt_after_increment (loop, cand, at))
3056     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3057
3058   /* use = ubase - ratio * cbase + ratio * var.
3059
3060      In general case ubase + ratio * (var - cbase) could be better (one less
3061      multiplication), but often it is possible to eliminate redundant parts
3062      of computations from (ubase - ratio * cbase) term, and if it does not
3063      happen, fold is able to apply the distributive law to obtain this form
3064      anyway.  */
3065
3066   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3067     {
3068       /* Let's compute in trees and just return the result in AFF.  This case
3069          should not be very common, and fold itself is not that bad either,
3070          so making the aff. functions more complicated to handle this case
3071          is not that urgent.  */
3072       if (ratioi == 1)
3073         {
3074           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3075           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3076         }
3077       else if (ratioi == -1)
3078         {
3079           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3080           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3081         }
3082       else
3083         {
3084           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3085           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3086           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3087           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3088         }
3089
3090       aff->type = uutype;
3091       aff->n = 0;
3092       aff->offset = 0;
3093       aff->mask = 0;
3094       aff->rest = expr;
3095       return true;
3096     }
3097
3098   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3099      possible to compute ratioi.  */
3100   gcc_assert (ratioi);
3101
3102   tree_to_aff_combination (ubase, uutype, aff);
3103   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3104   tree_to_aff_combination (expr, uutype, &expr_aff);
3105   aff_combination_scale (&cbase_aff, -ratioi);
3106   aff_combination_scale (&expr_aff, ratioi);
3107   aff_combination_add (aff, &cbase_aff);
3108   aff_combination_add (aff, &expr_aff);
3109
3110   return true;
3111 }
3112
3113 /* Determines the expression by that USE is expressed from induction variable
3114    CAND at statement AT in LOOP.  The computation is unshared.  */
3115
3116 static tree
3117 get_computation_at (struct loop *loop,
3118                     struct iv_use *use, struct iv_cand *cand, tree at)
3119 {
3120   struct affine_tree_combination aff;
3121   tree type = TREE_TYPE (use->iv->base);
3122
3123   if (!get_computation_aff (loop, use, cand, at, &aff))
3124     return NULL_TREE;
3125   unshare_aff_combination (&aff);
3126   return fold_convert (type, aff_combination_to_tree (&aff));
3127 }
3128
3129 /* Determines the expression by that USE is expressed from induction variable
3130    CAND in LOOP.  The computation is unshared.  */
3131
3132 static tree
3133 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3134 {
3135   return get_computation_at (loop, use, cand, use->stmt);
3136 }
3137
3138 /* Returns cost of addition in MODE.  */
3139
3140 static unsigned
3141 add_cost (enum machine_mode mode)
3142 {
3143   static unsigned costs[NUM_MACHINE_MODES];
3144   rtx seq;
3145   unsigned cost;
3146
3147   if (costs[mode])
3148     return costs[mode];
3149
3150   start_sequence ();
3151   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3152                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3153                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3154                  NULL_RTX);
3155   seq = get_insns ();
3156   end_sequence ();
3157
3158   cost = seq_cost (seq);
3159   if (!cost)
3160     cost = 1;
3161
3162   costs[mode] = cost;
3163
3164   if (dump_file && (dump_flags & TDF_DETAILS))
3165     fprintf (dump_file, "Addition in %s costs %d\n",
3166              GET_MODE_NAME (mode), cost);
3167   return cost;
3168 }
3169
3170 /* Entry in a hashtable of already known costs for multiplication.  */
3171 struct mbc_entry
3172 {
3173   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3174   enum machine_mode mode;       /* In mode.  */
3175   unsigned cost;                /* The cost.  */
3176 };
3177
3178 /* Counts hash value for the ENTRY.  */
3179
3180 static hashval_t
3181 mbc_entry_hash (const void *entry)
3182 {
3183   const struct mbc_entry *e = entry;
3184
3185   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3186 }
3187
3188 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3189
3190 static int
3191 mbc_entry_eq (const void *entry1, const void *entry2)
3192 {
3193   const struct mbc_entry *e1 = entry1;
3194   const struct mbc_entry *e2 = entry2;
3195
3196   return (e1->mode == e2->mode
3197           && e1->cst == e2->cst);
3198 }
3199
3200 /* Returns cost of multiplication by constant CST in MODE.  */
3201
3202 unsigned
3203 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3204 {
3205   static htab_t costs;
3206   struct mbc_entry **cached, act;
3207   rtx seq;
3208   unsigned cost;
3209
3210   if (!costs)
3211     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3212
3213   act.mode = mode;
3214   act.cst = cst;
3215   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3216   if (*cached)
3217     return (*cached)->cost;
3218
3219   *cached = xmalloc (sizeof (struct mbc_entry));
3220   (*cached)->mode = mode;
3221   (*cached)->cst = cst;
3222
3223   start_sequence ();
3224   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3225                gen_int_mode (cst, mode), NULL_RTX, 0);
3226   seq = get_insns ();
3227   end_sequence ();
3228
3229   cost = seq_cost (seq);
3230
3231   if (dump_file && (dump_flags & TDF_DETAILS))
3232     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3233              (int) cst, GET_MODE_NAME (mode), cost);
3234
3235   (*cached)->cost = cost;
3236
3237   return cost;
3238 }
3239
3240 /* Returns true if multiplying by RATIO is allowed in address.  */
3241
3242 bool
3243 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3244 {
3245 #define MAX_RATIO 128
3246   static sbitmap valid_mult;
3247
3248   if (!valid_mult)
3249     {
3250       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3251       rtx addr;
3252       HOST_WIDE_INT i;
3253
3254       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3255       sbitmap_zero (valid_mult);
3256       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3257       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3258         {
3259           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3260           if (memory_address_p (Pmode, addr))
3261             SET_BIT (valid_mult, i + MAX_RATIO);
3262         }
3263
3264       if (dump_file && (dump_flags & TDF_DETAILS))
3265         {
3266           fprintf (dump_file, "  allowed multipliers:");
3267           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3268             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3269               fprintf (dump_file, " %d", (int) i);
3270           fprintf (dump_file, "\n");
3271           fprintf (dump_file, "\n");
3272         }
3273     }
3274
3275   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3276     return false;
3277
3278   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3279 }
3280
3281 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3282    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3283    variable is omitted.  The created memory accesses MODE.
3284
3285    TODO -- there must be some better way.  This all is quite crude.  */
3286
3287 static unsigned
3288 get_address_cost (bool symbol_present, bool var_present,
3289                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3290 {
3291   static bool initialized = false;
3292   static HOST_WIDE_INT rat, off;
3293   static HOST_WIDE_INT min_offset, max_offset;
3294   static unsigned costs[2][2][2][2];
3295   unsigned cost, acost;
3296   rtx seq, addr, base;
3297   bool offset_p, ratio_p;
3298   rtx reg1;
3299   HOST_WIDE_INT s_offset;
3300   unsigned HOST_WIDE_INT mask;
3301   unsigned bits;
3302
3303   if (!initialized)
3304     {
3305       HOST_WIDE_INT i;
3306       initialized = true;
3307
3308       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3309
3310       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3311       for (i = 1; i <= 1 << 20; i <<= 1)
3312         {
3313           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3314           if (!memory_address_p (Pmode, addr))
3315             break;
3316         }
3317       max_offset = i >> 1;
3318       off = max_offset;
3319
3320       for (i = 1; i <= 1 << 20; i <<= 1)
3321         {
3322           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3323           if (!memory_address_p (Pmode, addr))
3324             break;
3325         }
3326       min_offset = -(i >> 1);
3327
3328       if (dump_file && (dump_flags & TDF_DETAILS))
3329         {
3330           fprintf (dump_file, "get_address_cost:\n");
3331           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3332           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3333         }
3334
3335       rat = 1;
3336       for (i = 2; i <= MAX_RATIO; i++)
3337         if (multiplier_allowed_in_address_p (i))
3338           {
3339             rat = i;
3340             break;
3341           }
3342     }
3343
3344   bits = GET_MODE_BITSIZE (Pmode);
3345   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3346   offset &= mask;
3347   if ((offset >> (bits - 1) & 1))
3348     offset |= ~mask;
3349   s_offset = offset;
3350
3351   cost = 0;
3352   offset_p = (s_offset != 0
3353               && min_offset <= s_offset && s_offset <= max_offset);
3354   ratio_p = (ratio != 1
3355              && multiplier_allowed_in_address_p (ratio));
3356
3357   if (ratio != 1 && !ratio_p)
3358     cost += multiply_by_cost (ratio, Pmode);
3359
3360   if (s_offset && !offset_p && !symbol_present)
3361     {
3362       cost += add_cost (Pmode);
3363       var_present = true;
3364     }
3365
3366   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3367   if (!acost)
3368     {
3369       acost = 0;
3370
3371       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3372       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3373       if (ratio_p)
3374         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3375
3376       if (var_present)
3377         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3378
3379       if (symbol_present)
3380         {
3381           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3382           if (offset_p)
3383             base = gen_rtx_fmt_e (CONST, Pmode,
3384                                   gen_rtx_fmt_ee (PLUS, Pmode,
3385                                                   base,
3386                                                   gen_int_mode (off, Pmode)));
3387         }
3388       else if (offset_p)
3389         base = gen_int_mode (off, Pmode);
3390       else
3391         base = NULL_RTX;
3392
3393       if (base)
3394         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3395
3396       start_sequence ();
3397       addr = memory_address (Pmode, addr);
3398       seq = get_insns ();
3399       end_sequence ();
3400
3401       acost = seq_cost (seq);
3402       acost += address_cost (addr, Pmode);
3403
3404       if (!acost)
3405         acost = 1;
3406       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3407     }
3408
3409   return cost + acost;
3410 }
3411 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3412    invariants the computation depends on.  */
3413
3414 static unsigned
3415 force_var_cost (struct ivopts_data *data,
3416                 tree expr, bitmap *depends_on)
3417 {
3418   static bool costs_initialized = false;
3419   static unsigned integer_cost;
3420   static unsigned symbol_cost;
3421   static unsigned address_cost;
3422   tree op0, op1;
3423   unsigned cost0, cost1, cost;
3424   enum machine_mode mode;
3425
3426   if (!costs_initialized)
3427     {
3428       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3429       rtx x = gen_rtx_MEM (DECL_MODE (var),
3430                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3431       tree addr;
3432       tree type = build_pointer_type (integer_type_node);
3433
3434       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3435                                                            2000));
3436
3437       SET_DECL_RTL (var, x);
3438       TREE_STATIC (var) = 1;
3439       addr = build1 (ADDR_EXPR, type, var);
3440       symbol_cost = computation_cost (addr) + 1;
3441
3442       address_cost
3443         = computation_cost (build2 (PLUS_EXPR, type,
3444                                     addr,
3445                                     build_int_cst_type (type, 2000))) + 1;
3446       if (dump_file && (dump_flags & TDF_DETAILS))
3447         {
3448           fprintf (dump_file, "force_var_cost:\n");
3449           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3450           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3451           fprintf (dump_file, "  address %d\n", (int) address_cost);
3452           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3453           fprintf (dump_file, "\n");
3454         }
3455
3456       costs_initialized = true;
3457     }
3458
3459   STRIP_NOPS (expr);
3460
3461   if (depends_on)
3462     {
3463       fd_ivopts_data = data;
3464       walk_tree (&expr, find_depends, depends_on, NULL);
3465     }
3466
3467   if (SSA_VAR_P (expr))
3468     return 0;
3469
3470   if (TREE_INVARIANT (expr))
3471     {
3472       if (TREE_CODE (expr) == INTEGER_CST)
3473         return integer_cost;
3474
3475       if (TREE_CODE (expr) == ADDR_EXPR)
3476         {
3477           tree obj = TREE_OPERAND (expr, 0);
3478
3479           if (TREE_CODE (obj) == VAR_DECL
3480               || TREE_CODE (obj) == PARM_DECL
3481               || TREE_CODE (obj) == RESULT_DECL)
3482             return symbol_cost;
3483         }
3484
3485       return address_cost;
3486     }
3487
3488   switch (TREE_CODE (expr))
3489     {
3490     case PLUS_EXPR:
3491     case MINUS_EXPR:
3492     case MULT_EXPR:
3493       op0 = TREE_OPERAND (expr, 0);
3494       op1 = TREE_OPERAND (expr, 1);
3495       STRIP_NOPS (op0);
3496       STRIP_NOPS (op1);
3497
3498       if (is_gimple_val (op0))
3499         cost0 = 0;
3500       else
3501         cost0 = force_var_cost (data, op0, NULL);
3502
3503       if (is_gimple_val (op1))
3504         cost1 = 0;
3505       else
3506         cost1 = force_var_cost (data, op1, NULL);
3507
3508       break;
3509
3510     default:
3511       /* Just an arbitrary value, FIXME.  */
3512       return target_spill_cost;
3513     }
3514
3515   mode = TYPE_MODE (TREE_TYPE (expr));
3516   switch (TREE_CODE (expr))
3517     {
3518     case PLUS_EXPR:
3519     case MINUS_EXPR:
3520       cost = add_cost (mode);
3521       break;
3522
3523     case MULT_EXPR:
3524       if (cst_and_fits_in_hwi (op0))
3525         cost = multiply_by_cost (int_cst_value (op0), mode);
3526       else if (cst_and_fits_in_hwi (op1))
3527         cost = multiply_by_cost (int_cst_value (op1), mode);
3528       else
3529         return target_spill_cost;
3530       break;
3531
3532     default:
3533       gcc_unreachable ();
3534     }
3535
3536   cost += cost0;
3537   cost += cost1;
3538
3539   /* Bound the cost by target_spill_cost.  The parts of complicated
3540      computations often are either loop invariant or at least can
3541      be shared between several iv uses, so letting this grow without
3542      limits would not give reasonable results.  */
3543   return cost < target_spill_cost ? cost : target_spill_cost;
3544 }
3545
3546 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3547    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3548    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3549    invariants the computation depends on.  */
3550
3551 static unsigned
3552 split_address_cost (struct ivopts_data *data,
3553                     tree addr, bool *symbol_present, bool *var_present,
3554                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3555 {
3556   tree core;
3557   HOST_WIDE_INT bitsize;
3558   HOST_WIDE_INT bitpos;
3559   tree toffset;
3560   enum machine_mode mode;
3561   int unsignedp, volatilep;
3562
3563   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3564                               &unsignedp, &volatilep, false);
3565
3566   if (toffset != 0
3567       || bitpos % BITS_PER_UNIT != 0
3568       || TREE_CODE (core) != VAR_DECL)
3569     {
3570       *symbol_present = false;
3571       *var_present = true;
3572       fd_ivopts_data = data;
3573       walk_tree (&addr, find_depends, depends_on, NULL);
3574       return target_spill_cost;
3575     }
3576
3577   *offset += bitpos / BITS_PER_UNIT;
3578   if (TREE_STATIC (core)
3579       || DECL_EXTERNAL (core))
3580     {
3581       *symbol_present = true;
3582       *var_present = false;
3583       return 0;
3584     }
3585
3586   *symbol_present = false;
3587   *var_present = true;
3588   return 0;
3589 }
3590
3591 /* Estimates cost of expressing difference of addresses E1 - E2 as
3592    var + symbol + offset.  The value of offset is added to OFFSET,
3593    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3594    part is missing.  DEPENDS_ON is a set of the invariants the computation
3595    depends on.  */
3596
3597 static unsigned
3598 ptr_difference_cost (struct ivopts_data *data,
3599                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3600                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3601 {
3602   HOST_WIDE_INT diff = 0;
3603   unsigned cost;
3604
3605   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3606
3607   if (ptr_difference_const (e1, e2, &diff))
3608     {
3609       *offset += diff;
3610       *symbol_present = false;
3611       *var_present = false;
3612       return 0;
3613     }
3614
3615   if (e2 == integer_zero_node)
3616     return split_address_cost (data, TREE_OPERAND (e1, 0),
3617                                symbol_present, var_present, offset, depends_on);
3618
3619   *symbol_present = false;
3620   *var_present = true;
3621
3622   cost = force_var_cost (data, e1, depends_on);
3623   cost += force_var_cost (data, e2, depends_on);
3624   cost += add_cost (Pmode);
3625
3626   return cost;
3627 }
3628
3629 /* Estimates cost of expressing difference E1 - E2 as
3630    var + symbol + offset.  The value of offset is added to OFFSET,
3631    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3632    part is missing.  DEPENDS_ON is a set of the invariants the computation
3633    depends on.  */
3634
3635 static unsigned
3636 difference_cost (struct ivopts_data *data,
3637                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3638                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3639 {
3640   unsigned cost;
3641   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3642   unsigned HOST_WIDE_INT off1, off2;
3643
3644   e1 = strip_offset (e1, &off1);
3645   e2 = strip_offset (e2, &off2);
3646   *offset += off1 - off2;
3647
3648   STRIP_NOPS (e1);
3649   STRIP_NOPS (e2);
3650
3651   if (TREE_CODE (e1) == ADDR_EXPR)
3652     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3653                                 depends_on);
3654   *symbol_present = false;
3655
3656   if (operand_equal_p (e1, e2, 0))
3657     {
3658       *var_present = false;
3659       return 0;
3660     }
3661   *var_present = true;
3662   if (zero_p (e2))
3663     return force_var_cost (data, e1, depends_on);
3664
3665   if (zero_p (e1))
3666     {
3667       cost = force_var_cost (data, e2, depends_on);
3668       cost += multiply_by_cost (-1, mode);
3669
3670       return cost;
3671     }
3672
3673   cost = force_var_cost (data, e1, depends_on);
3674   cost += force_var_cost (data, e2, depends_on);
3675   cost += add_cost (mode);
3676
3677   return cost;
3678 }
3679
3680 /* Determines the cost of the computation by that USE is expressed
3681    from induction variable CAND.  If ADDRESS_P is true, we just need
3682    to create an address from it, otherwise we want to get it into
3683    register.  A set of invariants we depend on is stored in
3684    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3685
3686 static unsigned
3687 get_computation_cost_at (struct ivopts_data *data,
3688                          struct iv_use *use, struct iv_cand *cand,
3689                          bool address_p, bitmap *depends_on, tree at)
3690 {
3691   tree ubase = use->iv->base, ustep = use->iv->step;
3692   tree cbase, cstep;
3693   tree utype = TREE_TYPE (ubase), ctype;
3694   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3695   HOST_WIDE_INT ratio, aratio;
3696   bool var_present, symbol_present;
3697   unsigned cost = 0, n_sums;
3698
3699   *depends_on = NULL;
3700
3701   /* Only consider real candidates.  */
3702   if (!cand->iv)
3703     return INFTY;
3704
3705   cbase = cand->iv->base;
3706   cstep = cand->iv->step;
3707   ctype = TREE_TYPE (cbase);
3708
3709   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3710     {
3711       /* We do not have a precision to express the values of use.  */
3712       return INFTY;
3713     }
3714
3715   if (address_p)
3716     {
3717       /* Do not try to express address of an object with computation based
3718          on address of a different object.  This may cause problems in rtl
3719          level alias analysis (that does not expect this to be happening,
3720          as this is illegal in C), and would be unlikely to be useful
3721          anyway.  */
3722       if (use->iv->base_object
3723           && cand->iv->base_object
3724           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3725         return INFTY;
3726     }
3727
3728   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3729     {
3730       /* TODO -- add direct handling of this case.  */
3731       goto fallback;
3732     }
3733
3734   /* CSTEPI is removed from the offset in case statement is after the
3735      increment.  If the step is not constant, we use zero instead.
3736      This is a bit imprecise (there is the extra addition), but
3737      redundancy elimination is likely to transform the code so that
3738      it uses value of the variable before increment anyway,
3739      so it is not that much unrealistic.  */
3740   if (cst_and_fits_in_hwi (cstep))
3741     cstepi = int_cst_value (cstep);
3742   else
3743     cstepi = 0;
3744
3745   if (cst_and_fits_in_hwi (ustep)
3746       && cst_and_fits_in_hwi (cstep))
3747     {
3748       ustepi = int_cst_value (ustep);
3749
3750       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3751         return INFTY;
3752     }
3753   else
3754     {
3755       tree rat;
3756
3757       rat = constant_multiple_of (utype, ustep, cstep);
3758
3759       if (!rat)
3760         return INFTY;
3761
3762       if (cst_and_fits_in_hwi (rat))
3763         ratio = int_cst_value (rat);
3764       else if (integer_onep (rat))
3765         ratio = 1;
3766       else if (integer_all_onesp (rat))
3767         ratio = -1;
3768       else
3769         return INFTY;
3770     }
3771
3772   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3773      or ratio == 1, it is better to handle this like
3774
3775      ubase - ratio * cbase + ratio * var
3776
3777      (also holds in the case ratio == -1, TODO.  */
3778
3779   if (cst_and_fits_in_hwi (cbase))
3780     {
3781       offset = - ratio * int_cst_value (cbase);
3782       cost += difference_cost (data,
3783                                ubase, integer_zero_node,
3784                                &symbol_present, &var_present, &offset,
3785                                depends_on);
3786     }
3787   else if (ratio == 1)
3788     {
3789       cost += difference_cost (data,
3790                                ubase, cbase,
3791                                &symbol_present, &var_present, &offset,
3792                                depends_on);
3793     }
3794   else
3795     {
3796       cost += force_var_cost (data, cbase, depends_on);
3797       cost += add_cost (TYPE_MODE (ctype));
3798       cost += difference_cost (data,
3799                                ubase, integer_zero_node,
3800                                &symbol_present, &var_present, &offset,
3801                                depends_on);
3802     }
3803
3804   /* If we are after the increment, the value of the candidate is higher by
3805      one iteration.  */
3806   if (stmt_after_increment (data->current_loop, cand, at))
3807     offset -= ratio * cstepi;
3808
3809   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3810      (symbol/var/const parts may be omitted).  If we are looking for an address,
3811      find the cost of addressing this.  */
3812   if (address_p)
3813     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3814
3815   /* Otherwise estimate the costs for computing the expression.  */
3816   aratio = ratio > 0 ? ratio : -ratio;
3817   if (!symbol_present && !var_present && !offset)
3818     {
3819       if (ratio != 1)
3820         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3821
3822       return cost;
3823     }
3824
3825   if (aratio != 1)
3826     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3827
3828   n_sums = 1;
3829   if (var_present
3830       /* Symbol + offset should be compile-time computable.  */
3831       && (symbol_present || offset))
3832     n_sums++;
3833
3834   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3835
3836 fallback:
3837   {
3838     /* Just get the expression, expand it and measure the cost.  */
3839     tree comp = get_computation_at (data->current_loop, use, cand, at);
3840
3841     if (!comp)
3842       return INFTY;
3843
3844     if (address_p)
3845       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3846
3847     return computation_cost (comp);
3848   }
3849 }
3850
3851 /* Determines the cost of the computation by that USE is expressed
3852    from induction variable CAND.  If ADDRESS_P is true, we just need
3853    to create an address from it, otherwise we want to get it into
3854    register.  A set of invariants we depend on is stored in
3855    DEPENDS_ON.  */
3856
3857 static unsigned
3858 get_computation_cost (struct ivopts_data *data,
3859                       struct iv_use *use, struct iv_cand *cand,
3860                       bool address_p, bitmap *depends_on)
3861 {
3862   return get_computation_cost_at (data,
3863                                   use, cand, address_p, depends_on, use->stmt);
3864 }
3865
3866 /* Determines cost of basing replacement of USE on CAND in a generic
3867    expression.  */
3868
3869 static bool
3870 determine_use_iv_cost_generic (struct ivopts_data *data,
3871                                struct iv_use *use, struct iv_cand *cand)
3872 {
3873   bitmap depends_on;
3874   unsigned cost;
3875
3876   /* The simple case first -- if we need to express value of the preserved
3877      original biv, the cost is 0.  This also prevents us from counting the
3878      cost of increment twice -- once at this use and once in the cost of
3879      the candidate.  */
3880   if (cand->pos == IP_ORIGINAL
3881       && cand->incremented_at == use->stmt)
3882     {
3883       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3884       return true;
3885     }
3886
3887   cost = get_computation_cost (data, use, cand, false, &depends_on);
3888   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3889
3890   return cost != INFTY;
3891 }
3892
3893 /* Determines cost of basing replacement of USE on CAND in an address.  */
3894
3895 static bool
3896 determine_use_iv_cost_address (struct ivopts_data *data,
3897                                struct iv_use *use, struct iv_cand *cand)
3898 {
3899   bitmap depends_on;
3900   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3901
3902   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3903
3904   return cost != INFTY;
3905 }
3906
3907 /* Computes value of induction variable IV in iteration NITER.  */
3908
3909 static tree
3910 iv_value (struct iv *iv, tree niter)
3911 {
3912   tree val;
3913   tree type = TREE_TYPE (iv->base);
3914
3915   niter = fold_convert (type, niter);
3916   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3917
3918   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3919 }
3920
3921 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3922
3923 static tree
3924 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3925 {
3926   tree val = iv_value (cand->iv, niter);
3927   tree type = TREE_TYPE (cand->iv->base);
3928
3929   if (stmt_after_increment (loop, cand, at))
3930     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3931
3932   return val;
3933 }
3934
3935 /* Returns period of induction variable iv.  */
3936
3937 static tree
3938 iv_period (struct iv *iv)
3939 {
3940   tree step = iv->step, period, type;
3941   tree pow2div;
3942
3943   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3944
3945   /* Period of the iv is gcd (step, type range).  Since type range is power
3946      of two, it suffices to determine the maximum power of two that divides
3947      step.  */
3948   pow2div = num_ending_zeros (step);
3949   type = unsigned_type_for (TREE_TYPE (step));
3950
3951   period = build_low_bits_mask (type,
3952                                 (TYPE_PRECISION (type)
3953                                  - tree_low_cst (pow2div, 1)));
3954
3955   return period;
3956 }
3957
3958 /* Returns the comparison operator used when eliminating the iv USE.  */
3959
3960 static enum tree_code
3961 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3962 {
3963   struct loop *loop = data->current_loop;
3964   basic_block ex_bb;
3965   edge exit;
3966
3967   ex_bb = bb_for_stmt (use->stmt);
3968   exit = EDGE_SUCC (ex_bb, 0);
3969   if (flow_bb_inside_loop_p (loop, exit->dest))
3970     exit = EDGE_SUCC (ex_bb, 1);
3971
3972   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3973 }
3974
3975 /* Check whether it is possible to express the condition in USE by comparison
3976    of candidate CAND.  If so, store the value compared with to BOUND.  */
3977
3978 static bool
3979 may_eliminate_iv (struct ivopts_data *data,
3980                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3981 {
3982   basic_block ex_bb;
3983   edge exit;
3984   struct tree_niter_desc *niter;
3985   tree nit, nit_type;
3986   tree wider_type, period, per_type;
3987   struct loop *loop = data->current_loop;
3988
3989   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3990     return false;
3991
3992   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3993      for other conditions inside loop body.  */
3994   ex_bb = bb_for_stmt (use->stmt);
3995   if (use->stmt != last_stmt (ex_bb)
3996       || TREE_CODE (use->stmt) != COND_EXPR)
3997     return false;
3998   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3999     return false;
4000
4001   exit = EDGE_SUCC (ex_bb, 0);
4002   if (flow_bb_inside_loop_p (loop, exit->dest))
4003     exit = EDGE_SUCC (ex_bb, 1);
4004   if (flow_bb_inside_loop_p (loop, exit->dest))
4005     return false;
4006
4007   niter = niter_for_exit (data, exit);
4008   if (!niter
4009       || !zero_p (niter->may_be_zero))
4010     return false;
4011
4012   nit = niter->niter;
4013   nit_type = TREE_TYPE (nit);
4014
4015   /* Determine whether we may use the variable to test whether niter iterations
4016      elapsed.  This is the case iff the period of the induction variable is
4017      greater than the number of iterations.  */
4018   period = iv_period (cand->iv);
4019   if (!period)
4020     return false;
4021   per_type = TREE_TYPE (period);
4022
4023   wider_type = TREE_TYPE (period);
4024   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4025     wider_type = per_type;
4026   else
4027     wider_type = nit_type;
4028
4029   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4030                                       fold_convert (wider_type, period),
4031                                       fold_convert (wider_type, nit))))
4032     return false;
4033
4034   *bound = cand_value_at (loop, cand, use->stmt, nit);
4035   return true;
4036 }
4037
4038 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4039
4040 static bool
4041 determine_use_iv_cost_condition (struct ivopts_data *data,
4042                                  struct iv_use *use, struct iv_cand *cand)
4043 {
4044   tree bound = NULL_TREE, op, cond;
4045   bitmap depends_on = NULL;
4046   unsigned cost;
4047
4048   /* Only consider real candidates.  */
4049   if (!cand->iv)
4050     {
4051       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4052       return false;
4053     }
4054
4055   if (may_eliminate_iv (data, use, cand, &bound))
4056     {
4057       cost = force_var_cost (data, bound, &depends_on);
4058
4059       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4060       return cost != INFTY;
4061     }
4062
4063   /* The induction variable elimination failed; just express the original
4064      giv.  If it is compared with an invariant, note that we cannot get
4065      rid of it.  */
4066   cost = get_computation_cost (data, use, cand, false, &depends_on);
4067
4068   cond = *use->op_p;
4069   if (TREE_CODE (cond) != SSA_NAME)
4070     {
4071       op = TREE_OPERAND (cond, 0);
4072       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4073         op = TREE_OPERAND (cond, 1);
4074       if (TREE_CODE (op) == SSA_NAME)
4075         {
4076           op = get_iv (data, op)->base;
4077           fd_ivopts_data = data;
4078           walk_tree (&op, find_depends, &depends_on, NULL);
4079         }
4080     }
4081
4082   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4083   return cost != INFTY;
4084 }
4085
4086 /* Checks whether it is possible to replace the final value of USE by
4087    a direct computation.  If so, the formula is stored to *VALUE.  */
4088
4089 static bool
4090 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4091                          tree *value)
4092 {
4093   struct loop *loop = data->current_loop;
4094   edge exit;
4095   struct tree_niter_desc *niter;
4096
4097   exit = single_dom_exit (loop);
4098   if (!exit)
4099     return false;
4100
4101   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4102                               bb_for_stmt (use->stmt)));
4103
4104   niter = niter_for_single_dom_exit (data);
4105   if (!niter
4106       || !zero_p (niter->may_be_zero))
4107     return false;
4108
4109   *value = iv_value (use->iv, niter->niter);
4110
4111   return true;
4112 }
4113
4114 /* Determines cost of replacing final value of USE using CAND.  */
4115
4116 static bool
4117 determine_use_iv_cost_outer (struct ivopts_data *data,
4118                              struct iv_use *use, struct iv_cand *cand)
4119 {
4120   bitmap depends_on;
4121   unsigned cost;
4122   edge exit;
4123   tree value = NULL_TREE;
4124   struct loop *loop = data->current_loop;
4125
4126   /* The simple case first -- if we need to express value of the preserved
4127      original biv, the cost is 0.  This also prevents us from counting the
4128      cost of increment twice -- once at this use and once in the cost of
4129      the candidate.  */
4130   if (cand->pos == IP_ORIGINAL
4131       && cand->incremented_at == use->stmt)
4132     {
4133       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4134       return true;
4135     }
4136
4137   if (!cand->iv)
4138     {
4139       if (!may_replace_final_value (data, use, &value))
4140         {
4141           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4142           return false;
4143         }
4144
4145       depends_on = NULL;
4146       cost = force_var_cost (data, value, &depends_on);
4147
4148       cost /= AVG_LOOP_NITER (loop);
4149
4150       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4151       return cost != INFTY;
4152     }
4153
4154   exit = single_dom_exit (loop);
4155   if (exit)
4156     {
4157       /* If there is just a single exit, we may use value of the candidate
4158          after we take it to determine the value of use.  */
4159       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4160                                       last_stmt (exit->src));
4161       if (cost != INFTY)
4162         cost /= AVG_LOOP_NITER (loop);
4163     }
4164   else
4165     {
4166       /* Otherwise we just need to compute the iv.  */
4167       cost = get_computation_cost (data, use, cand, false, &depends_on);
4168     }
4169
4170   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4171
4172   return cost != INFTY;
4173 }
4174
4175 /* Determines cost of basing replacement of USE on CAND.  Returns false
4176    if USE cannot be based on CAND.  */
4177
4178 static bool
4179 determine_use_iv_cost (struct ivopts_data *data,
4180                        struct iv_use *use, struct iv_cand *cand)
4181 {
4182   switch (use->type)
4183     {
4184     case USE_NONLINEAR_EXPR:
4185       return determine_use_iv_cost_generic (data, use, cand);
4186
4187     case USE_OUTER:
4188       return determine_use_iv_cost_outer (data, use, cand);
4189
4190     case USE_ADDRESS:
4191       return determine_use_iv_cost_address (data, use, cand);
4192
4193     case USE_COMPARE:
4194       return determine_use_iv_cost_condition (data, use, cand);
4195
4196     default:
4197       gcc_unreachable ();
4198     }
4199 }
4200
4201 /* Determines costs of basing the use of the iv on an iv candidate.  */
4202
4203 static void
4204 determine_use_iv_costs (struct ivopts_data *data)
4205 {
4206   unsigned i, j;
4207   struct iv_use *use;
4208   struct iv_cand *cand;
4209   bitmap to_clear = BITMAP_ALLOC (NULL);
4210
4211   alloc_use_cost_map (data);
4212
4213   for (i = 0; i < n_iv_uses (data); i++)
4214     {
4215       use = iv_use (data, i);
4216
4217       if (data->consider_all_candidates)
4218         {
4219           for (j = 0; j < n_iv_cands (data); j++)
4220             {
4221               cand = iv_cand (data, j);
4222               determine_use_iv_cost (data, use, cand);
4223             }
4224         }
4225       else
4226         {
4227           bitmap_iterator bi;
4228
4229           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4230             {
4231               cand = iv_cand (data, j);
4232               if (!determine_use_iv_cost (data, use, cand))
4233                 bitmap_set_bit (to_clear, j);
4234             }
4235
4236           /* Remove the candidates for that the cost is infinite from
4237              the list of related candidates.  */
4238           bitmap_and_compl_into (use->related_cands, to_clear);
4239           bitmap_clear (to_clear);
4240         }
4241     }
4242
4243   BITMAP_FREE (to_clear);
4244
4245   if (dump_file && (dump_flags & TDF_DETAILS))
4246     {
4247       fprintf (dump_file, "Use-candidate costs:\n");
4248
4249       for (i = 0; i < n_iv_uses (data); i++)
4250         {
4251           use = iv_use (data, i);
4252
4253           fprintf (dump_file, "Use %d:\n", i);
4254           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4255           for (j = 0; j < use->n_map_members; j++)
4256             {
4257               if (!use->cost_map[j].cand
4258                   || use->cost_map[j].cost == INFTY)
4259                 continue;
4260
4261               fprintf (dump_file, "  %d\t%d\t",
4262                        use->cost_map[j].cand->id,
4263                        use->cost_map[j].cost);
4264               if (use->cost_map[j].depends_on)
4265                 bitmap_print (dump_file,
4266                               use->cost_map[j].depends_on, "","");
4267               fprintf (dump_file, "\n");
4268             }
4269
4270           fprintf (dump_file, "\n");
4271         }
4272       fprintf (dump_file, "\n");
4273     }
4274 }
4275
4276 /* Determines cost of the candidate CAND.  */
4277
4278 static void
4279 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4280 {
4281   unsigned cost_base, cost_step;
4282   tree base;
4283
4284   if (!cand->iv)
4285     {
4286       cand->cost = 0;
4287       return;
4288     }
4289
4290   /* There are two costs associated with the candidate -- its increment
4291      and its initialization.  The second is almost negligible for any loop
4292      that rolls enough, so we take it just very little into account.  */
4293
4294   base = cand->iv->base;
4295   cost_base = force_var_cost (data, base, NULL);
4296   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4297
4298   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4299
4300   /* Prefer the original iv unless we may gain something by replacing it;
4301      this is not really relevant for artificial ivs created by other
4302      passes.  */
4303   if (cand->pos == IP_ORIGINAL
4304       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4305     cand->cost--;
4306
4307   /* Prefer not to insert statements into latch unless there are some
4308      already (so that we do not create unnecessary jumps).  */
4309   if (cand->pos == IP_END
4310       && empty_block_p (ip_end_pos (data->current_loop)))
4311     cand->cost++;
4312 }
4313
4314 /* Determines costs of computation of the candidates.  */
4315
4316 static void
4317 determine_iv_costs (struct ivopts_data *data)
4318 {
4319   unsigned i;
4320
4321   if (dump_file && (dump_flags & TDF_DETAILS))
4322     {
4323       fprintf (dump_file, "Candidate costs:\n");
4324       fprintf (dump_file, "  cand\tcost\n");
4325     }
4326
4327   for (i = 0; i < n_iv_cands (data); i++)
4328     {
4329       struct iv_cand *cand = iv_cand (data, i);
4330
4331       determine_iv_cost (data, cand);
4332
4333       if (dump_file && (dump_flags & TDF_DETAILS))
4334         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4335     }
4336
4337 if (dump_file && (dump_flags & TDF_DETAILS))
4338       fprintf (dump_file, "\n");
4339 }
4340
4341 /* Calculates cost for having SIZE induction variables.  */
4342
4343 static unsigned
4344 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4345 {
4346   return global_cost_for_size (size,
4347                                loop_data (data->current_loop)->regs_used,
4348                                n_iv_uses (data));
4349 }
4350
4351 /* For each size of the induction variable set determine the penalty.  */
4352
4353 static void
4354 determine_set_costs (struct ivopts_data *data)
4355 {
4356   unsigned j, n;
4357   tree phi, op;
4358   struct loop *loop = data->current_loop;
4359   bitmap_iterator bi;
4360
4361   /* We use the following model (definitely improvable, especially the
4362      cost function -- TODO):
4363
4364      We estimate the number of registers available (using MD data), name it A.
4365
4366      We estimate the number of registers used by the loop, name it U.  This
4367      number is obtained as the number of loop phi nodes (not counting virtual
4368      registers and bivs) + the number of variables from outside of the loop.
4369
4370      We set a reserve R (free regs that are used for temporary computations,
4371      etc.).  For now the reserve is a constant 3.
4372
4373      Let I be the number of induction variables.
4374
4375      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4376         make a lot of ivs without a reason).
4377      -- if A - R < U + I <= A, the cost is I * PRES_COST
4378      -- if U + I > A, the cost is I * PRES_COST and
4379         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4380
4381   if (dump_file && (dump_flags & TDF_DETAILS))
4382     {
4383       fprintf (dump_file, "Global costs:\n");
4384       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4385       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4386       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4387       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4388     }
4389
4390   n = 0;
4391   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4392     {
4393       op = PHI_RESULT (phi);
4394
4395       if (!is_gimple_reg (op))
4396         continue;
4397
4398       if (get_iv (data, op))
4399         continue;
4400
4401       n++;
4402     }
4403
4404   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4405     {
4406       struct version_info *info = ver_info (data, j);
4407
4408       if (info->inv_id && info->has_nonlin_use)
4409         n++;
4410     }
4411
4412   loop_data (loop)->regs_used = n;
4413   if (dump_file && (dump_flags & TDF_DETAILS))
4414     fprintf (dump_file, "  regs_used %d\n", n);
4415
4416   if (dump_file && (dump_flags & TDF_DETAILS))
4417     {
4418       fprintf (dump_file, "  cost for size:\n");
4419       fprintf (dump_file, "  ivs\tcost\n");
4420       for (j = 0; j <= 2 * target_avail_regs; j++)
4421         fprintf (dump_file, "  %d\t%d\n", j,
4422                  ivopts_global_cost_for_size (data, j));
4423       fprintf (dump_file, "\n");
4424     }
4425 }
4426
4427 /* Returns true if A is a cheaper cost pair than B.  */
4428
4429 static bool
4430 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4431 {
4432   if (!a)
4433     return false;
4434
4435   if (!b)
4436     return true;
4437
4438   if (a->cost < b->cost)
4439     return true;
4440
4441   if (a->cost > b->cost)
4442     return false;
4443
4444   /* In case the costs are the same, prefer the cheaper candidate.  */
4445   if (a->cand->cost < b->cand->cost)
4446     return true;
4447
4448   return false;
4449 }
4450
4451 /* Computes the cost field of IVS structure.  */
4452
4453 static void
4454 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4455 {
4456   unsigned cost = 0;
4457
4458   cost += ivs->cand_use_cost;
4459   cost += ivs->cand_cost;
4460   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4461
4462   ivs->cost = cost;
4463 }
4464
4465 /* Remove invariants in set INVS to set IVS.  */
4466
4467 static void
4468 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4469 {
4470   bitmap_iterator bi;
4471   unsigned iid;
4472
4473   if (!invs)
4474     return;
4475
4476   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4477     {
4478       ivs->n_invariant_uses[iid]--;
4479       if (ivs->n_invariant_uses[iid] == 0)
4480         ivs->n_regs--;
4481     }
4482 }
4483
4484 /* Set USE not to be expressed by any candidate in IVS.  */
4485
4486 static void
4487 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4488                  struct iv_use *use)
4489 {
4490   unsigned uid = use->id, cid;
4491   struct cost_pair *cp;
4492
4493   cp = ivs->cand_for_use[uid];
4494   if (!cp)
4495     return;
4496   cid = cp->cand->id;
4497
4498   ivs->bad_uses++;
4499   ivs->cand_for_use[uid] = NULL;
4500   ivs->n_cand_uses[cid]--;
4501
4502   if (ivs->n_cand_uses[cid] == 0)
4503     {
4504       bitmap_clear_bit (ivs->cands, cid);
4505       /* Do not count the pseudocandidates.  */
4506       if (cp->cand->iv)
4507         ivs->n_regs--;
4508       ivs->n_cands--;
4509       ivs->cand_cost -= cp->cand->cost;
4510
4511       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4512     }
4513
4514   ivs->cand_use_cost -= cp->cost;
4515
4516   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4517   iv_ca_recount_cost (data, ivs);
4518 }
4519
4520 /* Add invariants in set INVS to set IVS.  */
4521
4522 static void
4523 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4524 {
4525   bitmap_iterator bi;
4526   unsigned iid;
4527
4528   if (!invs)
4529     return;
4530
4531   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4532     {
4533       ivs->n_invariant_uses[iid]++;
4534       if (ivs->n_invariant_uses[iid] == 1)
4535         ivs->n_regs++;
4536     }
4537 }
4538
4539 /* Set cost pair for USE in set IVS to CP.  */
4540
4541 static void
4542 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4543               struct iv_use *use, struct cost_pair *cp)
4544 {
4545   unsigned uid = use->id, cid;
4546
4547   if (ivs->cand_for_use[uid] == cp)
4548     return;
4549
4550   if (ivs->cand_for_use[uid])
4551     iv_ca_set_no_cp (data, ivs, use);
4552
4553   if (cp)
4554     {
4555       cid = cp->cand->id;
4556
4557       ivs->bad_uses--;
4558       ivs->cand_for_use[uid] = cp;
4559       ivs->n_cand_uses[cid]++;
4560       if (ivs->n_cand_uses[cid] == 1)
4561         {
4562           bitmap_set_bit (ivs->cands, cid);
4563           /* Do not count the pseudocandidates.  */
4564           if (cp->cand->iv)
4565             ivs->n_regs++;
4566           ivs->n_cands++;
4567           ivs->cand_cost += cp->cand->cost;
4568
4569           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4570         }
4571
4572       ivs->cand_use_cost += cp->cost;
4573       iv_ca_set_add_invariants (ivs, cp->depends_on);
4574       iv_ca_recount_cost (data, ivs);
4575     }
4576 }
4577
4578 /* Extend set IVS by expressing USE by some of the candidates in it
4579    if possible.  */
4580
4581 static void
4582 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4583                struct iv_use *use)
4584 {
4585   struct cost_pair *best_cp = NULL, *cp;
4586   bitmap_iterator bi;
4587   unsigned i;
4588
4589   gcc_assert (ivs->upto >= use->id);
4590
4591   if (ivs->upto == use->id)
4592     {
4593       ivs->upto++;
4594       ivs->bad_uses++;
4595     }
4596
4597   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4598     {
4599       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4600
4601       if (cheaper_cost_pair (cp, best_cp))
4602         best_cp = cp;
4603     }
4604
4605   iv_ca_set_cp (data, ivs, use, best_cp);
4606 }
4607
4608 /* Get cost for assignment IVS.  */
4609
4610 static unsigned
4611 iv_ca_cost (struct iv_ca *ivs)
4612 {
4613   return (ivs->bad_uses ? INFTY : ivs->cost);
4614 }
4615
4616 /* Returns true if all dependences of CP are among invariants in IVS.  */
4617
4618 static bool
4619 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4620 {
4621   unsigned i;
4622   bitmap_iterator bi;
4623
4624   if (!cp->depends_on)
4625     return true;
4626
4627   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4628     {
4629       if (ivs->n_invariant_uses[i] == 0)
4630         return false;
4631     }
4632
4633   return true;
4634 }
4635
4636 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4637    it before NEXT_CHANGE.  */
4638
4639 static struct iv_ca_delta *
4640 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4641                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4642 {
4643   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4644
4645   change->use = use;
4646   change->old_cp = old_cp;
4647   change->new_cp = new_cp;
4648   change->next_change = next_change;
4649
4650   return change;
4651 }
4652
4653 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4654    are rewritten.  */
4655
4656 static struct iv_ca_delta *
4657 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4658 {
4659   struct iv_ca_delta *last;
4660
4661   if (!l2)
4662     return l1;
4663
4664   if (!l1)
4665     return l2;
4666
4667   for (last = l1; last->next_change; last = last->next_change)
4668     continue;
4669   last->next_change = l2;
4670
4671   return l1;
4672 }
4673
4674 /* Returns candidate by that USE is expressed in IVS.  */
4675
4676 static struct cost_pair *
4677 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4678 {
4679   return ivs->cand_for_use[use->id];
4680 }
4681
4682 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4683
4684 static struct iv_ca_delta *
4685 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4686 {
4687   struct iv_ca_delta *act, *next, *prev = NULL;
4688   struct cost_pair *tmp;
4689
4690   for (act = delta; act; act = next)
4691     {
4692       next = act->next_change;
4693       act->next_change = prev;
4694       prev = act;
4695
4696       tmp = act->old_cp;
4697       act->old_cp = act->new_cp;
4698       act->new_cp = tmp;
4699     }
4700
4701   return prev;
4702 }
4703
4704 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4705    reverted instead.  */
4706
4707 static void
4708 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4709                     struct iv_ca_delta *delta, bool forward)
4710 {
4711   struct cost_pair *from, *to;
4712   struct iv_ca_delta *act;
4713
4714   if (!forward)
4715     delta = iv_ca_delta_reverse (delta);
4716
4717   for (act = delta; act; act = act->next_change)
4718     {
4719       from = act->old_cp;
4720       to = act->new_cp;
4721       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4722       iv_ca_set_cp (data, ivs, act->use, to);
4723     }
4724
4725   if (!forward)
4726     iv_ca_delta_reverse (delta);
4727 }
4728
4729 /* Returns true if CAND is used in IVS.  */
4730
4731 static bool
4732 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4733 {
4734   return ivs->n_cand_uses[cand->id] > 0;
4735 }
4736
4737 /* Returns number of induction variable candidates in the set IVS.  */
4738
4739 static unsigned
4740 iv_ca_n_cands (struct iv_ca *ivs)
4741 {
4742   return ivs->n_cands;
4743 }
4744
4745 /* Free the list of changes DELTA.  */
4746
4747 static void
4748 iv_ca_delta_free (struct iv_ca_delta **delta)
4749 {
4750   struct iv_ca_delta *act, *next;
4751
4752   for (act = *delta; act; act = next)
4753     {
4754       next = act->next_change;
4755       free (act);
4756     }
4757
4758   *delta = NULL;
4759 }
4760
4761 /* Allocates new iv candidates assignment.  */
4762
4763 static struct iv_ca *
4764 iv_ca_new (struct ivopts_data *data)
4765 {
4766   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4767
4768   nw->upto = 0;
4769   nw->bad_uses = 0;
4770   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4771   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4772   nw->cands = BITMAP_ALLOC (NULL);
4773   nw->n_cands = 0;
4774   nw->n_regs = 0;
4775   nw->cand_use_cost = 0;
4776   nw->cand_cost = 0;
4777   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4778   nw->cost = 0;
4779
4780   return nw;
4781 }
4782
4783 /* Free memory occupied by the set IVS.  */
4784
4785 static void
4786 iv_ca_free (struct iv_ca **ivs)
4787 {
4788   free ((*ivs)->cand_for_use);
4789   free ((*ivs)->n_cand_uses);
4790   BITMAP_FREE ((*ivs)->cands);
4791   free ((*ivs)->n_invariant_uses);
4792   free (*ivs);
4793   *ivs = NULL;
4794 }
4795
4796 /* Dumps IVS to FILE.  */
4797
4798 static void
4799 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4800 {
4801   const char *pref = "  invariants ";
4802   unsigned i;
4803
4804   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4805   bitmap_print (file, ivs->cands, "  candidates ","\n");
4806
4807   for (i = 1; i <= data->max_inv_id; i++)
4808     if (ivs->n_invariant_uses[i])
4809       {
4810         fprintf (file, "%s%d", pref, i);
4811         pref = ", ";
4812       }
4813   fprintf (file, "\n");
4814 }
4815
4816 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4817    new set, and store differences in DELTA.  Number of induction variables
4818    in the new set is stored to N_IVS.  */
4819
4820 static unsigned
4821 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4822               struct iv_cand *cand, struct iv_ca_delta **delta,
4823               unsigned *n_ivs)
4824 {
4825   unsigned i, cost;
4826   struct iv_use *use;
4827   struct cost_pair *old_cp, *new_cp;
4828
4829   *delta = NULL;
4830   for (i = 0; i < ivs->upto; i++)
4831     {
4832       use = iv_use (data, i);
4833       old_cp = iv_ca_cand_for_use (ivs, use);
4834
4835       if (old_cp
4836           && old_cp->cand == cand)
4837         continue;
4838
4839       new_cp = get_use_iv_cost (data, use, cand);
4840       if (!new_cp)
4841         continue;
4842
4843       if (!iv_ca_has_deps (ivs, new_cp))
4844         continue;
4845
4846       if (!cheaper_cost_pair (new_cp, old_cp))
4847         continue;
4848
4849       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4850     }
4851
4852   iv_ca_delta_commit (data, ivs, *delta, true);
4853   cost = iv_ca_cost (ivs);
4854   if (n_ivs)
4855     *n_ivs = iv_ca_n_cands (ivs);
4856   iv_ca_delta_commit (data, ivs, *delta, false);
4857
4858   return cost;
4859 }
4860
4861 /* Try narrowing set IVS by removing CAND.  Return the cost of
4862    the new set and store the differences in DELTA.  */
4863
4864 static unsigned
4865 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4866               struct iv_cand *cand, struct iv_ca_delta **delta)
4867 {
4868   unsigned i, ci;
4869   struct iv_use *use;
4870   struct cost_pair *old_cp, *new_cp, *cp;
4871   bitmap_iterator bi;
4872   struct iv_cand *cnd;
4873   unsigned cost;
4874
4875   *delta = NULL;
4876   for (i = 0; i < n_iv_uses (data); i++)
4877     {
4878       use = iv_use (data, i);
4879
4880       old_cp = iv_ca_cand_for_use (ivs, use);
4881       if (old_cp->cand != cand)
4882         continue;
4883
4884       new_cp = NULL;
4885
4886       if (data->consider_all_candidates)
4887         {
4888           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4889             {
4890               if (ci == cand->id)
4891                 continue;
4892
4893               cnd = iv_cand (data, ci);
4894
4895               cp = get_use_iv_cost (data, use, cnd);
4896               if (!cp)
4897                 continue;
4898               if (!iv_ca_has_deps (ivs, cp))
4899                 continue;
4900
4901               if (!cheaper_cost_pair (cp, new_cp))
4902                 continue;
4903
4904               new_cp = cp;
4905             }
4906         }
4907       else
4908         {
4909           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4910             {
4911               if (ci == cand->id)
4912                 continue;
4913
4914               cnd = iv_cand (data, ci);
4915
4916               cp = get_use_iv_cost (data, use, cnd);
4917               if (!cp)
4918                 continue;
4919               if (!iv_ca_has_deps (ivs, cp))
4920                 continue;
4921
4922               if (!cheaper_cost_pair (cp, new_cp))
4923                 continue;
4924
4925               new_cp = cp;
4926             }
4927         }
4928
4929       if (!new_cp)
4930         {
4931           iv_ca_delta_free (delta);
4932           return INFTY;
4933         }
4934
4935       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4936     }
4937
4938   iv_ca_delta_commit (data, ivs, *delta, true);
4939   cost = iv_ca_cost (ivs);
4940   iv_ca_delta_commit (data, ivs, *delta, false);
4941
4942   return cost;
4943 }
4944
4945 /* Try optimizing the set of candidates IVS by removing candidates different
4946    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4947    differences in DELTA.  */
4948
4949 static unsigned
4950 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4951              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4952 {
4953   bitmap_iterator bi;
4954   struct iv_ca_delta *act_delta, *best_delta;
4955   unsigned i, best_cost, acost;
4956   struct iv_cand *cand;
4957
4958   best_delta = NULL;
4959   best_cost = iv_ca_cost (ivs);
4960
4961   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4962     {
4963       cand = iv_cand (data, i);
4964
4965       if (cand == except_cand)
4966         continue;
4967
4968       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4969
4970       if (acost < best_cost)
4971         {
4972           best_cost = acost;
4973           iv_ca_delta_free (&best_delta);
4974           best_delta = act_delta;
4975         }
4976       else
4977         iv_ca_delta_free (&act_delta);
4978     }
4979
4980   if (!best_delta)
4981     {
4982       *delta = NULL;
4983       return best_cost;
4984     }
4985
4986   /* Recurse to possibly remove other unnecessary ivs.  */
4987   iv_ca_delta_commit (data, ivs, best_delta, true);
4988   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4989   iv_ca_delta_commit (data, ivs, best_delta, false);
4990   *delta = iv_ca_delta_join (best_delta, *delta);
4991   return best_cost;
4992 }
4993
4994 /* Tries to extend the sets IVS in the best possible way in order
4995    to express the USE.  */
4996
4997 static bool
4998 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4999                   struct iv_use *use)
5000 {
5001   unsigned best_cost, act_cost;
5002   unsigned i;
5003   bitmap_iterator bi;
5004   struct iv_cand *cand;
5005   struct iv_ca_delta *best_delta = NULL, *act_delta;
5006   struct cost_pair *cp;
5007
5008   iv_ca_add_use (data, ivs, use);
5009   best_cost = iv_ca_cost (ivs);
5010
5011   cp = iv_ca_cand_for_use (ivs, use);
5012   if (cp)
5013     {
5014       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5015       iv_ca_set_no_cp (data, ivs, use);
5016     }
5017
5018   /* First try important candidates.  Only if it fails, try the specific ones.
5019      Rationale -- in loops with many variables the best choice often is to use
5020      just one generic biv.  If we added here many ivs specific to the uses,
5021      the optimization algorithm later would be likely to get stuck in a local
5022      minimum, thus causing us to create too many ivs.  The approach from
5023      few ivs to more seems more likely to be successful -- starting from few
5024      ivs, replacing an expensive use by a specific iv should always be a
5025      win.  */
5026   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5027     {
5028       cand = iv_cand (data, i);
5029
5030       if (iv_ca_cand_used_p (ivs, cand))
5031         continue;
5032
5033       cp = get_use_iv_cost (data, use, cand);
5034       if (!cp)
5035         continue;
5036
5037       iv_ca_set_cp (data, ivs, use, cp);
5038       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5039       iv_ca_set_no_cp (data, ivs, use);
5040       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5041
5042       if (act_cost < best_cost)
5043         {
5044           best_cost = act_cost;
5045
5046           iv_ca_delta_free (&best_delta);
5047           best_delta = act_delta;
5048         }
5049       else
5050         iv_ca_delta_free (&act_delta);
5051     }
5052
5053   if (best_cost == INFTY)
5054     {
5055       for (i = 0; i < use->n_map_members; i++)
5056         {
5057           cp = use->cost_map + i;
5058           cand = cp->cand;
5059           if (!cand)
5060             continue;
5061
5062           /* Already tried this.  */
5063           if (cand->important)
5064             continue;
5065
5066           if (iv_ca_cand_used_p (ivs, cand))
5067             continue;
5068
5069           act_delta = NULL;
5070           iv_ca_set_cp (data, ivs, use, cp);
5071           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5072           iv_ca_set_no_cp (data, ivs, use);
5073           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5074                                        cp, act_delta);
5075
5076           if (act_cost < best_cost)
5077             {
5078               best_cost = act_cost;
5079
5080               if (best_delta)
5081                 iv_ca_delta_free (&best_delta);
5082               best_delta = act_delta;
5083             }
5084           else
5085             iv_ca_delta_free (&act_delta);
5086         }
5087     }
5088
5089   iv_ca_delta_commit (data, ivs, best_delta, true);
5090   iv_ca_delta_free (&best_delta);
5091
5092   return (best_cost != INFTY);
5093 }
5094
5095 /* Finds an initial assignment of candidates to uses.  */
5096
5097 static struct iv_ca *
5098 get_initial_solution (struct ivopts_data *data)
5099 {
5100   struct iv_ca *ivs = iv_ca_new (data);
5101   unsigned i;
5102
5103   for (i = 0; i < n_iv_uses (data); i++)
5104     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5105       {
5106         iv_ca_free (&ivs);
5107         return NULL;
5108       }
5109
5110   return ivs;
5111 }
5112
5113 /* Tries to improve set of induction variables IVS.  */
5114
5115 static bool
5116 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5117 {
5118   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5119   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5120   struct iv_cand *cand;
5121
5122   /* Try extending the set of induction variables by one.  */
5123   for (i = 0; i < n_iv_cands (data); i++)
5124     {
5125       cand = iv_cand (data, i);
5126
5127       if (iv_ca_cand_used_p (ivs, cand))
5128         continue;
5129
5130       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5131       if (!act_delta)
5132         continue;
5133
5134       /* If we successfully added the candidate and the set is small enough,
5135          try optimizing it by removing other candidates.  */
5136       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5137         {
5138           iv_ca_delta_commit (data, ivs, act_delta, true);
5139           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5140           iv_ca_delta_commit (data, ivs, act_delta, false);
5141           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5142         }
5143
5144       if (acost < best_cost)
5145         {
5146           best_cost = acost;
5147           iv_ca_delta_free (&best_delta);
5148           best_delta = act_delta;
5149         }
5150       else
5151         iv_ca_delta_free (&act_delta);
5152     }
5153
5154   if (!best_delta)
5155     {
5156       /* Try removing the candidates from the set instead.  */
5157       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5158
5159       /* Nothing more we can do.  */
5160       if (!best_delta)
5161         return false;
5162     }
5163
5164   iv_ca_delta_commit (data, ivs, best_delta, true);
5165   gcc_assert (best_cost == iv_ca_cost (ivs));
5166   iv_ca_delta_free (&best_delta);
5167   return true;
5168 }
5169
5170 /* Attempts to find the optimal set of induction variables.  We do simple
5171    greedy heuristic -- we try to replace at most one candidate in the selected
5172    solution and remove the unused ivs while this improves the cost.  */
5173
5174 static struct iv_ca *
5175 find_optimal_iv_set (struct ivopts_data *data)
5176 {
5177   unsigned i;
5178   struct iv_ca *set;
5179   struct iv_use *use;
5180
5181   /* Get the initial solution.  */
5182   set = get_initial_solution (data);
5183   if (!set)
5184     {
5185       if (dump_file && (dump_flags & TDF_DETAILS))
5186         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5187       return NULL;
5188     }
5189
5190   if (dump_file && (dump_flags & TDF_DETAILS))
5191     {
5192       fprintf (dump_file, "Initial set of candidates:\n");
5193       iv_ca_dump (data, dump_file, set);
5194     }
5195
5196   while (try_improve_iv_set (data, set))
5197     {
5198       if (dump_file && (dump_flags & TDF_DETAILS))
5199         {
5200           fprintf (dump_file, "Improved to:\n");
5201           iv_ca_dump (data, dump_file, set);
5202         }
5203     }
5204
5205   if (dump_file && (dump_flags & TDF_DETAILS))
5206     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5207
5208   for (i = 0; i < n_iv_uses (data); i++)
5209     {
5210       use = iv_use (data, i);
5211       use->selected = iv_ca_cand_for_use (set, use)->cand;
5212     }
5213
5214   return set;
5215 }
5216
5217 /* Creates a new induction variable corresponding to CAND.  */
5218
5219 static void
5220 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5221 {
5222   block_stmt_iterator incr_pos;
5223   tree base;
5224   bool after = false;
5225
5226   if (!cand->iv)
5227     return;
5228
5229   switch (cand->pos)
5230     {
5231     case IP_NORMAL:
5232       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5233       break;
5234
5235     case IP_END:
5236       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5237       after = true;
5238       break;
5239
5240     case IP_ORIGINAL:
5241       /* Mark that the iv is preserved.  */
5242       name_info (data, cand->var_before)->preserve_biv = true;
5243       name_info (data, cand->var_after)->preserve_biv = true;
5244
5245       /* Rewrite the increment so that it uses var_before directly.  */
5246       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5247
5248       return;
5249     }
5250
5251   gimple_add_tmp_var (cand->var_before);
5252   add_referenced_tmp_var (cand->var_before);
5253
5254   base = unshare_expr (cand->iv->base);
5255
5256   create_iv (base, unshare_expr (cand->iv->step),
5257              cand->var_before, data->current_loop,
5258              &incr_pos, after, &cand->var_before, &cand->var_after);
5259 }
5260
5261 /* Creates new induction variables described in SET.  */
5262
5263 static void
5264 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5265 {
5266   unsigned i;
5267   struct iv_cand *cand;
5268   bitmap_iterator bi;
5269
5270   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5271     {
5272       cand = iv_cand (data, i);
5273       create_new_iv (data, cand);
5274     }
5275 }
5276
5277 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5278    is true, remove also the ssa name defined by the statement.  */
5279
5280 static void
5281 remove_statement (tree stmt, bool including_defined_name)
5282 {
5283   if (TREE_CODE (stmt) == PHI_NODE)
5284     {
5285       if (!including_defined_name)
5286         {
5287           /* Prevent the ssa name defined by the statement from being removed.  */
5288           SET_PHI_RESULT (stmt, NULL);
5289         }
5290       remove_phi_node (stmt, NULL_TREE);
5291     }
5292   else
5293     {
5294       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5295
5296       bsi_remove (&bsi);
5297     }
5298 }
5299
5300 /* Rewrites USE (definition of iv used in a nonlinear expression)
5301    using candidate CAND.  */
5302
5303 static void
5304 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5305                             struct iv_use *use, struct iv_cand *cand)
5306 {
5307   tree comp;
5308   tree op, stmts, tgt, ass;
5309   block_stmt_iterator bsi, pbsi;
5310
5311   /* An important special case -- if we are asked to express value of
5312      the original iv by itself, just exit; there is no need to
5313      introduce a new computation (that might also need casting the
5314      variable to unsigned and back).  */
5315   if (cand->pos == IP_ORIGINAL
5316       && TREE_CODE (use->stmt) == MODIFY_EXPR
5317       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5318     {
5319       op = TREE_OPERAND (use->stmt, 1);
5320
5321       /* Be a bit careful.  In case variable is expressed in some
5322          complicated way, rewrite it so that we may get rid of this
5323          complicated expression.  */
5324       if ((TREE_CODE (op) == PLUS_EXPR
5325            || TREE_CODE (op) == MINUS_EXPR)
5326           && TREE_OPERAND (op, 0) == cand->var_before
5327           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5328         return;
5329     }
5330
5331   comp = get_computation (data->current_loop, use, cand);
5332   switch (TREE_CODE (use->stmt))
5333     {
5334     case PHI_NODE:
5335       tgt = PHI_RESULT (use->stmt);
5336
5337       /* If we should keep the biv, do not replace it.  */
5338       if (name_info (data, tgt)->preserve_biv)
5339         return;
5340
5341       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5342       while (!bsi_end_p (pbsi)
5343              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5344         {
5345           bsi = pbsi;
5346           bsi_next (&pbsi);
5347         }
5348       break;
5349
5350     case MODIFY_EXPR:
5351       tgt = TREE_OPERAND (use->stmt, 0);
5352       bsi = bsi_for_stmt (use->stmt);
5353       break;
5354
5355     default:
5356       gcc_unreachable ();
5357     }
5358
5359   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5360
5361   if (TREE_CODE (use->stmt) == PHI_NODE)
5362     {
5363       if (stmts)
5364         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5365       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5366       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5367       remove_statement (use->stmt, false);
5368       SSA_NAME_DEF_STMT (tgt) = ass;
5369     }
5370   else
5371     {
5372       if (stmts)
5373         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5374       TREE_OPERAND (use->stmt, 1) = op;
5375     }
5376 }
5377
5378 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5379    for_each_index.  */
5380
5381 static bool
5382 idx_remove_ssa_names (tree base, tree *idx,
5383                       void *data ATTRIBUTE_UNUSED)
5384 {
5385   tree *op;
5386
5387   if (TREE_CODE (*idx) == SSA_NAME)
5388     *idx = SSA_NAME_VAR (*idx);
5389
5390   if (TREE_CODE (base) == ARRAY_REF)
5391     {
5392       op = &TREE_OPERAND (base, 2);
5393       if (*op
5394           && TREE_CODE (*op) == SSA_NAME)
5395         *op = SSA_NAME_VAR (*op);
5396       op = &TREE_OPERAND (base, 3);
5397       if (*op
5398           && TREE_CODE (*op) == SSA_NAME)
5399         *op = SSA_NAME_VAR (*op);
5400     }
5401
5402   return true;
5403 }
5404
5405 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5406
5407 static tree
5408 unshare_and_remove_ssa_names (tree ref)
5409 {
5410   ref = unshare_expr (ref);
5411   for_each_index (&ref, idx_remove_ssa_names, NULL);
5412
5413   return ref;
5414 }
5415
5416 /* Extract the alias analysis info for the memory reference REF.  There are
5417    several ways how this information may be stored and what precisely is
5418    its semantics depending on the type of the reference, but there always is
5419    somewhere hidden one _DECL node that is used to determine the set of
5420    virtual operands for the reference.  The code below deciphers this jungle
5421    and extracts this single useful piece of information.  */
5422
5423 static tree
5424 get_ref_tag (tree ref)
5425 {
5426   tree var = get_base_address (ref);
5427   tree tag;
5428
5429   if (!var)
5430     return NULL_TREE;
5431
5432   if (TREE_CODE (var) == INDIRECT_REF)
5433     var = TREE_OPERAND (var, 0);
5434   if (TREE_CODE (var) == SSA_NAME)
5435     {
5436       if (SSA_NAME_PTR_INFO (var))
5437         {
5438           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5439           if (tag)
5440             return tag;
5441         }
5442
5443       var = SSA_NAME_VAR (var);
5444     }
5445
5446   if (DECL_P (var))
5447     {
5448       tag = var_ann (var)->type_mem_tag;
5449       if (tag)
5450         return tag;
5451
5452       return var;
5453     }
5454
5455   return NULL_TREE;
5456 }
5457
5458 /* Copies the reference information from OLD_REF to NEW_REF.  */
5459
5460 static void
5461 copy_ref_info (tree new_ref, tree old_ref)
5462 {
5463   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5464     copy_mem_ref_info (new_ref, old_ref);
5465   else
5466     {
5467       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5468       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5469     }
5470 }
5471
5472 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5473
5474 static void
5475 rewrite_use_address (struct ivopts_data *data,
5476                      struct iv_use *use, struct iv_cand *cand)
5477 {
5478   struct affine_tree_combination aff;
5479   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5480   tree ref;
5481
5482   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5483   unshare_aff_combination (&aff);
5484
5485   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5486   copy_ref_info (ref, *use->op_p);
5487   *use->op_p = ref;
5488 }
5489
5490 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5491    candidate CAND.  */
5492
5493 static void
5494 rewrite_use_compare (struct ivopts_data *data,
5495                      struct iv_use *use, struct iv_cand *cand)
5496 {
5497   tree comp;
5498   tree *op_p, cond, op, stmts, bound;
5499   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5500   enum tree_code compare;
5501   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5502
5503   bound = cp->value;
5504   if (bound)
5505     {
5506       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5507       tree var_type = TREE_TYPE (var);
5508
5509       compare = iv_elimination_compare (data, use);
5510       bound = fold_convert (var_type, bound);
5511       op = force_gimple_operand (unshare_expr (bound), &stmts,
5512                                  true, NULL_TREE);
5513
5514       if (stmts)
5515         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5516
5517       *use->op_p = build2 (compare, boolean_type_node, var, op);
5518       update_stmt (use->stmt);
5519       return;
5520     }
5521
5522   /* The induction variable elimination failed; just express the original
5523      giv.  */
5524   comp = get_computation (data->current_loop, use, cand);
5525
5526   cond = *use->op_p;
5527   op_p = &TREE_OPERAND (cond, 0);
5528   if (TREE_CODE (*op_p) != SSA_NAME
5529       || zero_p (get_iv (data, *op_p)->step))
5530     op_p = &TREE_OPERAND (cond, 1);
5531
5532   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5533   if (stmts)
5534     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5535
5536   *op_p = op;
5537 }
5538
5539 /* Ensure that operand *OP_P may be used at the end of EXIT without
5540    violating loop closed ssa form.  */
5541
5542 static void
5543 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5544 {
5545   basic_block def_bb;
5546   struct loop *def_loop;
5547   tree phi, use;
5548
5549   use = USE_FROM_PTR (op_p);
5550   if (TREE_CODE (use) != SSA_NAME)
5551     return;
5552
5553   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5554   if (!def_bb)
5555     return;
5556
5557   def_loop = def_bb->loop_father;
5558   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5559     return;
5560
5561   /* Try finding a phi node that copies the value out of the loop.  */
5562   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5563     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5564       break;
5565
5566   if (!phi)
5567     {
5568       /* Create such a phi node.  */
5569       tree new_name = duplicate_ssa_name (use, NULL);
5570
5571       phi = create_phi_node (new_name, exit->dest);
5572       SSA_NAME_DEF_STMT (new_name) = phi;
5573       add_phi_arg (phi, use, exit);
5574     }
5575
5576   SET_USE (op_p, PHI_RESULT (phi));
5577 }
5578
5579 /* Ensure that operands of STMT may be used at the end of EXIT without
5580    violating loop closed ssa form.  */
5581
5582 static void
5583 protect_loop_closed_ssa_form (edge exit, tree stmt)
5584 {
5585   ssa_op_iter iter;
5586   use_operand_p use_p;
5587
5588   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5589     protect_loop_closed_ssa_form_use (exit, use_p);
5590 }
5591
5592 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5593    so that they are emitted on the correct place, and so that the loop closed
5594    ssa form is preserved.  */
5595
5596 static void
5597 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5598 {
5599   tree_stmt_iterator tsi;
5600   block_stmt_iterator bsi;
5601   tree phi, stmt, def, next;
5602
5603   if (!single_pred_p (exit->dest))
5604     split_loop_exit_edge (exit);
5605
5606   /* Ensure there is label in exit->dest, so that we can
5607      insert after it.  */
5608   tree_block_label (exit->dest);
5609   bsi = bsi_after_labels (exit->dest);
5610
5611   if (TREE_CODE (stmts) == STATEMENT_LIST)
5612     {
5613       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5614         {
5615           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5616           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5617         }
5618     }
5619   else
5620     {
5621       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5622       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5623     }
5624
5625   if (!op)
5626     return;
5627
5628   for (phi = phi_nodes (exit->dest); phi; phi = next)
5629     {
5630       next = PHI_CHAIN (phi);
5631
5632       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5633         {
5634           def = PHI_RESULT (phi);
5635           remove_statement (phi, false);
5636           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5637                         def, op);
5638           SSA_NAME_DEF_STMT (def) = stmt;
5639           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5640         }
5641     }
5642 }
5643
5644 /* Rewrites the final value of USE (that is only needed outside of the loop)
5645    using candidate CAND.  */
5646
5647 static void
5648 rewrite_use_outer (struct ivopts_data *data,
5649                    struct iv_use *use, struct iv_cand *cand)
5650 {
5651   edge exit;
5652   tree value, op, stmts, tgt;
5653   tree phi;
5654
5655   switch (TREE_CODE (use->stmt))
5656     {
5657     case PHI_NODE:
5658       tgt = PHI_RESULT (use->stmt);
5659       break;
5660     case MODIFY_EXPR:
5661       tgt = TREE_OPERAND (use->stmt, 0);
5662       break;
5663     default:
5664       gcc_unreachable ();
5665     }
5666
5667   exit = single_dom_exit (data->current_loop);
5668
5669   if (exit)
5670     {
5671       if (!cand->iv)
5672         {
5673           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5674           value = unshare_expr (cp->value);
5675         }
5676       else
5677         value = get_computation_at (data->current_loop,
5678                                     use, cand, last_stmt (exit->src));
5679
5680       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5681
5682       /* If we will preserve the iv anyway and we would need to perform
5683          some computation to replace the final value, do nothing.  */
5684       if (stmts && name_info (data, tgt)->preserve_biv)
5685         return;
5686
5687       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5688         {
5689           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5690
5691           if (USE_FROM_PTR (use_p) == tgt)
5692             SET_USE (use_p, op);
5693         }
5694
5695       if (stmts)
5696         compute_phi_arg_on_exit (exit, stmts, op);
5697
5698       /* Enable removal of the statement.  We cannot remove it directly,
5699          since we may still need the aliasing information attached to the
5700          ssa name defined by it.  */
5701       name_info (data, tgt)->iv->have_use_for = false;
5702       return;
5703     }
5704
5705   /* If the variable is going to be preserved anyway, there is nothing to
5706      do.  */
5707   if (name_info (data, tgt)->preserve_biv)
5708     return;
5709
5710   /* Otherwise we just need to compute the iv.  */
5711   rewrite_use_nonlinear_expr (data, use, cand);
5712 }
5713
5714 /* Rewrites USE using candidate CAND.  */
5715
5716 static void
5717 rewrite_use (struct ivopts_data *data,
5718              struct iv_use *use, struct iv_cand *cand)
5719 {
5720   switch (use->type)
5721     {
5722       case USE_NONLINEAR_EXPR:
5723         rewrite_use_nonlinear_expr (data, use, cand);
5724         break;
5725
5726       case USE_OUTER:
5727         rewrite_use_outer (data, use, cand);
5728         break;
5729
5730       case USE_ADDRESS:
5731         rewrite_use_address (data, use, cand);
5732         break;
5733
5734       case USE_COMPARE:
5735         rewrite_use_compare (data, use, cand);
5736         break;
5737
5738       default:
5739         gcc_unreachable ();
5740     }
5741   update_stmt (use->stmt);
5742 }
5743
5744 /* Rewrite the uses using the selected induction variables.  */
5745
5746 static void
5747 rewrite_uses (struct ivopts_data *data)
5748 {
5749   unsigned i;
5750   struct iv_cand *cand;
5751   struct iv_use *use;
5752
5753   for (i = 0; i < n_iv_uses (data); i++)
5754     {
5755       use = iv_use (data, i);
5756       cand = use->selected;
5757       gcc_assert (cand);
5758
5759       rewrite_use (data, use, cand);
5760     }
5761 }
5762
5763 /* Removes the ivs that are not used after rewriting.  */
5764
5765 static void
5766 remove_unused_ivs (struct ivopts_data *data)
5767 {
5768   unsigned j;
5769   bitmap_iterator bi;
5770
5771   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5772     {
5773       struct version_info *info;
5774
5775       info = ver_info (data, j);
5776       if (info->iv
5777           && !zero_p (info->iv->step)
5778           && !info->inv_id
5779           && !info->iv->have_use_for
5780           && !info->preserve_biv)
5781         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5782     }
5783 }
5784
5785 /* Frees data allocated by the optimization of a single loop.  */
5786
5787 static void
5788 free_loop_data (struct ivopts_data *data)
5789 {
5790   unsigned i, j;
5791   bitmap_iterator bi;
5792   tree obj;
5793
5794   htab_empty (data->niters);
5795
5796   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5797     {
5798       struct version_info *info;
5799
5800       info = ver_info (data, i);
5801       if (info->iv)
5802         free (info->iv);
5803       info->iv = NULL;
5804       info->has_nonlin_use = false;
5805       info->preserve_biv = false;
5806       info->inv_id = 0;
5807     }
5808   bitmap_clear (data->relevant);
5809   bitmap_clear (data->important_candidates);
5810
5811   for (i = 0; i < n_iv_uses (data); i++)
5812     {
5813       struct iv_use *use = iv_use (data, i);
5814
5815       free (use->iv);
5816       BITMAP_FREE (use->related_cands);
5817       for (j = 0; j < use->n_map_members; j++)
5818         if (use->cost_map[j].depends_on)
5819           BITMAP_FREE (use->cost_map[j].depends_on);
5820       free (use->cost_map);
5821       free (use);
5822     }
5823   VEC_truncate (iv_use_p, data->iv_uses, 0);
5824
5825   for (i = 0; i < n_iv_cands (data); i++)
5826     {
5827       struct iv_cand *cand = iv_cand (data, i);
5828
5829       if (cand->iv)
5830         free (cand->iv);
5831       if (cand->depends_on)
5832         BITMAP_FREE (cand->depends_on);
5833       free (cand);
5834     }
5835   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5836
5837   if (data->version_info_size < num_ssa_names)
5838     {
5839       data->version_info_size = 2 * num_ssa_names;
5840       free (data->version_info);
5841       data->version_info = xcalloc (data->version_info_size,
5842                                     sizeof (struct version_info));
5843     }
5844
5845   data->max_inv_id = 0;
5846
5847   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5848     SET_DECL_RTL (obj, NULL_RTX);
5849
5850   VEC_truncate (tree, decl_rtl_to_reset, 0);
5851 }
5852
5853 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5854    loop tree.  */
5855
5856 static void
5857 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5858 {
5859   unsigned i;
5860
5861   for (i = 1; i < loops->num; i++)
5862     if (loops->parray[i])
5863       {
5864         free (loops->parray[i]->aux);
5865         loops->parray[i]->aux = NULL;
5866       }
5867
5868   free_loop_data (data);
5869   free (data->version_info);
5870   BITMAP_FREE (data->relevant);
5871   BITMAP_FREE (data->important_candidates);
5872   htab_delete (data->niters);
5873
5874   VEC_free (tree, heap, decl_rtl_to_reset);
5875   VEC_free (iv_use_p, heap, data->iv_uses);
5876   VEC_free (iv_cand_p, heap, data->iv_candidates);
5877 }
5878
5879 /* Optimizes the LOOP.  Returns true if anything changed.  */
5880
5881 static bool
5882 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5883 {
5884   bool changed = false;
5885   struct iv_ca *iv_ca;
5886   edge exit;
5887
5888   data->current_loop = loop;
5889
5890   if (dump_file && (dump_flags & TDF_DETAILS))
5891     {
5892       fprintf (dump_file, "Processing loop %d\n", loop->num);
5893
5894       exit = single_dom_exit (loop);
5895       if (exit)
5896         {
5897           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5898                    exit->src->index, exit->dest->index);
5899           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5900           fprintf (dump_file, "\n");
5901         }
5902
5903       fprintf (dump_file, "\n");
5904     }
5905
5906   /* For each ssa name determines whether it behaves as an induction variable
5907      in some loop.  */
5908   if (!find_induction_variables (data))
5909     goto finish;
5910
5911   /* Finds interesting uses (item 1).  */
5912   find_interesting_uses (data);
5913   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5914     goto finish;
5915
5916   /* Finds candidates for the induction variables (item 2).  */
5917   find_iv_candidates (data);
5918
5919   /* Calculates the costs (item 3, part 1).  */
5920   determine_use_iv_costs (data);
5921   determine_iv_costs (data);
5922   determine_set_costs (data);
5923
5924   /* Find the optimal set of induction variables (item 3, part 2).  */
5925   iv_ca = find_optimal_iv_set (data);
5926   if (!iv_ca)
5927     goto finish;
5928   changed = true;
5929
5930   /* Create the new induction variables (item 4, part 1).  */
5931   create_new_ivs (data, iv_ca);
5932   iv_ca_free (&iv_ca);
5933
5934   /* Rewrite the uses (item 4, part 2).  */
5935   rewrite_uses (data);
5936
5937   /* Remove the ivs that are unused after rewriting.  */
5938   remove_unused_ivs (data);
5939
5940   /* We have changed the structure of induction variables; it might happen
5941      that definitions in the scev database refer to some of them that were
5942      eliminated.  */
5943   scev_reset ();
5944
5945 finish:
5946   free_loop_data (data);
5947
5948   return changed;
5949 }
5950
5951 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5952
5953 void
5954 tree_ssa_iv_optimize (struct loops *loops)
5955 {
5956   struct loop *loop;
5957   struct ivopts_data data;
5958
5959   tree_ssa_iv_optimize_init (loops, &data);
5960
5961   /* Optimize the loops starting with the innermost ones.  */
5962   loop = loops->tree_root;
5963   while (loop->inner)
5964     loop = loop->inner;
5965
5966   /* Scan the loops, inner ones first.  */
5967   while (loop != loops->tree_root)
5968     {
5969       if (dump_file && (dump_flags & TDF_DETAILS))
5970         flow_loop_dump (loop, dump_file, NULL, 1);
5971
5972       tree_ssa_iv_optimize_loop (&data, loop);
5973
5974       if (loop->next)
5975         {
5976           loop = loop->next;
5977           while (loop->inner)
5978             loop = loop->inner;
5979         }
5980       else
5981         loop = loop->outer;
5982     }
5983
5984   tree_ssa_iv_optimize_finalize (loops, &data);
5985 }