gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 static edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter);
 716       *slot = nfe_desc;
 717     }
 718   else
 719     nfe_desc = *slot;
 720
 721   if (!nfe_desc->valid_p)
 722     return NULL;
 723
 724   return &nfe_desc->niter;
 725 }
 726
 727 /* Returns structure describing number of iterations determined from
 728    single dominating exit of DATA->current_loop, or NULL if something
 729    goes wrong.  */
 730
 731 static struct tree_niter_desc *
 732 niter_for_single_dom_exit (struct ivopts_data *data)
 733 {
 734   edge exit = single_dom_exit (data->current_loop);
 735
 736   if (!exit)
 737     return NULL;
 738
 739   return niter_for_exit (data, exit);
 740 }
 741
 742 /* Initializes data structures used by the iv optimization pass, stored
 743    in DATA.  LOOPS is the loop tree.  */
 744
 745 static void
 746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 747 {
 748   unsigned i;
 749
 750   data->version_info_size = 2 * num_ssa_names;
 751   data->version_info = xcalloc (data->version_info_size,
 752                                 sizeof (struct version_info));
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold_convert (ptr_type_node,
 795                            build_fold_addr_expr (base));
 796
 797     case PLUS_EXPR:
 798     case MINUS_EXPR:
 799       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 800       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 801
 802       if (!op1)
 803         return op0;
 804
 805       if (!op0)
 806         return (code == PLUS_EXPR
 807                 ? op1
 808                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 809
 810       return fold_build2 (code, ptr_type_node, op0, op1);
 811
 812     case NOP_EXPR:
 813     case CONVERT_EXPR:
 814       return determine_base_object (TREE_OPERAND (expr, 0));
 815
 816     default:
 817       return fold_convert (ptr_type_node, expr);
 818     }
 819 }
 820
 821 /* Allocates an induction variable with given initial value BASE and step STEP
 822    for loop LOOP.  */
 823
 824 static struct iv *
 825 alloc_iv (tree base, tree step)
 826 {
 827   struct iv *iv = xcalloc (1, sizeof (struct iv));
 828
 829   if (step && integer_zerop (step))
 830     step = NULL_TREE;
 831
 832   iv->base = base;
 833   iv->base_object = determine_base_object (base);
 834   iv->step = step;
 835   iv->biv_p = false;
 836   iv->have_use_for = false;
 837   iv->use_id = 0;
 838   iv->ssa_name = NULL_TREE;
 839
 840   return iv;
 841 }
 842
 843 /* Sets STEP and BASE for induction variable IV.  */
 844
 845 static void
 846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 847 {
 848   struct version_info *info = name_info (data, iv);
 849
 850   gcc_assert (!info->iv);
 851
 852   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 853   info->iv = alloc_iv (base, step);
 854   info->iv->ssa_name = iv;
 855 }
 856
 857 /* Finds induction variable declaration for VAR.  */
 858
 859 static struct iv *
 860 get_iv (struct ivopts_data *data, tree var)
 861 {
 862   basic_block bb;
 863
 864   if (!name_info (data, var)->iv)
 865     {
 866       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 867
 868       if (!bb
 869           || !flow_bb_inside_loop_p (data->current_loop, bb))
 870         set_iv (data, var, var, NULL_TREE);
 871     }
 872
 873   return name_info (data, var)->iv;
 874 }
 875
 876 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 877    not define a simple affine biv with nonzero step.  */
 878
 879 static tree
 880 determine_biv_step (tree phi)
 881 {
 882   struct loop *loop = bb_for_stmt (phi)->loop_father;
 883   tree name = PHI_RESULT (phi), base, step;
 884
 885   if (!is_gimple_reg (name))
 886     return NULL_TREE;
 887
 888   if (!simple_iv (loop, phi, name, &base, &step, true))
 889     return NULL_TREE;
 890
 891   if (zero_p (step))
 892     return NULL_TREE;
 893
 894   return step;
 895 }
 896
 897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 898
 899 static bool
 900 abnormal_ssa_name_p (tree exp)
 901 {
 902   if (!exp)
 903     return false;
 904
 905   if (TREE_CODE (exp) != SSA_NAME)
 906     return false;
 907
 908   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 909 }
 910
 911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 912    abnormal phi node.  Callback for for_each_index.  */
 913
 914 static bool
 915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 916                                   void *data ATTRIBUTE_UNUSED)
 917 {
 918   if (TREE_CODE (base) == ARRAY_REF)
 919     {
 920       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 921         return false;
 922       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 923         return false;
 924     }
 925
 926   return !abnormal_ssa_name_p (*index);
 927 }
 928
 929 /* Returns true if EXPR contains a ssa name that occurs in an
 930    abnormal phi node.  */
 931
 932 static bool
 933 contains_abnormal_ssa_name_p (tree expr)
 934 {
 935   enum tree_code code;
 936   enum tree_code_class class;
 937
 938   if (!expr)
 939     return false;
 940
 941   code = TREE_CODE (expr);
 942   class = TREE_CODE_CLASS (code);
 943
 944   if (code == SSA_NAME)
 945     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 946
 947   if (code == INTEGER_CST
 948       || is_gimple_min_invariant (expr))
 949     return false;
 950
 951   if (code == ADDR_EXPR)
 952     return !for_each_index (&TREE_OPERAND (expr, 0),
 953                             idx_contains_abnormal_ssa_name_p,
 954                             NULL);
 955
 956   switch (class)
 957     {
 958     case tcc_binary:
 959     case tcc_comparison:
 960       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 961         return true;
 962
 963       /* Fallthru.  */
 964     case tcc_unary:
 965       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 966         return true;
 967
 968       break;
 969
 970     default:
 971       gcc_unreachable ();
 972     }
 973
 974   return false;
 975 }
 976
 977 /* Finds basic ivs.  */
 978
 979 static bool
 980 find_bivs (struct ivopts_data *data)
 981 {
 982   tree phi, step, type, base;
 983   bool found = false;
 984   struct loop *loop = data->current_loop;
 985
 986   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 987     {
 988       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 989         continue;
 990
 991       step = determine_biv_step (phi);
 992       if (!step)
 993         continue;
 994
 995       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 996       base = expand_simple_operations (base);
 997       if (contains_abnormal_ssa_name_p (base)
 998           || contains_abnormal_ssa_name_p (step))
 999         continue;
1000
1001       type = TREE_TYPE (PHI_RESULT (phi));
1002       base = fold_convert (type, base);
1003       if (step)
1004         step = fold_convert (type, step);
1005
1006       set_iv (data, PHI_RESULT (phi), base, step);
1007       found = true;
1008     }
1009
1010   return found;
1011 }
1012
1013 /* Marks basic ivs.  */
1014
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1017 {
1018   tree phi, var;
1019   struct iv *iv, *incr_iv;
1020   struct loop *loop = data->current_loop;
1021   basic_block incr_bb;
1022
1023   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024     {
1025       iv = get_iv (data, PHI_RESULT (phi));
1026       if (!iv)
1027         continue;
1028
1029       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030       incr_iv = get_iv (data, var);
1031       if (!incr_iv)
1032         continue;
1033
1034       /* If the increment is in the subloop, ignore it.  */
1035       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036       if (incr_bb->loop_father != data->current_loop
1037           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038         continue;
1039
1040       iv->biv_p = true;
1041       incr_iv->biv_p = true;
1042     }
1043 }
1044
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046    parameters to BASE and STEP.  */
1047
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050                         tree *base, tree *step)
1051 {
1052   tree lhs;
1053   struct loop *loop = data->current_loop;
1054
1055   *base = NULL_TREE;
1056   *step = NULL_TREE;
1057
1058   if (TREE_CODE (stmt) != MODIFY_EXPR)
1059     return false;
1060
1061   lhs = TREE_OPERAND (stmt, 0);
1062   if (TREE_CODE (lhs) != SSA_NAME)
1063     return false;
1064
1065   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066     return false;
1067   *base = expand_simple_operations (*base);
1068
1069   if (contains_abnormal_ssa_name_p (*base)
1070       || contains_abnormal_ssa_name_p (*step))
1071     return false;
1072
1073   return true;
1074 }
1075
1076 /* Finds general ivs in statement STMT.  */
1077
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 {
1081   tree base, step;
1082
1083   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084     return;
1085
1086   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1087 }
1088
1089 /* Finds general ivs in basic block BB.  */
1090
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 {
1094   block_stmt_iterator bsi;
1095
1096   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097     find_givs_in_stmt (data, bsi_stmt (bsi));
1098 }
1099
1100 /* Finds general ivs.  */
1101
1102 static void
1103 find_givs (struct ivopts_data *data)
1104 {
1105   struct loop *loop = data->current_loop;
1106   basic_block *body = get_loop_body_in_dom_order (loop);
1107   unsigned i;
1108
1109   for (i = 0; i < loop->num_nodes; i++)
1110     find_givs_in_bb (data, body[i]);
1111   free (body);
1112 }
1113
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115    variable and if so, its initial value and step.  */
1116
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1119 {
1120   unsigned i;
1121   bitmap_iterator bi;
1122
1123   if (!find_bivs (data))
1124     return false;
1125
1126   find_givs (data);
1127   mark_bivs (data);
1128
1129   if (dump_file && (dump_flags & TDF_DETAILS))
1130     {
1131       struct tree_niter_desc *niter;
1132
1133       niter = niter_for_single_dom_exit (data);
1134
1135       if (niter)
1136         {
1137           fprintf (dump_file, "  number of iterations ");
1138           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139           fprintf (dump_file, "\n");
1140
1141           fprintf (dump_file, "  may be zero if ");
1142           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143           fprintf (dump_file, "\n");
1144           fprintf (dump_file, "\n");
1145         };
1146
1147       fprintf (dump_file, "Induction variables:\n\n");
1148
1149       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150         {
1151           if (ver_info (data, i)->iv)
1152             dump_iv (dump_file, ver_info (data, i)->iv);
1153         }
1154     }
1155
1156   return true;
1157 }
1158
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1160
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163             tree stmt, enum use_type use_type)
1164 {
1165   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166
1167   use->id = n_iv_uses (data);
1168   use->type = use_type;
1169   use->iv = iv;
1170   use->stmt = stmt;
1171   use->op_p = use_p;
1172   use->related_cands = BITMAP_ALLOC (NULL);
1173
1174   /* To avoid showing ssa name in the dumps, if it was not reset by the
1175      caller.  */
1176   iv->ssa_name = NULL_TREE;
1177
1178   if (dump_file && (dump_flags & TDF_DETAILS))
1179     dump_use (dump_file, use);
1180
1181   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1182
1183   return use;
1184 }
1185
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187    NONLINEAR_USE is true if the invariant is used in a way we do not
1188    handle specially.  */
1189
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1192 {
1193   basic_block bb;
1194   struct version_info *info;
1195
1196   if (TREE_CODE (op) != SSA_NAME
1197       || !is_gimple_reg (op))
1198     return;
1199
1200   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201   if (bb
1202       && flow_bb_inside_loop_p (data->current_loop, bb))
1203     return;
1204
1205   info = name_info (data, op);
1206   info->name = op;
1207   info->has_nonlin_use |= nonlinear_use;
1208   if (!info->inv_id)
1209     info->inv_id = ++data->max_inv_id;
1210   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 }
1212
1213 /* Checks whether the use OP is interesting and if so, records it
1214    as TYPE.  */
1215
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218                                        enum use_type type)
1219 {
1220   struct iv *iv;
1221   struct iv *civ;
1222   tree stmt;
1223   struct iv_use *use;
1224
1225   if (TREE_CODE (op) != SSA_NAME)
1226     return NULL;
1227
1228   iv = get_iv (data, op);
1229   if (!iv)
1230     return NULL;
1231
1232   if (iv->have_use_for)
1233     {
1234       use = iv_use (data, iv->use_id);
1235
1236       gcc_assert (use->type == USE_NONLINEAR_EXPR
1237                   || use->type == USE_OUTER);
1238
1239       if (type == USE_NONLINEAR_EXPR)
1240         use->type = USE_NONLINEAR_EXPR;
1241       return use;
1242     }
1243
1244   if (zero_p (iv->step))
1245     {
1246       record_invariant (data, op, true);
1247       return NULL;
1248     }
1249   iv->have_use_for = true;
1250
1251   civ = xmalloc (sizeof (struct iv));
1252   *civ = *iv;
1253
1254   stmt = SSA_NAME_DEF_STMT (op);
1255   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256               || TREE_CODE (stmt) == MODIFY_EXPR);
1257
1258   use = record_use (data, NULL, civ, stmt, type);
1259   iv->use_id = use->id;
1260
1261   return use;
1262 }
1263
1264 /* Checks whether the use OP is interesting and if so, records it.  */
1265
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 {
1269   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 }
1271
1272 /* Records a definition of induction variable OP that is used outside of the
1273    loop.  */
1274
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 {
1278   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 }
1280
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282    and if so, records it.  */
1283
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286 {
1287   tree *op0_p;
1288   tree *op1_p;
1289   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290   struct iv const_iv;
1291   tree zero = integer_zero_node;
1292
1293   const_iv.step = NULL_TREE;
1294
1295   if (TREE_CODE (*cond_p) != SSA_NAME
1296       && !COMPARISON_CLASS_P (*cond_p))
1297     return;
1298
1299   if (TREE_CODE (*cond_p) == SSA_NAME)
1300     {
1301       op0_p = cond_p;
1302       op1_p = &zero;
1303     }
1304   else
1305     {
1306       op0_p = &TREE_OPERAND (*cond_p, 0);
1307       op1_p = &TREE_OPERAND (*cond_p, 1);
1308     }
1309
1310   if (TREE_CODE (*op0_p) == SSA_NAME)
1311     iv0 = get_iv (data, *op0_p);
1312   else
1313     iv0 = &const_iv;
1314
1315   if (TREE_CODE (*op1_p) == SSA_NAME)
1316     iv1 = get_iv (data, *op1_p);
1317   else
1318     iv1 = &const_iv;
1319
1320   if (/* When comparing with non-invariant value, we may not do any senseful
1321          induction variable elimination.  */
1322       (!iv0 || !iv1)
1323       /* Eliminating condition based on two ivs would be nontrivial.
1324          ??? TODO -- it is not really important to handle this case.  */
1325       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326     {
1327       find_interesting_uses_op (data, *op0_p);
1328       find_interesting_uses_op (data, *op1_p);
1329       return;
1330     }
1331
1332   if (zero_p (iv0->step) && zero_p (iv1->step))
1333     {
1334       /* If both are invariants, this is a work for unswitching.  */
1335       return;
1336     }
1337
1338   civ = xmalloc (sizeof (struct iv));
1339   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341 }
1342
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344    i.e. if all its operands are defined outside of the LOOP.  */
1345
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1348 {
1349   basic_block def_bb;
1350   unsigned i, len;
1351
1352   if (is_gimple_min_invariant (expr))
1353     return true;
1354
1355   if (TREE_CODE (expr) == SSA_NAME)
1356     {
1357       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358       if (def_bb
1359           && flow_bb_inside_loop_p (loop, def_bb))
1360         return false;
1361
1362       return true;
1363     }
1364
1365   if (!EXPR_P (expr))
1366     return false;
1367
1368   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369   for (i = 0; i < len; i++)
1370     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371       return false;
1372
1373   return true;
1374 }
1375
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377    initial ones.  Returns false when the value of the index cannot be determined.
1378    Callback for for_each_index.  */
1379
1380 struct ifs_ivopts_data
1381 {
1382   struct ivopts_data *ivopts_data;
1383   tree stmt;
1384   tree *step_p;
1385 };
1386
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1389 {
1390   struct ifs_ivopts_data *dta = data;
1391   struct iv *iv;
1392   tree step, iv_step, lbound, off;
1393   struct loop *loop = dta->ivopts_data->current_loop;
1394
1395   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397     return false;
1398
1399   /* If base is a component ref, require that the offset of the reference
1400      be invariant.  */
1401   if (TREE_CODE (base) == COMPONENT_REF)
1402     {
1403       off = component_ref_field_offset (base);
1404       return expr_invariant_in_loop_p (loop, off);
1405     }
1406
1407   /* If base is array, first check whether we will be able to move the
1408      reference out of the loop (in order to take its address in strength
1409      reduction).  In order for this to work we need both lower bound
1410      and step to be loop invariants.  */
1411   if (TREE_CODE (base) == ARRAY_REF)
1412     {
1413       step = array_ref_element_size (base);
1414       lbound = array_ref_low_bound (base);
1415
1416       if (!expr_invariant_in_loop_p (loop, step)
1417           || !expr_invariant_in_loop_p (loop, lbound))
1418         return false;
1419     }
1420
1421   if (TREE_CODE (*idx) != SSA_NAME)
1422     return true;
1423
1424   iv = get_iv (dta->ivopts_data, *idx);
1425   if (!iv)
1426     return false;
1427
1428   *idx = iv->base;
1429
1430   if (!iv->step)
1431     return true;
1432
1433   if (TREE_CODE (base) == ARRAY_REF)
1434     {
1435       step = array_ref_element_size (base);
1436
1437       /* We only handle addresses whose step is an integer constant.  */
1438       if (TREE_CODE (step) != INTEGER_CST)
1439         return false;
1440     }
1441   else
1442     /* The step for pointer arithmetics already is 1 byte.  */
1443     step = build_int_cst (sizetype, 1);
1444
1445   if (TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
1446     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1447                                           sizetype, iv->base, iv->step, dta->stmt);
1448   else
1449     iv_step = fold_convert (sizetype, iv->step);
1450
1451   if (!iv_step)
1452     {
1453       /* The index might wrap.  */
1454       return false;
1455     }
1456
1457   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1458
1459   if (!*dta->step_p)
1460     *dta->step_p = step;
1461   else
1462     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1463
1464   return true;
1465 }
1466
1467 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1468    object is passed to it in DATA.  */
1469
1470 static bool
1471 idx_record_use (tree base, tree *idx,
1472                 void *data)
1473 {
1474   find_interesting_uses_op (data, *idx);
1475   if (TREE_CODE (base) == ARRAY_REF)
1476     {
1477       find_interesting_uses_op (data, array_ref_element_size (base));
1478       find_interesting_uses_op (data, array_ref_low_bound (base));
1479     }
1480   return true;
1481 }
1482
1483 /* Returns true if memory reference REF may be unaligned.  */
1484
1485 static bool
1486 may_be_unaligned_p (tree ref)
1487 {
1488   tree base;
1489   tree base_type;
1490   HOST_WIDE_INT bitsize;
1491   HOST_WIDE_INT bitpos;
1492   tree toffset;
1493   enum machine_mode mode;
1494   int unsignedp, volatilep;
1495   unsigned base_align;
1496
1497   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498      thus they are not missaligned.  */
1499   if (TREE_CODE (ref) == TARGET_MEM_REF)
1500     return false;
1501
1502   /* The test below is basically copy of what expr.c:normal_inner_ref
1503      does to check whether the object must be loaded by parts when
1504      STRICT_ALIGNMENT is true.  */
1505   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1506                               &unsignedp, &volatilep, true);
1507   base_type = TREE_TYPE (base);
1508   base_align = TYPE_ALIGN (base_type);
1509
1510   if (mode != BLKmode
1511       && (base_align < GET_MODE_ALIGNMENT (mode)
1512           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1513           || bitpos % BITS_PER_UNIT != 0))
1514     return true;
1515
1516   return false;
1517 }
1518
1519 /* Finds addresses in *OP_P inside STMT.  */
1520
1521 static void
1522 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1523 {
1524   tree base = *op_p, step = NULL;
1525   struct iv *civ;
1526   struct ifs_ivopts_data ifs_ivopts_data;
1527
1528   /* Do not play with volatile memory references.  A bit too conservative,
1529      perhaps, but safe.  */
1530   if (stmt_ann (stmt)->has_volatile_ops)
1531     goto fail;
1532
1533   /* Ignore bitfields for now.  Not really something terribly complicated
1534      to handle.  TODO.  */
1535   if (TREE_CODE (base) == COMPONENT_REF
1536       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537     goto fail;
1538
1539   if (STRICT_ALIGNMENT
1540       && may_be_unaligned_p (base))
1541     goto fail;
1542
1543   base = unshare_expr (base);
1544
1545   if (TREE_CODE (base) == TARGET_MEM_REF)
1546     {
1547       tree type = build_pointer_type (TREE_TYPE (base));
1548       tree astep;
1549
1550       if (TMR_BASE (base)
1551           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1552         {
1553           civ = get_iv (data, TMR_BASE (base));
1554           if (!civ)
1555             goto fail;
1556
1557           TMR_BASE (base) = civ->base;
1558           step = civ->step;
1559         }
1560       if (TMR_INDEX (base)
1561           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1562         {
1563           civ = get_iv (data, TMR_INDEX (base));
1564           if (!civ)
1565             goto fail;
1566
1567           TMR_INDEX (base) = civ->base;
1568           astep = civ->step;
1569
1570           if (astep)
1571             {
1572               if (TMR_STEP (base))
1573                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1574
1575               if (step)
1576                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1577               else
1578                 step = astep;
1579             }
1580         }
1581
1582       if (zero_p (step))
1583         goto fail;
1584       base = tree_mem_ref_addr (type, base);
1585     }
1586   else
1587     {
1588       ifs_ivopts_data.ivopts_data = data;
1589       ifs_ivopts_data.stmt = stmt;
1590       ifs_ivopts_data.step_p = &step;
1591       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1592           || zero_p (step))
1593         goto fail;
1594
1595       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1596       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1597
1598       base = build_fold_addr_expr (base);
1599     }
1600
1601   civ = alloc_iv (base, step);
1602   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1603   return;
1604
1605 fail:
1606   for_each_index (op_p, idx_record_use, data);
1607 }
1608
1609 /* Finds and records invariants used in STMT.  */
1610
1611 static void
1612 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1613 {
1614   ssa_op_iter iter;
1615   use_operand_p use_p;
1616   tree op;
1617
1618   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1619     {
1620       op = USE_FROM_PTR (use_p);
1621       record_invariant (data, op, false);
1622     }
1623 }
1624
1625 /* Finds interesting uses of induction variables in the statement STMT.  */
1626
1627 static void
1628 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1629 {
1630   struct iv *iv;
1631   tree op, lhs, rhs;
1632   ssa_op_iter iter;
1633   use_operand_p use_p;
1634
1635   find_invariants_stmt (data, stmt);
1636
1637   if (TREE_CODE (stmt) == COND_EXPR)
1638     {
1639       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1640       return;
1641     }
1642
1643   if (TREE_CODE (stmt) == MODIFY_EXPR)
1644     {
1645       lhs = TREE_OPERAND (stmt, 0);
1646       rhs = TREE_OPERAND (stmt, 1);
1647
1648       if (TREE_CODE (lhs) == SSA_NAME)
1649         {
1650           /* If the statement defines an induction variable, the uses are not
1651              interesting by themselves.  */
1652
1653           iv = get_iv (data, lhs);
1654
1655           if (iv && !zero_p (iv->step))
1656             return;
1657         }
1658
1659       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1660         {
1661         case tcc_comparison:
1662           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1663           return;
1664
1665         case tcc_reference:
1666           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1667           if (REFERENCE_CLASS_P (lhs))
1668             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1669           return;
1670
1671         default: ;
1672         }
1673
1674       if (REFERENCE_CLASS_P (lhs)
1675           && is_gimple_val (rhs))
1676         {
1677           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1678           find_interesting_uses_op (data, rhs);
1679           return;
1680         }
1681
1682       /* TODO -- we should also handle address uses of type
1683
1684          memory = call (whatever);
1685
1686          and
1687
1688          call (memory).  */
1689     }
1690
1691   if (TREE_CODE (stmt) == PHI_NODE
1692       && bb_for_stmt (stmt) == data->current_loop->header)
1693     {
1694       lhs = PHI_RESULT (stmt);
1695       iv = get_iv (data, lhs);
1696
1697       if (iv && !zero_p (iv->step))
1698         return;
1699     }
1700
1701   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1702     {
1703       op = USE_FROM_PTR (use_p);
1704
1705       if (TREE_CODE (op) != SSA_NAME)
1706         continue;
1707
1708       iv = get_iv (data, op);
1709       if (!iv)
1710         continue;
1711
1712       find_interesting_uses_op (data, op);
1713     }
1714 }
1715
1716 /* Finds interesting uses of induction variables outside of loops
1717    on loop exit edge EXIT.  */
1718
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1721 {
1722   tree phi, def;
1723
1724   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1725     {
1726       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727       find_interesting_uses_outer (data, def);
1728     }
1729 }
1730
1731 /* Finds uses of the induction variables that are interesting.  */
1732
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1735 {
1736   basic_block bb;
1737   block_stmt_iterator bsi;
1738   tree phi;
1739   basic_block *body = get_loop_body (data->current_loop);
1740   unsigned i;
1741   struct version_info *info;
1742   edge e;
1743
1744   if (dump_file && (dump_flags & TDF_DETAILS))
1745     fprintf (dump_file, "Uses:\n\n");
1746
1747   for (i = 0; i < data->current_loop->num_nodes; i++)
1748     {
1749       edge_iterator ei;
1750       bb = body[i];
1751
1752       FOR_EACH_EDGE (e, ei, bb->succs)
1753         if (e->dest != EXIT_BLOCK_PTR
1754             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755           find_interesting_uses_outside (data, e);
1756
1757       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758         find_interesting_uses_stmt (data, phi);
1759       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1761     }
1762
1763   if (dump_file && (dump_flags & TDF_DETAILS))
1764     {
1765       bitmap_iterator bi;
1766
1767       fprintf (dump_file, "\n");
1768
1769       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1770         {
1771           info = ver_info (data, i);
1772           if (info->inv_id)
1773             {
1774               fprintf (dump_file, "  ");
1775               print_generic_expr (dump_file, info->name, TDF_SLIM);
1776               fprintf (dump_file, " is invariant (%d)%s\n",
1777                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778             }
1779         }
1780
1781       fprintf (dump_file, "\n");
1782     }
1783
1784   free (body);
1785 }
1786
1787 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1788    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1789    we are at the top-level of the processed address.  */
1790
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793                 unsigned HOST_WIDE_INT *offset)
1794 {
1795   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796   enum tree_code code;
1797   tree type, orig_type = TREE_TYPE (expr);
1798   unsigned HOST_WIDE_INT off0, off1, st;
1799   tree orig_expr = expr;
1800
1801   STRIP_NOPS (expr);
1802
1803   type = TREE_TYPE (expr);
1804   code = TREE_CODE (expr);
1805   *offset = 0;
1806
1807   switch (code)
1808     {
1809     case INTEGER_CST:
1810       if (!cst_and_fits_in_hwi (expr)
1811           || zero_p (expr))
1812         return orig_expr;
1813
1814       *offset = int_cst_value (expr);
1815       return build_int_cst_type (orig_type, 0);
1816
1817     case PLUS_EXPR:
1818     case MINUS_EXPR:
1819       op0 = TREE_OPERAND (expr, 0);
1820       op1 = TREE_OPERAND (expr, 1);
1821
1822       op0 = strip_offset_1 (op0, false, false, &off0);
1823       op1 = strip_offset_1 (op1, false, false, &off1);
1824
1825       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826       if (op0 == TREE_OPERAND (expr, 0)
1827           && op1 == TREE_OPERAND (expr, 1))
1828         return orig_expr;
1829
1830       if (zero_p (op1))
1831         expr = op0;
1832       else if (zero_p (op0))
1833         {
1834           if (code == PLUS_EXPR)
1835             expr = op1;
1836           else
1837             expr = fold_build1 (NEGATE_EXPR, type, op1);
1838         }
1839       else
1840         expr = fold_build2 (code, type, op0, op1);
1841
1842       return fold_convert (orig_type, expr);
1843
1844     case ARRAY_REF:
1845       if (!inside_addr)
1846         return orig_expr;
1847
1848       step = array_ref_element_size (expr);
1849       if (!cst_and_fits_in_hwi (step))
1850         break;
1851
1852       st = int_cst_value (step);
1853       op1 = TREE_OPERAND (expr, 1);
1854       op1 = strip_offset_1 (op1, false, false, &off1);
1855       *offset = off1 * st;
1856
1857       if (top_compref
1858           && zero_p (op1))
1859         {
1860           /* Strip the component reference completely.  */
1861           op0 = TREE_OPERAND (expr, 0);
1862           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863           *offset += off0;
1864           return op0;
1865         }
1866       break;
1867
1868     case COMPONENT_REF:
1869       if (!inside_addr)
1870         return orig_expr;
1871
1872       tmp = component_ref_field_offset (expr);
1873       if (top_compref
1874           && cst_and_fits_in_hwi (tmp))
1875         {
1876           /* Strip the component reference completely.  */
1877           op0 = TREE_OPERAND (expr, 0);
1878           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879           *offset = off0 + int_cst_value (tmp);
1880           return op0;
1881         }
1882       break;
1883
1884     case ADDR_EXPR:
1885       op0 = TREE_OPERAND (expr, 0);
1886       op0 = strip_offset_1 (op0, true, true, &off0);
1887       *offset += off0;
1888
1889       if (op0 == TREE_OPERAND (expr, 0))
1890         return orig_expr;
1891
1892       expr = build_fold_addr_expr (op0);
1893       return fold_convert (orig_type, expr);
1894
1895     case INDIRECT_REF:
1896       inside_addr = false;
1897       break;
1898
1899     default:
1900       return orig_expr;
1901     }
1902
1903   /* Default handling of expressions for that we want to recurse into
1904      the first operand.  */
1905   op0 = TREE_OPERAND (expr, 0);
1906   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907   *offset += off0;
1908
1909   if (op0 == TREE_OPERAND (expr, 0)
1910       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911     return orig_expr;
1912
1913   expr = copy_node (expr);
1914   TREE_OPERAND (expr, 0) = op0;
1915   if (op1)
1916     TREE_OPERAND (expr, 1) = op1;
1917
1918   /* Inside address, we might strip the top level component references,
1919      thus changing type of the expression.  Handling of ADDR_EXPR
1920      will fix that.  */
1921   expr = fold_convert (orig_type, expr);
1922
1923   return expr;
1924 }
1925
1926 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1927
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1930 {
1931   return strip_offset_1 (expr, false, false, offset);
1932 }
1933
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935    For integer types, we return unsigned variant of the type, which
1936    avoids problems with overflows.  For pointer types, we return void *.  */
1937
1938 static tree
1939 generic_type_for (tree type)
1940 {
1941   if (POINTER_TYPE_P (type))
1942     return ptr_type_node;
1943
1944   if (TYPE_UNSIGNED (type))
1945     return type;
1946
1947   return unsigned_type_for (type);
1948 }
1949
1950 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1951    the bitmap to that we should store it.  */
1952
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1956 {
1957   bitmap *depends_on = data;
1958   struct version_info *info;
1959
1960   if (TREE_CODE (*expr_p) != SSA_NAME)
1961     return NULL_TREE;
1962   info = name_info (fd_ivopts_data, *expr_p);
1963
1964   if (!info->inv_id || info->has_nonlin_use)
1965     return NULL_TREE;
1966
1967   if (!*depends_on)
1968     *depends_on = BITMAP_ALLOC (NULL);
1969   bitmap_set_bit (*depends_on, info->inv_id);
1970
1971   return NULL_TREE;
1972 }
1973
1974 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1975    position to POS.  If USE is not NULL, the candidate is set as related to
1976    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1977    replacement of the final value of the iv by a direct computation.  */
1978
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981                  tree base, tree step, bool important, enum iv_position pos,
1982                  struct iv_use *use, tree incremented_at)
1983 {
1984   unsigned i;
1985   struct iv_cand *cand = NULL;
1986   tree type, orig_type;
1987
1988   if (base)
1989     {
1990       orig_type = TREE_TYPE (base);
1991       type = generic_type_for (orig_type);
1992       if (type != orig_type)
1993         {
1994           base = fold_convert (type, base);
1995           if (step)
1996             step = fold_convert (type, step);
1997         }
1998     }
1999
2000   for (i = 0; i < n_iv_cands (data); i++)
2001     {
2002       cand = iv_cand (data, i);
2003
2004       if (cand->pos != pos)
2005         continue;
2006
2007       if (cand->incremented_at != incremented_at)
2008         continue;
2009
2010       if (!cand->iv)
2011         {
2012           if (!base && !step)
2013             break;
2014
2015           continue;
2016         }
2017
2018       if (!base && !step)
2019         continue;
2020
2021       if (!operand_equal_p (base, cand->iv->base, 0))
2022         continue;
2023
2024       if (zero_p (cand->iv->step))
2025         {
2026           if (zero_p (step))
2027             break;
2028         }
2029       else
2030         {
2031           if (step && operand_equal_p (step, cand->iv->step, 0))
2032             break;
2033         }
2034     }
2035
2036   if (i == n_iv_cands (data))
2037     {
2038       cand = xcalloc (1, sizeof (struct iv_cand));
2039       cand->id = i;
2040
2041       if (!base && !step)
2042         cand->iv = NULL;
2043       else
2044         cand->iv = alloc_iv (base, step);
2045
2046       cand->pos = pos;
2047       if (pos != IP_ORIGINAL && cand->iv)
2048         {
2049           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050           cand->var_after = cand->var_before;
2051         }
2052       cand->important = important;
2053       cand->incremented_at = incremented_at;
2054       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2055
2056       if (step
2057           && TREE_CODE (step) != INTEGER_CST)
2058         {
2059           fd_ivopts_data = data;
2060           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2061         }
2062
2063       if (dump_file && (dump_flags & TDF_DETAILS))
2064         dump_cand (dump_file, cand);
2065     }
2066
2067   if (important && !cand->important)
2068     {
2069       cand->important = true;
2070       if (dump_file && (dump_flags & TDF_DETAILS))
2071         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2072     }
2073
2074   if (use)
2075     {
2076       bitmap_set_bit (use->related_cands, i);
2077       if (dump_file && (dump_flags & TDF_DETAILS))
2078         fprintf (dump_file, "Candidate %d is related to use %d\n",
2079                  cand->id, use->id);
2080     }
2081
2082   return cand;
2083 }
2084
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086    is allowed.
2087
2088    The purpose is to avoid splitting latch edge with a biv increment, thus
2089    creating a jump, possibly confusing other optimization passes and leaving
2090    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2091    is not available (so we do not have a better alternative), or if the latch
2092    edge is already nonempty.  */
2093
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2096 {
2097   if (!ip_normal_pos (loop))
2098     return true;
2099
2100   if (!empty_block_p (ip_end_pos (loop)))
2101     return true;
2102
2103   return false;
2104 }
2105
2106 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2107    position to POS.  If USE is not NULL, the candidate is set as related to
2108    it.  The candidate computation is scheduled on all available positions.  */
2109
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112                tree base, tree step, bool important, struct iv_use *use)
2113 {
2114   if (ip_normal_pos (data->current_loop))
2115     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116   if (ip_end_pos (data->current_loop)
2117       && allow_ip_end_pos_p (data->current_loop))
2118     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2119 }
2120
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122    type with SIZE bits.  */
2123
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126                                      unsigned int size)
2127 {
2128   tree type = lang_hooks.types.type_for_size (size, true);
2129   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130                  true, NULL);
2131 }
2132
2133 /* Adds standard iv candidates.  */
2134
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2137 {
2138   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2139
2140   /* The same for a double-integer type if it is still fast enough.  */
2141   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 }
2144
2145
2146 /* Adds candidates bases on the old induction variable IV.  */
2147
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2150 {
2151   tree phi, def;
2152   struct iv_cand *cand;
2153
2154   add_candidate (data, iv->base, iv->step, true, NULL);
2155
2156   /* The same, but with initial value zero.  */
2157   add_candidate (data,
2158                  build_int_cst (TREE_TYPE (iv->base), 0),
2159                  iv->step, true, NULL);
2160
2161   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162   if (TREE_CODE (phi) == PHI_NODE)
2163     {
2164       /* Additionally record the possibility of leaving the original iv
2165          untouched.  */
2166       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167       cand = add_candidate_1 (data,
2168                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169                               SSA_NAME_DEF_STMT (def));
2170       cand->var_before = iv->ssa_name;
2171       cand->var_after = def;
2172     }
2173 }
2174
2175 /* Adds candidates based on the old induction variables.  */
2176
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2179 {
2180   unsigned i;
2181   struct iv *iv;
2182   bitmap_iterator bi;
2183
2184   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2185     {
2186       iv = ver_info (data, i)->iv;
2187       if (iv && iv->biv_p && !zero_p (iv->step))
2188         add_old_iv_candidates (data, iv);
2189     }
2190 }
2191
2192 /* Adds candidates based on the value of the induction variable IV and USE.  */
2193
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196                          struct iv *iv, struct iv_use *use)
2197 {
2198   unsigned HOST_WIDE_INT offset;
2199   tree base;
2200
2201   add_candidate (data, iv->base, iv->step, false, use);
2202
2203   /* The same, but with initial value zero.  Make such variable important,
2204      since it is generic enough so that possibly many uses may be based
2205      on it.  */
2206   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207                  iv->step, true, use);
2208
2209   /* Third, try removing the constant offset.  */
2210   base = strip_offset (iv->base, &offset);
2211   if (offset)
2212     add_candidate (data, base, iv->step, false, use);
2213 }
2214
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216    a direct computation.  */
2217
2218 static void
2219 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2220 {
2221   struct tree_niter_desc *niter;
2222
2223   /* We must know where we exit the loop and how many times does it roll.  */
2224   niter = niter_for_single_dom_exit (data);
2225   if (!niter
2226       || !zero_p (niter->may_be_zero))
2227     return;
2228
2229   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2230 }
2231
2232 /* Adds candidates based on the uses.  */
2233
2234 static void
2235 add_derived_ivs_candidates (struct ivopts_data *data)
2236 {
2237   unsigned i;
2238
2239   for (i = 0; i < n_iv_uses (data); i++)
2240     {
2241       struct iv_use *use = iv_use (data, i);
2242
2243       if (!use)
2244         continue;
2245
2246       switch (use->type)
2247         {
2248         case USE_NONLINEAR_EXPR:
2249         case USE_COMPARE:
2250         case USE_ADDRESS:
2251           /* Just add the ivs based on the value of the iv used here.  */
2252           add_iv_value_candidates (data, use->iv, use);
2253           break;
2254
2255         case USE_OUTER:
2256           add_iv_value_candidates (data, use->iv, use);
2257
2258           /* Additionally, add the pseudocandidate for the possibility to
2259              replace the final value by a direct computation.  */
2260           add_iv_outer_candidates (data, use);
2261           break;
2262
2263         default:
2264           gcc_unreachable ();
2265         }
2266     }
2267 }
2268
2269 /* Record important candidates and add them to related_cands bitmaps
2270    if needed.  */
2271
2272 static void
2273 record_important_candidates (struct ivopts_data *data)
2274 {
2275   unsigned i;
2276   struct iv_use *use;
2277
2278   for (i = 0; i < n_iv_cands (data); i++)
2279     {
2280       struct iv_cand *cand = iv_cand (data, i);
2281
2282       if (cand->important)
2283         bitmap_set_bit (data->important_candidates, i);
2284     }
2285
2286   data->consider_all_candidates = (n_iv_cands (data)
2287                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2288
2289   if (data->consider_all_candidates)
2290     {
2291       /* We will not need "related_cands" bitmaps in this case,
2292          so release them to decrease peak memory consumption.  */
2293       for (i = 0; i < n_iv_uses (data); i++)
2294         {
2295           use = iv_use (data, i);
2296           BITMAP_FREE (use->related_cands);
2297         }
2298     }
2299   else
2300     {
2301       /* Add important candidates to the related_cands bitmaps.  */
2302       for (i = 0; i < n_iv_uses (data); i++)
2303         bitmap_ior_into (iv_use (data, i)->related_cands,
2304                          data->important_candidates);
2305     }
2306 }
2307
2308 /* Finds the candidates for the induction variables.  */
2309
2310 static void
2311 find_iv_candidates (struct ivopts_data *data)
2312 {
2313   /* Add commonly used ivs.  */
2314   add_standard_iv_candidates (data);
2315
2316   /* Add old induction variables.  */
2317   add_old_ivs_candidates (data);
2318
2319   /* Add induction variables derived from uses.  */
2320   add_derived_ivs_candidates (data);
2321
2322   /* Record the important candidates.  */
2323   record_important_candidates (data);
2324 }
2325
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328    we allocate a simple list to every use.  */
2329
2330 static void
2331 alloc_use_cost_map (struct ivopts_data *data)
2332 {
2333   unsigned i, size, s, j;
2334
2335   for (i = 0; i < n_iv_uses (data); i++)
2336     {
2337       struct iv_use *use = iv_use (data, i);
2338       bitmap_iterator bi;
2339
2340       if (data->consider_all_candidates)
2341         size = n_iv_cands (data);
2342       else
2343         {
2344           s = 0;
2345           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2346             {
2347               s++;
2348             }
2349
2350           /* Round up to the power of two, so that moduling by it is fast.  */
2351           for (size = 1; size < s; size <<= 1)
2352             continue;
2353         }
2354
2355       use->n_map_members = size;
2356       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2357     }
2358 }
2359
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361    on invariants DEPENDS_ON and that the value used in expressing it
2362    is VALUE.*/
2363
2364 static void
2365 set_use_iv_cost (struct ivopts_data *data,
2366                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2367                  bitmap depends_on, tree value)
2368 {
2369   unsigned i, s;
2370
2371   if (cost == INFTY)
2372     {
2373       BITMAP_FREE (depends_on);
2374       return;
2375     }
2376
2377   if (data->consider_all_candidates)
2378     {
2379       use->cost_map[cand->id].cand = cand;
2380       use->cost_map[cand->id].cost = cost;
2381       use->cost_map[cand->id].depends_on = depends_on;
2382       use->cost_map[cand->id].value = value;
2383       return;
2384     }
2385
2386   /* n_map_members is a power of two, so this computes modulo.  */
2387   s = cand->id & (use->n_map_members - 1);
2388   for (i = s; i < use->n_map_members; i++)
2389     if (!use->cost_map[i].cand)
2390       goto found;
2391   for (i = 0; i < s; i++)
2392     if (!use->cost_map[i].cand)
2393       goto found;
2394
2395   gcc_unreachable ();
2396
2397 found:
2398   use->cost_map[i].cand = cand;
2399   use->cost_map[i].cost = cost;
2400   use->cost_map[i].depends_on = depends_on;
2401   use->cost_map[i].value = value;
2402 }
2403
2404 /* Gets cost of (USE, CANDIDATE) pair.  */
2405
2406 static struct cost_pair *
2407 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2408                  struct iv_cand *cand)
2409 {
2410   unsigned i, s;
2411   struct cost_pair *ret;
2412
2413   if (!cand)
2414     return NULL;
2415
2416   if (data->consider_all_candidates)
2417     {
2418       ret = use->cost_map + cand->id;
2419       if (!ret->cand)
2420         return NULL;
2421
2422       return ret;
2423     }
2424
2425   /* n_map_members is a power of two, so this computes modulo.  */
2426   s = cand->id & (use->n_map_members - 1);
2427   for (i = s; i < use->n_map_members; i++)
2428     if (use->cost_map[i].cand == cand)
2429       return use->cost_map + i;
2430
2431   for (i = 0; i < s; i++)
2432     if (use->cost_map[i].cand == cand)
2433       return use->cost_map + i;
2434
2435   return NULL;
2436 }
2437
2438 /* Returns estimate on cost of computing SEQ.  */
2439
2440 static unsigned
2441 seq_cost (rtx seq)
2442 {
2443   unsigned cost = 0;
2444   rtx set;
2445
2446   for (; seq; seq = NEXT_INSN (seq))
2447     {
2448       set = single_set (seq);
2449       if (set)
2450         cost += rtx_cost (set, SET);
2451       else
2452         cost++;
2453     }
2454
2455   return cost;
2456 }
2457
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2459 static rtx
2460 produce_memory_decl_rtl (tree obj, int *regno)
2461 {
2462   rtx x;
2463
2464   gcc_assert (obj);
2465   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2466     {
2467       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2468       x = gen_rtx_SYMBOL_REF (Pmode, name);
2469     }
2470   else
2471     x = gen_raw_REG (Pmode, (*regno)++);
2472
2473   return gen_rtx_MEM (DECL_MODE (obj), x);
2474 }
2475
2476 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2477    walk_tree.  DATA contains the actual fake register number.  */
2478
2479 static tree
2480 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2481 {
2482   tree obj = NULL_TREE;
2483   rtx x = NULL_RTX;
2484   int *regno = data;
2485
2486   switch (TREE_CODE (*expr_p))
2487     {
2488     case ADDR_EXPR:
2489       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2490            handled_component_p (*expr_p);
2491            expr_p = &TREE_OPERAND (*expr_p, 0))
2492         continue;
2493       obj = *expr_p;
2494       if (DECL_P (obj))
2495         x = produce_memory_decl_rtl (obj, regno);
2496       break;
2497
2498     case SSA_NAME:
2499       *ws = 0;
2500       obj = SSA_NAME_VAR (*expr_p);
2501       if (!DECL_RTL_SET_P (obj))
2502         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503       break;
2504
2505     case VAR_DECL:
2506     case PARM_DECL:
2507     case RESULT_DECL:
2508       *ws = 0;
2509       obj = *expr_p;
2510
2511       if (DECL_RTL_SET_P (obj))
2512         break;
2513
2514       if (DECL_MODE (obj) == BLKmode)
2515         x = produce_memory_decl_rtl (obj, regno);
2516       else
2517         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2518
2519       break;
2520
2521     default:
2522       break;
2523     }
2524
2525   if (x)
2526     {
2527       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2528       SET_DECL_RTL (obj, x);
2529     }
2530
2531   return NULL_TREE;
2532 }
2533
2534 /* Determines cost of the computation of EXPR.  */
2535
2536 static unsigned
2537 computation_cost (tree expr)
2538 {
2539   rtx seq, rslt;
2540   tree type = TREE_TYPE (expr);
2541   unsigned cost;
2542   /* Avoid using hard regs in ways which may be unsupported.  */
2543   int regno = LAST_VIRTUAL_REGISTER + 1;
2544
2545   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2546   start_sequence ();
2547   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2548   seq = get_insns ();
2549   end_sequence ();
2550
2551   cost = seq_cost (seq);
2552   if (MEM_P (rslt))
2553     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2554
2555   return cost;
2556 }
2557
2558 /* Returns variable containing the value of candidate CAND at statement AT.  */
2559
2560 static tree
2561 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2562 {
2563   if (stmt_after_increment (loop, cand, stmt))
2564     return cand->var_after;
2565   else
2566     return cand->var_before;
2567 }
2568
2569 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2570    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2571
2572 int
2573 tree_int_cst_sign_bit (tree t)
2574 {
2575   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2576   unsigned HOST_WIDE_INT w;
2577
2578   if (bitno < HOST_BITS_PER_WIDE_INT)
2579     w = TREE_INT_CST_LOW (t);
2580   else
2581     {
2582       w = TREE_INT_CST_HIGH (t);
2583       bitno -= HOST_BITS_PER_WIDE_INT;
2584     }
2585
2586   return (w >> bitno) & 1;
2587 }
2588
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590    return cst.  Otherwise return NULL_TREE.  */
2591
2592 static tree
2593 constant_multiple_of (tree type, tree top, tree bot)
2594 {
2595   tree res, mby, p0, p1;
2596   enum tree_code code;
2597   bool negate;
2598
2599   STRIP_NOPS (top);
2600   STRIP_NOPS (bot);
2601
2602   if (operand_equal_p (top, bot, 0))
2603     return build_int_cst (type, 1);
2604
2605   code = TREE_CODE (top);
2606   switch (code)
2607     {
2608     case MULT_EXPR:
2609       mby = TREE_OPERAND (top, 1);
2610       if (TREE_CODE (mby) != INTEGER_CST)
2611         return NULL_TREE;
2612
2613       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614       if (!res)
2615         return NULL_TREE;
2616
2617       return fold_binary_to_constant (MULT_EXPR, type, res,
2618                                       fold_convert (type, mby));
2619
2620     case PLUS_EXPR:
2621     case MINUS_EXPR:
2622       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623       if (!p0)
2624         return NULL_TREE;
2625       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2626       if (!p1)
2627         return NULL_TREE;
2628
2629       return fold_binary_to_constant (code, type, p0, p1);
2630
2631     case INTEGER_CST:
2632       if (TREE_CODE (bot) != INTEGER_CST)
2633         return NULL_TREE;
2634
2635       bot = fold_convert (type, bot);
2636       top = fold_convert (type, top);
2637
2638       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639          the result afterwards.  */
2640       if (tree_int_cst_sign_bit (bot))
2641         {
2642           negate = true;
2643           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2644         }
2645       else
2646         negate = false;
2647
2648       /* Ditto for TOP.  */
2649       if (tree_int_cst_sign_bit (top))
2650         {
2651           negate = !negate;
2652           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2653         }
2654
2655       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656         return NULL_TREE;
2657
2658       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2659       if (negate)
2660         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2661       return res;
2662
2663     default:
2664       return NULL_TREE;
2665     }
2666 }
2667
2668 /* Sets COMB to CST.  */
2669
2670 static void
2671 aff_combination_const (struct affine_tree_combination *comb, tree type,
2672                        unsigned HOST_WIDE_INT cst)
2673 {
2674   unsigned prec = TYPE_PRECISION (type);
2675
2676   comb->type = type;
2677   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2678
2679   comb->n = 0;
2680   comb->rest = NULL_TREE;
2681   comb->offset = cst & comb->mask;
2682 }
2683
2684 /* Sets COMB to single element ELT.  */
2685
2686 static void
2687 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2688 {
2689   unsigned prec = TYPE_PRECISION (type);
2690
2691   comb->type = type;
2692   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2693
2694   comb->n = 1;
2695   comb->elts[0] = elt;
2696   comb->coefs[0] = 1;
2697   comb->rest = NULL_TREE;
2698   comb->offset = 0;
2699 }
2700
2701 /* Scales COMB by SCALE.  */
2702
2703 static void
2704 aff_combination_scale (struct affine_tree_combination *comb,
2705                        unsigned HOST_WIDE_INT scale)
2706 {
2707   unsigned i, j;
2708
2709   if (scale == 1)
2710     return;
2711
2712   if (scale == 0)
2713     {
2714       aff_combination_const (comb, comb->type, 0);
2715       return;
2716     }
2717
2718   comb->offset = (scale * comb->offset) & comb->mask;
2719   for (i = 0, j = 0; i < comb->n; i++)
2720     {
2721       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2722       comb->elts[j] = comb->elts[i];
2723       if (comb->coefs[j] != 0)
2724         j++;
2725     }
2726   comb->n = j;
2727
2728   if (comb->rest)
2729     {
2730       if (comb->n < MAX_AFF_ELTS)
2731         {
2732           comb->coefs[comb->n] = scale;
2733           comb->elts[comb->n] = comb->rest;
2734           comb->rest = NULL_TREE;
2735           comb->n++;
2736         }
2737       else
2738         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2739                                   build_int_cst_type (comb->type, scale));
2740     }
2741 }
2742
2743 /* Adds ELT * SCALE to COMB.  */
2744
2745 static void
2746 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2747                          unsigned HOST_WIDE_INT scale)
2748 {
2749   unsigned i;
2750
2751   if (scale == 0)
2752     return;
2753
2754   for (i = 0; i < comb->n; i++)
2755     if (operand_equal_p (comb->elts[i], elt, 0))
2756       {
2757         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758         if (comb->coefs[i])
2759           return;
2760
2761         comb->n--;
2762         comb->coefs[i] = comb->coefs[comb->n];
2763         comb->elts[i] = comb->elts[comb->n];
2764         return;
2765       }
2766   if (comb->n < MAX_AFF_ELTS)
2767     {
2768       comb->coefs[comb->n] = scale;
2769       comb->elts[comb->n] = elt;
2770       comb->n++;
2771       return;
2772     }
2773
2774   if (scale == 1)
2775     elt = fold_convert (comb->type, elt);
2776   else
2777     elt = fold_build2 (MULT_EXPR, comb->type,
2778                        fold_convert (comb->type, elt),
2779                        build_int_cst_type (comb->type, scale));
2780
2781   if (comb->rest)
2782     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2783   else
2784     comb->rest = elt;
2785 }
2786
2787 /* Adds COMB2 to COMB1.  */
2788
2789 static void
2790 aff_combination_add (struct affine_tree_combination *comb1,
2791                      struct affine_tree_combination *comb2)
2792 {
2793   unsigned i;
2794
2795   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2796   for (i = 0; i < comb2-> n; i++)
2797     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2798   if (comb2->rest)
2799     aff_combination_add_elt (comb1, comb2->rest, 1);
2800 }
2801
2802 /* Splits EXPR into an affine combination of parts.  */
2803
2804 static void
2805 tree_to_aff_combination (tree expr, tree type,
2806                          struct affine_tree_combination *comb)
2807 {
2808   struct affine_tree_combination tmp;
2809   enum tree_code code;
2810   tree cst, core, toffset;
2811   HOST_WIDE_INT bitpos, bitsize;
2812   enum machine_mode mode;
2813   int unsignedp, volatilep;
2814
2815   STRIP_NOPS (expr);
2816
2817   code = TREE_CODE (expr);
2818   switch (code)
2819     {
2820     case INTEGER_CST:
2821       aff_combination_const (comb, type, int_cst_value (expr));
2822       return;
2823
2824     case PLUS_EXPR:
2825     case MINUS_EXPR:
2826       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2827       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2828       if (code == MINUS_EXPR)
2829         aff_combination_scale (&tmp, -1);
2830       aff_combination_add (comb, &tmp);
2831       return;
2832
2833     case MULT_EXPR:
2834       cst = TREE_OPERAND (expr, 1);
2835       if (TREE_CODE (cst) != INTEGER_CST)
2836         break;
2837       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2838       aff_combination_scale (comb, int_cst_value (cst));
2839       return;
2840
2841     case NEGATE_EXPR:
2842       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2843       aff_combination_scale (comb, -1);
2844       return;
2845
2846     case ADDR_EXPR:
2847       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2848                                   &toffset, &mode, &unsignedp, &volatilep,
2849                                   false);
2850       if (bitpos % BITS_PER_UNIT != 0)
2851         break;
2852       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2853       core = build_fold_addr_expr (core);
2854       if (TREE_CODE (core) == ADDR_EXPR)
2855         aff_combination_add_elt (comb, core, 1);
2856       else
2857         {
2858           tree_to_aff_combination (core, type, &tmp);
2859           aff_combination_add (comb, &tmp);
2860         }
2861       if (toffset)
2862         {
2863           tree_to_aff_combination (toffset, type, &tmp);
2864           aff_combination_add (comb, &tmp);
2865         }
2866       return;
2867
2868     default:
2869       break;
2870     }
2871
2872   aff_combination_elt (comb, type, expr);
2873 }
2874
2875 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2876
2877 static tree
2878 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2879                  unsigned HOST_WIDE_INT mask)
2880 {
2881   enum tree_code code;
2882
2883   scale &= mask;
2884   elt = fold_convert (type, elt);
2885
2886   if (scale == 1)
2887     {
2888       if (!expr)
2889         return elt;
2890
2891       return fold_build2 (PLUS_EXPR, type, expr, elt);
2892     }
2893
2894   if (scale == mask)
2895     {
2896       if (!expr)
2897         return fold_build1 (NEGATE_EXPR, type, elt);
2898
2899       return fold_build2 (MINUS_EXPR, type, expr, elt);
2900     }
2901
2902   if (!expr)
2903     return fold_build2 (MULT_EXPR, type, elt,
2904                         build_int_cst_type (type, scale));
2905
2906   if ((scale | (mask >> 1)) == mask)
2907     {
2908       /* Scale is negative.  */
2909       code = MINUS_EXPR;
2910       scale = (-scale) & mask;
2911     }
2912   else
2913     code = PLUS_EXPR;
2914
2915   elt = fold_build2 (MULT_EXPR, type, elt,
2916                      build_int_cst_type (type, scale));
2917   return fold_build2 (code, type, expr, elt);
2918 }
2919
2920 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2921
2922 static void
2923 unshare_aff_combination (struct affine_tree_combination *comb)
2924 {
2925   unsigned i;
2926
2927   for (i = 0; i < comb->n; i++)
2928     comb->elts[i] = unshare_expr (comb->elts[i]);
2929   if (comb->rest)
2930     comb->rest = unshare_expr (comb->rest);
2931 }
2932
2933 /* Makes tree from the affine combination COMB.  */
2934
2935 static tree
2936 aff_combination_to_tree (struct affine_tree_combination *comb)
2937 {
2938   tree type = comb->type;
2939   tree expr = comb->rest;
2940   unsigned i;
2941   unsigned HOST_WIDE_INT off, sgn;
2942
2943   /* Handle the special case produced by get_computation_aff when
2944      the type does not fit in HOST_WIDE_INT.  */
2945   if (comb->n == 0 && comb->offset == 0)
2946     return fold_convert (type, expr);
2947
2948   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2949
2950   for (i = 0; i < comb->n; i++)
2951     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2952                             comb->mask);
2953
2954   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2955     {
2956       /* Offset is negative.  */
2957       off = (-comb->offset) & comb->mask;
2958       sgn = comb->mask;
2959     }
2960   else
2961     {
2962       off = comb->offset;
2963       sgn = 1;
2964     }
2965   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2966                           comb->mask);
2967 }
2968
2969 /* Determines the expression by that USE is expressed from induction variable
2970    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2971    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2972
2973 static bool
2974 get_computation_aff (struct loop *loop,
2975                      struct iv_use *use, struct iv_cand *cand, tree at,
2976                      struct affine_tree_combination *aff)
2977 {
2978   tree ubase = use->iv->base;
2979   tree ustep = use->iv->step;
2980   tree cbase = cand->iv->base;
2981   tree cstep = cand->iv->step;
2982   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2983   tree uutype;
2984   tree expr, delta;
2985   tree ratio;
2986   unsigned HOST_WIDE_INT ustepi, cstepi;
2987   HOST_WIDE_INT ratioi;
2988   struct affine_tree_combination cbase_aff, expr_aff;
2989
2990   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2991     {
2992       /* We do not have a precision to express the values of use.  */
2993       return false;
2994     }
2995
2996   expr = var_at_stmt (loop, cand, at);
2997
2998   if (TREE_TYPE (expr) != ctype)
2999     {
3000       /* This may happen with the original ivs.  */
3001       expr = fold_convert (ctype, expr);
3002     }
3003
3004   if (TYPE_UNSIGNED (utype))
3005     uutype = utype;
3006   else
3007     {
3008       uutype = unsigned_type_for (utype);
3009       ubase = fold_convert (uutype, ubase);
3010       ustep = fold_convert (uutype, ustep);
3011     }
3012
3013   if (uutype != ctype)
3014     {
3015       expr = fold_convert (uutype, expr);
3016       cbase = fold_convert (uutype, cbase);
3017       cstep = fold_convert (uutype, cstep);
3018     }
3019
3020   if (cst_and_fits_in_hwi (cstep)
3021       && cst_and_fits_in_hwi (ustep))
3022     {
3023       ustepi = int_cst_value (ustep);
3024       cstepi = int_cst_value (cstep);
3025
3026       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3027         {
3028           /* TODO maybe consider case when ustep divides cstep and the ratio is
3029              a power of 2 (so that the division is fast to execute)?  We would
3030              need to be much more careful with overflows etc. then.  */
3031           return false;
3032         }
3033
3034       ratio = build_int_cst_type (uutype, ratioi);
3035     }
3036   else
3037     {
3038       ratio = constant_multiple_of (uutype, ustep, cstep);
3039       if (!ratio)
3040         return false;
3041
3042       /* Ratioi is only used to detect special cases when the multiplicative
3043          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3044          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3045          to integer_onep/integer_all_onesp, since the former ignores
3046          TREE_OVERFLOW.  */
3047       if (cst_and_fits_in_hwi (ratio))
3048         ratioi = int_cst_value (ratio);
3049       else if (integer_onep (ratio))
3050         ratioi = 1;
3051       else if (integer_all_onesp (ratio))
3052         ratioi = -1;
3053       else
3054         ratioi = 0;
3055     }
3056
3057   /* We may need to shift the value if we are after the increment.  */
3058   if (stmt_after_increment (loop, cand, at))
3059     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3060
3061   /* use = ubase - ratio * cbase + ratio * var.
3062
3063      In general case ubase + ratio * (var - cbase) could be better (one less
3064      multiplication), but often it is possible to eliminate redundant parts
3065      of computations from (ubase - ratio * cbase) term, and if it does not
3066      happen, fold is able to apply the distributive law to obtain this form
3067      anyway.  */
3068
3069   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3070     {
3071       /* Let's compute in trees and just return the result in AFF.  This case
3072          should not be very common, and fold itself is not that bad either,
3073          so making the aff. functions more complicated to handle this case
3074          is not that urgent.  */
3075       if (ratioi == 1)
3076         {
3077           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3078           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3079         }
3080       else if (ratioi == -1)
3081         {
3082           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3083           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3084         }
3085       else
3086         {
3087           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3088           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3089           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3090           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3091         }
3092
3093       aff->type = uutype;
3094       aff->n = 0;
3095       aff->offset = 0;
3096       aff->mask = 0;
3097       aff->rest = expr;
3098       return true;
3099     }
3100
3101   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3102      possible to compute ratioi.  */
3103   gcc_assert (ratioi);
3104
3105   tree_to_aff_combination (ubase, uutype, aff);
3106   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3107   tree_to_aff_combination (expr, uutype, &expr_aff);
3108   aff_combination_scale (&cbase_aff, -ratioi);
3109   aff_combination_scale (&expr_aff, ratioi);
3110   aff_combination_add (aff, &cbase_aff);
3111   aff_combination_add (aff, &expr_aff);
3112
3113   return true;
3114 }
3115
3116 /* Determines the expression by that USE is expressed from induction variable
3117    CAND at statement AT in LOOP.  The computation is unshared.  */
3118
3119 static tree
3120 get_computation_at (struct loop *loop,
3121                     struct iv_use *use, struct iv_cand *cand, tree at)
3122 {
3123   struct affine_tree_combination aff;
3124   tree type = TREE_TYPE (use->iv->base);
3125
3126   if (!get_computation_aff (loop, use, cand, at, &aff))
3127     return NULL_TREE;
3128   unshare_aff_combination (&aff);
3129   return fold_convert (type, aff_combination_to_tree (&aff));
3130 }
3131
3132 /* Determines the expression by that USE is expressed from induction variable
3133    CAND in LOOP.  The computation is unshared.  */
3134
3135 static tree
3136 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3137 {
3138   return get_computation_at (loop, use, cand, use->stmt);
3139 }
3140
3141 /* Returns cost of addition in MODE.  */
3142
3143 static unsigned
3144 add_cost (enum machine_mode mode)
3145 {
3146   static unsigned costs[NUM_MACHINE_MODES];
3147   rtx seq;
3148   unsigned cost;
3149
3150   if (costs[mode])
3151     return costs[mode];
3152
3153   start_sequence ();
3154   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3155                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3156                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3157                  NULL_RTX);
3158   seq = get_insns ();
3159   end_sequence ();
3160
3161   cost = seq_cost (seq);
3162   if (!cost)
3163     cost = 1;
3164
3165   costs[mode] = cost;
3166
3167   if (dump_file && (dump_flags & TDF_DETAILS))
3168     fprintf (dump_file, "Addition in %s costs %d\n",
3169              GET_MODE_NAME (mode), cost);
3170   return cost;
3171 }
3172
3173 /* Entry in a hashtable of already known costs for multiplication.  */
3174 struct mbc_entry
3175 {
3176   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3177   enum machine_mode mode;       /* In mode.  */
3178   unsigned cost;                /* The cost.  */
3179 };
3180
3181 /* Counts hash value for the ENTRY.  */
3182
3183 static hashval_t
3184 mbc_entry_hash (const void *entry)
3185 {
3186   const struct mbc_entry *e = entry;
3187
3188   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3189 }
3190
3191 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3192
3193 static int
3194 mbc_entry_eq (const void *entry1, const void *entry2)
3195 {
3196   const struct mbc_entry *e1 = entry1;
3197   const struct mbc_entry *e2 = entry2;
3198
3199   return (e1->mode == e2->mode
3200           && e1->cst == e2->cst);
3201 }
3202
3203 /* Returns cost of multiplication by constant CST in MODE.  */
3204
3205 unsigned
3206 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3207 {
3208   static htab_t costs;
3209   struct mbc_entry **cached, act;
3210   rtx seq;
3211   unsigned cost;
3212
3213   if (!costs)
3214     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3215
3216   act.mode = mode;
3217   act.cst = cst;
3218   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3219   if (*cached)
3220     return (*cached)->cost;
3221
3222   *cached = xmalloc (sizeof (struct mbc_entry));
3223   (*cached)->mode = mode;
3224   (*cached)->cst = cst;
3225
3226   start_sequence ();
3227   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3228                NULL_RTX, 0);
3229   seq = get_insns ();
3230   end_sequence ();
3231
3232   cost = seq_cost (seq);
3233
3234   if (dump_file && (dump_flags & TDF_DETAILS))
3235     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3236              (int) cst, GET_MODE_NAME (mode), cost);
3237
3238   (*cached)->cost = cost;
3239
3240   return cost;
3241 }
3242
3243 /* Returns true if multiplying by RATIO is allowed in address.  */
3244
3245 bool
3246 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3247 {
3248 #define MAX_RATIO 128
3249   static sbitmap valid_mult;
3250
3251   if (!valid_mult)
3252     {
3253       rtx reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3254       rtx addr;
3255       HOST_WIDE_INT i;
3256
3257       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3258       sbitmap_zero (valid_mult);
3259       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3260       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3261         {
3262           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3263           if (memory_address_p (Pmode, addr))
3264             SET_BIT (valid_mult, i + MAX_RATIO);
3265         }
3266
3267       if (dump_file && (dump_flags & TDF_DETAILS))
3268         {
3269           fprintf (dump_file, "  allowed multipliers:");
3270           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3271             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3272               fprintf (dump_file, " %d", (int) i);
3273           fprintf (dump_file, "\n");
3274           fprintf (dump_file, "\n");
3275         }
3276     }
3277
3278   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3279     return false;
3280
3281   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3282 }
3283
3284 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3285    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3286    variable is omitted.  The created memory accesses MODE.
3287
3288    TODO -- there must be some better way.  This all is quite crude.  */
3289
3290 static unsigned
3291 get_address_cost (bool symbol_present, bool var_present,
3292                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3293 {
3294   static bool initialized = false;
3295   static HOST_WIDE_INT rat, off;
3296   static HOST_WIDE_INT min_offset, max_offset;
3297   static unsigned costs[2][2][2][2];
3298   unsigned cost, acost;
3299   rtx seq, addr, base;
3300   bool offset_p, ratio_p;
3301   rtx reg1;
3302   HOST_WIDE_INT s_offset;
3303   unsigned HOST_WIDE_INT mask;
3304   unsigned bits;
3305
3306   if (!initialized)
3307     {
3308       HOST_WIDE_INT i;
3309       initialized = true;
3310
3311       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3312
3313       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3314       for (i = 1; i <= 1 << 20; i <<= 1)
3315         {
3316           XEXP (addr, 1) = GEN_INT (i);
3317           if (!memory_address_p (Pmode, addr))
3318             break;
3319         }
3320       max_offset = i >> 1;
3321       off = max_offset;
3322
3323       for (i = 1; i <= 1 << 20; i <<= 1)
3324         {
3325           XEXP (addr, 1) = GEN_INT (-i);
3326           if (!memory_address_p (Pmode, addr))
3327             break;
3328         }
3329       min_offset = -(i >> 1);
3330
3331       if (dump_file && (dump_flags & TDF_DETAILS))
3332         {
3333           fprintf (dump_file, "get_address_cost:\n");
3334           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3335           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3336         }
3337
3338       rat = 1;
3339       for (i = 2; i <= MAX_RATIO; i++)
3340         if (multiplier_allowed_in_address_p (i))
3341           {
3342             rat = i;
3343             break;
3344           }
3345     }
3346
3347   bits = GET_MODE_BITSIZE (Pmode);
3348   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3349   offset &= mask;
3350   if ((offset >> (bits - 1) & 1))
3351     offset |= ~mask;
3352   s_offset = offset;
3353
3354   cost = 0;
3355   offset_p = (s_offset != 0
3356               && min_offset <= s_offset && s_offset <= max_offset);
3357   ratio_p = (ratio != 1
3358              && multiplier_allowed_in_address_p (ratio));
3359
3360   if (ratio != 1 && !ratio_p)
3361     cost += multiply_by_cost (ratio, Pmode);
3362
3363   if (s_offset && !offset_p && !symbol_present)
3364     {
3365       cost += add_cost (Pmode);
3366       var_present = true;
3367     }
3368
3369   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3370   if (!acost)
3371     {
3372       acost = 0;
3373
3374       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3375       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3376       if (ratio_p)
3377         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3378
3379       if (var_present)
3380         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3381
3382       if (symbol_present)
3383         {
3384           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3385           if (offset_p)
3386             base = gen_rtx_fmt_e (CONST, Pmode,
3387                                   gen_rtx_fmt_ee (PLUS, Pmode,
3388                                                   base,
3389                                                   GEN_INT (off)));
3390         }
3391       else if (offset_p)
3392         base = GEN_INT (off);
3393       else
3394         base = NULL_RTX;
3395
3396       if (base)
3397         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3398
3399       start_sequence ();
3400       addr = memory_address (Pmode, addr);
3401       seq = get_insns ();
3402       end_sequence ();
3403
3404       acost = seq_cost (seq);
3405       acost += address_cost (addr, Pmode);
3406
3407       if (!acost)
3408         acost = 1;
3409       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3410     }
3411
3412   return cost + acost;
3413 }
3414 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3415    invariants the computation depends on.  */
3416
3417 static unsigned
3418 force_var_cost (struct ivopts_data *data,
3419                 tree expr, bitmap *depends_on)
3420 {
3421   static bool costs_initialized = false;
3422   static unsigned integer_cost;
3423   static unsigned symbol_cost;
3424   static unsigned address_cost;
3425   tree op0, op1;
3426   unsigned cost0, cost1, cost;
3427   enum machine_mode mode;
3428
3429   if (!costs_initialized)
3430     {
3431       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3432       rtx x = gen_rtx_MEM (DECL_MODE (var),
3433                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3434       tree addr;
3435       tree type = build_pointer_type (integer_type_node);
3436
3437       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3438                                                            2000));
3439
3440       SET_DECL_RTL (var, x);
3441       TREE_STATIC (var) = 1;
3442       addr = build1 (ADDR_EXPR, type, var);
3443       symbol_cost = computation_cost (addr) + 1;
3444
3445       address_cost
3446         = computation_cost (build2 (PLUS_EXPR, type,
3447                                     addr,
3448                                     build_int_cst_type (type, 2000))) + 1;
3449       if (dump_file && (dump_flags & TDF_DETAILS))
3450         {
3451           fprintf (dump_file, "force_var_cost:\n");
3452           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3453           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3454           fprintf (dump_file, "  address %d\n", (int) address_cost);
3455           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3456           fprintf (dump_file, "\n");
3457         }
3458
3459       costs_initialized = true;
3460     }
3461
3462   STRIP_NOPS (expr);
3463
3464   if (depends_on)
3465     {
3466       fd_ivopts_data = data;
3467       walk_tree (&expr, find_depends, depends_on, NULL);
3468     }
3469
3470   if (SSA_VAR_P (expr))
3471     return 0;
3472
3473   if (TREE_INVARIANT (expr))
3474     {
3475       if (TREE_CODE (expr) == INTEGER_CST)
3476         return integer_cost;
3477
3478       if (TREE_CODE (expr) == ADDR_EXPR)
3479         {
3480           tree obj = TREE_OPERAND (expr, 0);
3481
3482           if (TREE_CODE (obj) == VAR_DECL
3483               || TREE_CODE (obj) == PARM_DECL
3484               || TREE_CODE (obj) == RESULT_DECL)
3485             return symbol_cost;
3486         }
3487
3488       return address_cost;
3489     }
3490
3491   switch (TREE_CODE (expr))
3492     {
3493     case PLUS_EXPR:
3494     case MINUS_EXPR:
3495     case MULT_EXPR:
3496       op0 = TREE_OPERAND (expr, 0);
3497       op1 = TREE_OPERAND (expr, 1);
3498       STRIP_NOPS (op0);
3499       STRIP_NOPS (op1);
3500
3501       if (is_gimple_val (op0))
3502         cost0 = 0;
3503       else
3504         cost0 = force_var_cost (data, op0, NULL);
3505
3506       if (is_gimple_val (op1))
3507         cost1 = 0;
3508       else
3509         cost1 = force_var_cost (data, op1, NULL);
3510
3511       break;
3512
3513     default:
3514       /* Just an arbitrary value, FIXME.  */
3515       return target_spill_cost;
3516     }
3517
3518   mode = TYPE_MODE (TREE_TYPE (expr));
3519   switch (TREE_CODE (expr))
3520     {
3521     case PLUS_EXPR:
3522     case MINUS_EXPR:
3523       cost = add_cost (mode);
3524       break;
3525
3526     case MULT_EXPR:
3527       if (cst_and_fits_in_hwi (op0))
3528         cost = multiply_by_cost (int_cst_value (op0), mode);
3529       else if (cst_and_fits_in_hwi (op1))
3530         cost = multiply_by_cost (int_cst_value (op1), mode);
3531       else
3532         return target_spill_cost;
3533       break;
3534
3535     default:
3536       gcc_unreachable ();
3537     }
3538
3539   cost += cost0;
3540   cost += cost1;
3541
3542   /* Bound the cost by target_spill_cost.  The parts of complicated
3543      computations often are either loop invariant or at least can
3544      be shared between several iv uses, so letting this grow without
3545      limits would not give reasonable results.  */
3546   return cost < target_spill_cost ? cost : target_spill_cost;
3547 }
3548
3549 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3550    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3551    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3552    invariants the computation depends on.  */
3553
3554 static unsigned
3555 split_address_cost (struct ivopts_data *data,
3556                     tree addr, bool *symbol_present, bool *var_present,
3557                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3558 {
3559   tree core;
3560   HOST_WIDE_INT bitsize;
3561   HOST_WIDE_INT bitpos;
3562   tree toffset;
3563   enum machine_mode mode;
3564   int unsignedp, volatilep;
3565
3566   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3567                               &unsignedp, &volatilep, false);
3568
3569   if (toffset != 0
3570       || bitpos % BITS_PER_UNIT != 0
3571       || TREE_CODE (core) != VAR_DECL)
3572     {
3573       *symbol_present = false;
3574       *var_present = true;
3575       fd_ivopts_data = data;
3576       walk_tree (&addr, find_depends, depends_on, NULL);
3577       return target_spill_cost;
3578     }
3579
3580   *offset += bitpos / BITS_PER_UNIT;
3581   if (TREE_STATIC (core)
3582       || DECL_EXTERNAL (core))
3583     {
3584       *symbol_present = true;
3585       *var_present = false;
3586       return 0;
3587     }
3588
3589   *symbol_present = false;
3590   *var_present = true;
3591   return 0;
3592 }
3593
3594 /* Estimates cost of expressing difference of addresses E1 - E2 as
3595    var + symbol + offset.  The value of offset is added to OFFSET,
3596    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3597    part is missing.  DEPENDS_ON is a set of the invariants the computation
3598    depends on.  */
3599
3600 static unsigned
3601 ptr_difference_cost (struct ivopts_data *data,
3602                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3603                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3604 {
3605   HOST_WIDE_INT diff = 0;
3606   unsigned cost;
3607
3608   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3609
3610   if (ptr_difference_const (e1, e2, &diff))
3611     {
3612       *offset += diff;
3613       *symbol_present = false;
3614       *var_present = false;
3615       return 0;
3616     }
3617
3618   if (e2 == integer_zero_node)
3619     return split_address_cost (data, TREE_OPERAND (e1, 0),
3620                                symbol_present, var_present, offset, depends_on);
3621
3622   *symbol_present = false;
3623   *var_present = true;
3624
3625   cost = force_var_cost (data, e1, depends_on);
3626   cost += force_var_cost (data, e2, depends_on);
3627   cost += add_cost (Pmode);
3628
3629   return cost;
3630 }
3631
3632 /* Estimates cost of expressing difference E1 - E2 as
3633    var + symbol + offset.  The value of offset is added to OFFSET,
3634    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635    part is missing.  DEPENDS_ON is a set of the invariants the computation
3636    depends on.  */
3637
3638 static unsigned
3639 difference_cost (struct ivopts_data *data,
3640                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3641                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3642 {
3643   unsigned cost;
3644   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3645   unsigned HOST_WIDE_INT off1, off2;
3646
3647   e1 = strip_offset (e1, &off1);
3648   e2 = strip_offset (e2, &off2);
3649   *offset += off1 - off2;
3650
3651   STRIP_NOPS (e1);
3652   STRIP_NOPS (e2);
3653
3654   if (TREE_CODE (e1) == ADDR_EXPR)
3655     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3656                                 depends_on);
3657   *symbol_present = false;
3658
3659   if (operand_equal_p (e1, e2, 0))
3660     {
3661       *var_present = false;
3662       return 0;
3663     }
3664   *var_present = true;
3665   if (zero_p (e2))
3666     return force_var_cost (data, e1, depends_on);
3667
3668   if (zero_p (e1))
3669     {
3670       cost = force_var_cost (data, e2, depends_on);
3671       cost += multiply_by_cost (-1, mode);
3672
3673       return cost;
3674     }
3675
3676   cost = force_var_cost (data, e1, depends_on);
3677   cost += force_var_cost (data, e2, depends_on);
3678   cost += add_cost (mode);
3679
3680   return cost;
3681 }
3682
3683 /* Determines the cost of the computation by that USE is expressed
3684    from induction variable CAND.  If ADDRESS_P is true, we just need
3685    to create an address from it, otherwise we want to get it into
3686    register.  A set of invariants we depend on is stored in
3687    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3688
3689 static unsigned
3690 get_computation_cost_at (struct ivopts_data *data,
3691                          struct iv_use *use, struct iv_cand *cand,
3692                          bool address_p, bitmap *depends_on, tree at)
3693 {
3694   tree ubase = use->iv->base, ustep = use->iv->step;
3695   tree cbase, cstep;
3696   tree utype = TREE_TYPE (ubase), ctype;
3697   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3698   HOST_WIDE_INT ratio, aratio;
3699   bool var_present, symbol_present;
3700   unsigned cost = 0, n_sums;
3701
3702   *depends_on = NULL;
3703
3704   /* Only consider real candidates.  */
3705   if (!cand->iv)
3706     return INFTY;
3707
3708   cbase = cand->iv->base;
3709   cstep = cand->iv->step;
3710   ctype = TREE_TYPE (cbase);
3711
3712   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3713     {
3714       /* We do not have a precision to express the values of use.  */
3715       return INFTY;
3716     }
3717
3718   if (address_p)
3719     {
3720       /* Do not try to express address of an object with computation based
3721          on address of a different object.  This may cause problems in rtl
3722          level alias analysis (that does not expect this to be happening,
3723          as this is illegal in C), and would be unlikely to be useful
3724          anyway.  */
3725       if (use->iv->base_object
3726           && cand->iv->base_object
3727           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3728         return INFTY;
3729     }
3730
3731   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3732     {
3733       /* TODO -- add direct handling of this case.  */
3734       goto fallback;
3735     }
3736
3737   /* CSTEPI is removed from the offset in case statement is after the
3738      increment.  If the step is not constant, we use zero instead.
3739      This is a bit imprecise (there is the extra addition), but
3740      redundancy elimination is likely to transform the code so that
3741      it uses value of the variable before increment anyway,
3742      so it is not that much unrealistic.  */
3743   if (cst_and_fits_in_hwi (cstep))
3744     cstepi = int_cst_value (cstep);
3745   else
3746     cstepi = 0;
3747
3748   if (cst_and_fits_in_hwi (ustep)
3749       && cst_and_fits_in_hwi (cstep))
3750     {
3751       ustepi = int_cst_value (ustep);
3752
3753       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3754         return INFTY;
3755     }
3756   else
3757     {
3758       tree rat;
3759
3760       rat = constant_multiple_of (utype, ustep, cstep);
3761
3762       if (!rat)
3763         return INFTY;
3764
3765       if (cst_and_fits_in_hwi (rat))
3766         ratio = int_cst_value (rat);
3767       else if (integer_onep (rat))
3768         ratio = 1;
3769       else if (integer_all_onesp (rat))
3770         ratio = -1;
3771       else
3772         return INFTY;
3773     }
3774
3775   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3776      or ratio == 1, it is better to handle this like
3777
3778      ubase - ratio * cbase + ratio * var
3779
3780      (also holds in the case ratio == -1, TODO.  */
3781
3782   if (cst_and_fits_in_hwi (cbase))
3783     {
3784       offset = - ratio * int_cst_value (cbase);
3785       cost += difference_cost (data,
3786                                ubase, integer_zero_node,
3787                                &symbol_present, &var_present, &offset,
3788                                depends_on);
3789     }
3790   else if (ratio == 1)
3791     {
3792       cost += difference_cost (data,
3793                                ubase, cbase,
3794                                &symbol_present, &var_present, &offset,
3795                                depends_on);
3796     }
3797   else
3798     {
3799       cost += force_var_cost (data, cbase, depends_on);
3800       cost += add_cost (TYPE_MODE (ctype));
3801       cost += difference_cost (data,
3802                                ubase, integer_zero_node,
3803                                &symbol_present, &var_present, &offset,
3804                                depends_on);
3805     }
3806
3807   /* If we are after the increment, the value of the candidate is higher by
3808      one iteration.  */
3809   if (stmt_after_increment (data->current_loop, cand, at))
3810     offset -= ratio * cstepi;
3811
3812   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3813      (symbol/var/const parts may be omitted).  If we are looking for an address,
3814      find the cost of addressing this.  */
3815   if (address_p)
3816     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3817
3818   /* Otherwise estimate the costs for computing the expression.  */
3819   aratio = ratio > 0 ? ratio : -ratio;
3820   if (!symbol_present && !var_present && !offset)
3821     {
3822       if (ratio != 1)
3823         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3824
3825       return cost;
3826     }
3827
3828   if (aratio != 1)
3829     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3830
3831   n_sums = 1;
3832   if (var_present
3833       /* Symbol + offset should be compile-time computable.  */
3834       && (symbol_present || offset))
3835     n_sums++;
3836
3837   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3838
3839 fallback:
3840   {
3841     /* Just get the expression, expand it and measure the cost.  */
3842     tree comp = get_computation_at (data->current_loop, use, cand, at);
3843
3844     if (!comp)
3845       return INFTY;
3846
3847     if (address_p)
3848       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3849
3850     return computation_cost (comp);
3851   }
3852 }
3853
3854 /* Determines the cost of the computation by that USE is expressed
3855    from induction variable CAND.  If ADDRESS_P is true, we just need
3856    to create an address from it, otherwise we want to get it into
3857    register.  A set of invariants we depend on is stored in
3858    DEPENDS_ON.  */
3859
3860 static unsigned
3861 get_computation_cost (struct ivopts_data *data,
3862                       struct iv_use *use, struct iv_cand *cand,
3863                       bool address_p, bitmap *depends_on)
3864 {
3865   return get_computation_cost_at (data,
3866                                   use, cand, address_p, depends_on, use->stmt);
3867 }
3868
3869 /* Determines cost of basing replacement of USE on CAND in a generic
3870    expression.  */
3871
3872 static bool
3873 determine_use_iv_cost_generic (struct ivopts_data *data,
3874                                struct iv_use *use, struct iv_cand *cand)
3875 {
3876   bitmap depends_on;
3877   unsigned cost;
3878
3879   /* The simple case first -- if we need to express value of the preserved
3880      original biv, the cost is 0.  This also prevents us from counting the
3881      cost of increment twice -- once at this use and once in the cost of
3882      the candidate.  */
3883   if (cand->pos == IP_ORIGINAL
3884       && cand->incremented_at == use->stmt)
3885     {
3886       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3887       return true;
3888     }
3889
3890   cost = get_computation_cost (data, use, cand, false, &depends_on);
3891   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3892
3893   return cost != INFTY;
3894 }
3895
3896 /* Determines cost of basing replacement of USE on CAND in an address.  */
3897
3898 static bool
3899 determine_use_iv_cost_address (struct ivopts_data *data,
3900                                struct iv_use *use, struct iv_cand *cand)
3901 {
3902   bitmap depends_on;
3903   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3904
3905   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3906
3907   return cost != INFTY;
3908 }
3909
3910 /* Computes value of induction variable IV in iteration NITER.  */
3911
3912 static tree
3913 iv_value (struct iv *iv, tree niter)
3914 {
3915   tree val;
3916   tree type = TREE_TYPE (iv->base);
3917
3918   niter = fold_convert (type, niter);
3919   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3920
3921   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3922 }
3923
3924 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3925
3926 static tree
3927 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3928 {
3929   tree val = iv_value (cand->iv, niter);
3930   tree type = TREE_TYPE (cand->iv->base);
3931
3932   if (stmt_after_increment (loop, cand, at))
3933     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3934
3935   return val;
3936 }
3937
3938 /* Returns period of induction variable iv.  */
3939
3940 static tree
3941 iv_period (struct iv *iv)
3942 {
3943   tree step = iv->step, period, type;
3944   tree pow2div;
3945
3946   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3947
3948   /* Period of the iv is gcd (step, type range).  Since type range is power
3949      of two, it suffices to determine the maximum power of two that divides
3950      step.  */
3951   pow2div = num_ending_zeros (step);
3952   type = unsigned_type_for (TREE_TYPE (step));
3953
3954   period = build_low_bits_mask (type,
3955                                 (TYPE_PRECISION (type)
3956                                  - tree_low_cst (pow2div, 1)));
3957
3958   return period;
3959 }
3960
3961 /* Returns the comparison operator used when eliminating the iv USE.  */
3962
3963 static enum tree_code
3964 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3965 {
3966   struct loop *loop = data->current_loop;
3967   basic_block ex_bb;
3968   edge exit;
3969
3970   ex_bb = bb_for_stmt (use->stmt);
3971   exit = EDGE_SUCC (ex_bb, 0);
3972   if (flow_bb_inside_loop_p (loop, exit->dest))
3973     exit = EDGE_SUCC (ex_bb, 1);
3974
3975   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3976 }
3977
3978 /* Check whether it is possible to express the condition in USE by comparison
3979    of candidate CAND.  If so, store the value compared with to BOUND.  */
3980
3981 static bool
3982 may_eliminate_iv (struct ivopts_data *data,
3983                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3984 {
3985   basic_block ex_bb;
3986   edge exit;
3987   struct tree_niter_desc *niter;
3988   tree nit, nit_type;
3989   tree wider_type, period, per_type;
3990   struct loop *loop = data->current_loop;
3991
3992   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3993     return false;
3994
3995   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3996      for other conditions inside loop body.  */
3997   ex_bb = bb_for_stmt (use->stmt);
3998   if (use->stmt != last_stmt (ex_bb)
3999       || TREE_CODE (use->stmt) != COND_EXPR)
4000     return false;
4001   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4002     return false;
4003
4004   exit = EDGE_SUCC (ex_bb, 0);
4005   if (flow_bb_inside_loop_p (loop, exit->dest))
4006     exit = EDGE_SUCC (ex_bb, 1);
4007   if (flow_bb_inside_loop_p (loop, exit->dest))
4008     return false;
4009
4010   niter = niter_for_exit (data, exit);
4011   if (!niter
4012       || !zero_p (niter->may_be_zero))
4013     return false;
4014
4015   nit = niter->niter;
4016   nit_type = TREE_TYPE (nit);
4017
4018   /* Determine whether we may use the variable to test whether niter iterations
4019      elapsed.  This is the case iff the period of the induction variable is
4020      greater than the number of iterations.  */
4021   period = iv_period (cand->iv);
4022   if (!period)
4023     return false;
4024   per_type = TREE_TYPE (period);
4025
4026   wider_type = TREE_TYPE (period);
4027   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4028     wider_type = per_type;
4029   else
4030     wider_type = nit_type;
4031
4032   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
4033                                        fold_convert (wider_type, period),
4034                                        fold_convert (wider_type, nit)))))
4035     return false;
4036
4037   *bound = cand_value_at (loop, cand, use->stmt, nit);
4038   return true;
4039 }
4040
4041 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4042
4043 static bool
4044 determine_use_iv_cost_condition (struct ivopts_data *data,
4045                                  struct iv_use *use, struct iv_cand *cand)
4046 {
4047   tree bound = NULL_TREE, op, cond;
4048   bitmap depends_on = NULL;
4049   unsigned cost;
4050
4051   /* Only consider real candidates.  */
4052   if (!cand->iv)
4053     {
4054       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4055       return false;
4056     }
4057
4058   if (may_eliminate_iv (data, use, cand, &bound))
4059     {
4060       cost = force_var_cost (data, bound, &depends_on);
4061
4062       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4063       return cost != INFTY;
4064     }
4065
4066   /* The induction variable elimination failed; just express the original
4067      giv.  If it is compared with an invariant, note that we cannot get
4068      rid of it.  */
4069   cost = get_computation_cost (data, use, cand, false, &depends_on);
4070
4071   cond = *use->op_p;
4072   if (TREE_CODE (cond) != SSA_NAME)
4073     {
4074       op = TREE_OPERAND (cond, 0);
4075       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4076         op = TREE_OPERAND (cond, 1);
4077       if (TREE_CODE (op) == SSA_NAME)
4078         {
4079           op = get_iv (data, op)->base;
4080           fd_ivopts_data = data;
4081           walk_tree (&op, find_depends, &depends_on, NULL);
4082         }
4083     }
4084
4085   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4086   return cost != INFTY;
4087 }
4088
4089 /* Checks whether it is possible to replace the final value of USE by
4090    a direct computation.  If so, the formula is stored to *VALUE.  */
4091
4092 static bool
4093 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4094                          tree *value)
4095 {
4096   struct loop *loop = data->current_loop;
4097   edge exit;
4098   struct tree_niter_desc *niter;
4099
4100   exit = single_dom_exit (loop);
4101   if (!exit)
4102     return false;
4103
4104   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4105                               bb_for_stmt (use->stmt)));
4106
4107   niter = niter_for_single_dom_exit (data);
4108   if (!niter
4109       || !zero_p (niter->may_be_zero))
4110     return false;
4111
4112   *value = iv_value (use->iv, niter->niter);
4113
4114   return true;
4115 }
4116
4117 /* Determines cost of replacing final value of USE using CAND.  */
4118
4119 static bool
4120 determine_use_iv_cost_outer (struct ivopts_data *data,
4121                              struct iv_use *use, struct iv_cand *cand)
4122 {
4123   bitmap depends_on;
4124   unsigned cost;
4125   edge exit;
4126   tree value = NULL_TREE;
4127   struct loop *loop = data->current_loop;
4128
4129   /* The simple case first -- if we need to express value of the preserved
4130      original biv, the cost is 0.  This also prevents us from counting the
4131      cost of increment twice -- once at this use and once in the cost of
4132      the candidate.  */
4133   if (cand->pos == IP_ORIGINAL
4134       && cand->incremented_at == use->stmt)
4135     {
4136       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4137       return true;
4138     }
4139
4140   if (!cand->iv)
4141     {
4142       if (!may_replace_final_value (data, use, &value))
4143         {
4144           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4145           return false;
4146         }
4147
4148       depends_on = NULL;
4149       cost = force_var_cost (data, value, &depends_on);
4150
4151       cost /= AVG_LOOP_NITER (loop);
4152
4153       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4154       return cost != INFTY;
4155     }
4156
4157   exit = single_dom_exit (loop);
4158   if (exit)
4159     {
4160       /* If there is just a single exit, we may use value of the candidate
4161          after we take it to determine the value of use.  */
4162       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4163                                       last_stmt (exit->src));
4164       if (cost != INFTY)
4165         cost /= AVG_LOOP_NITER (loop);
4166     }
4167   else
4168     {
4169       /* Otherwise we just need to compute the iv.  */
4170       cost = get_computation_cost (data, use, cand, false, &depends_on);
4171     }
4172
4173   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4174
4175   return cost != INFTY;
4176 }
4177
4178 /* Determines cost of basing replacement of USE on CAND.  Returns false
4179    if USE cannot be based on CAND.  */
4180
4181 static bool
4182 determine_use_iv_cost (struct ivopts_data *data,
4183                        struct iv_use *use, struct iv_cand *cand)
4184 {
4185   switch (use->type)
4186     {
4187     case USE_NONLINEAR_EXPR:
4188       return determine_use_iv_cost_generic (data, use, cand);
4189
4190     case USE_OUTER:
4191       return determine_use_iv_cost_outer (data, use, cand);
4192
4193     case USE_ADDRESS:
4194       return determine_use_iv_cost_address (data, use, cand);
4195
4196     case USE_COMPARE:
4197       return determine_use_iv_cost_condition (data, use, cand);
4198
4199     default:
4200       gcc_unreachable ();
4201     }
4202 }
4203
4204 /* Determines costs of basing the use of the iv on an iv candidate.  */
4205
4206 static void
4207 determine_use_iv_costs (struct ivopts_data *data)
4208 {
4209   unsigned i, j;
4210   struct iv_use *use;
4211   struct iv_cand *cand;
4212   bitmap to_clear = BITMAP_ALLOC (NULL);
4213
4214   alloc_use_cost_map (data);
4215
4216   for (i = 0; i < n_iv_uses (data); i++)
4217     {
4218       use = iv_use (data, i);
4219
4220       if (data->consider_all_candidates)
4221         {
4222           for (j = 0; j < n_iv_cands (data); j++)
4223             {
4224               cand = iv_cand (data, j);
4225               determine_use_iv_cost (data, use, cand);
4226             }
4227         }
4228       else
4229         {
4230           bitmap_iterator bi;
4231
4232           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4233             {
4234               cand = iv_cand (data, j);
4235               if (!determine_use_iv_cost (data, use, cand))
4236                 bitmap_set_bit (to_clear, j);
4237             }
4238
4239           /* Remove the candidates for that the cost is infinite from
4240              the list of related candidates.  */
4241           bitmap_and_compl_into (use->related_cands, to_clear);
4242           bitmap_clear (to_clear);
4243         }
4244     }
4245
4246   BITMAP_FREE (to_clear);
4247
4248   if (dump_file && (dump_flags & TDF_DETAILS))
4249     {
4250       fprintf (dump_file, "Use-candidate costs:\n");
4251
4252       for (i = 0; i < n_iv_uses (data); i++)
4253         {
4254           use = iv_use (data, i);
4255
4256           fprintf (dump_file, "Use %d:\n", i);
4257           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4258           for (j = 0; j < use->n_map_members; j++)
4259             {
4260               if (!use->cost_map[j].cand
4261                   || use->cost_map[j].cost == INFTY)
4262                 continue;
4263
4264               fprintf (dump_file, "  %d\t%d\t",
4265                        use->cost_map[j].cand->id,
4266                        use->cost_map[j].cost);
4267               if (use->cost_map[j].depends_on)
4268                 bitmap_print (dump_file,
4269                               use->cost_map[j].depends_on, "","");
4270               fprintf (dump_file, "\n");
4271             }
4272
4273           fprintf (dump_file, "\n");
4274         }
4275       fprintf (dump_file, "\n");
4276     }
4277 }
4278
4279 /* Determines cost of the candidate CAND.  */
4280
4281 static void
4282 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4283 {
4284   unsigned cost_base, cost_step;
4285   tree base;
4286
4287   if (!cand->iv)
4288     {
4289       cand->cost = 0;
4290       return;
4291     }
4292
4293   /* There are two costs associated with the candidate -- its increment
4294      and its initialization.  The second is almost negligible for any loop
4295      that rolls enough, so we take it just very little into account.  */
4296
4297   base = cand->iv->base;
4298   cost_base = force_var_cost (data, base, NULL);
4299   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4300
4301   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4302
4303   /* Prefer the original iv unless we may gain something by replacing it;
4304      this is not really relevant for artificial ivs created by other
4305      passes.  */
4306   if (cand->pos == IP_ORIGINAL
4307       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4308     cand->cost--;
4309
4310   /* Prefer not to insert statements into latch unless there are some
4311      already (so that we do not create unnecessary jumps).  */
4312   if (cand->pos == IP_END
4313       && empty_block_p (ip_end_pos (data->current_loop)))
4314     cand->cost++;
4315 }
4316
4317 /* Determines costs of computation of the candidates.  */
4318
4319 static void
4320 determine_iv_costs (struct ivopts_data *data)
4321 {
4322   unsigned i;
4323
4324   if (dump_file && (dump_flags & TDF_DETAILS))
4325     {
4326       fprintf (dump_file, "Candidate costs:\n");
4327       fprintf (dump_file, "  cand\tcost\n");
4328     }
4329
4330   for (i = 0; i < n_iv_cands (data); i++)
4331     {
4332       struct iv_cand *cand = iv_cand (data, i);
4333
4334       determine_iv_cost (data, cand);
4335
4336       if (dump_file && (dump_flags & TDF_DETAILS))
4337         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4338     }
4339
4340 if (dump_file && (dump_flags & TDF_DETAILS))
4341       fprintf (dump_file, "\n");
4342 }
4343
4344 /* Calculates cost for having SIZE induction variables.  */
4345
4346 static unsigned
4347 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4348 {
4349   return global_cost_for_size (size,
4350                                loop_data (data->current_loop)->regs_used,
4351                                n_iv_uses (data));
4352 }
4353
4354 /* For each size of the induction variable set determine the penalty.  */
4355
4356 static void
4357 determine_set_costs (struct ivopts_data *data)
4358 {
4359   unsigned j, n;
4360   tree phi, op;
4361   struct loop *loop = data->current_loop;
4362   bitmap_iterator bi;
4363
4364   /* We use the following model (definitely improvable, especially the
4365      cost function -- TODO):
4366
4367      We estimate the number of registers available (using MD data), name it A.
4368
4369      We estimate the number of registers used by the loop, name it U.  This
4370      number is obtained as the number of loop phi nodes (not counting virtual
4371      registers and bivs) + the number of variables from outside of the loop.
4372
4373      We set a reserve R (free regs that are used for temporary computations,
4374      etc.).  For now the reserve is a constant 3.
4375
4376      Let I be the number of induction variables.
4377
4378      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4379         make a lot of ivs without a reason).
4380      -- if A - R < U + I <= A, the cost is I * PRES_COST
4381      -- if U + I > A, the cost is I * PRES_COST and
4382         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4383
4384   if (dump_file && (dump_flags & TDF_DETAILS))
4385     {
4386       fprintf (dump_file, "Global costs:\n");
4387       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4388       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4389       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4390       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4391     }
4392
4393   n = 0;
4394   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4395     {
4396       op = PHI_RESULT (phi);
4397
4398       if (!is_gimple_reg (op))
4399         continue;
4400
4401       if (get_iv (data, op))
4402         continue;
4403
4404       n++;
4405     }
4406
4407   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4408     {
4409       struct version_info *info = ver_info (data, j);
4410
4411       if (info->inv_id && info->has_nonlin_use)
4412         n++;
4413     }
4414
4415   loop_data (loop)->regs_used = n;
4416   if (dump_file && (dump_flags & TDF_DETAILS))
4417     fprintf (dump_file, "  regs_used %d\n", n);
4418
4419   if (dump_file && (dump_flags & TDF_DETAILS))
4420     {
4421       fprintf (dump_file, "  cost for size:\n");
4422       fprintf (dump_file, "  ivs\tcost\n");
4423       for (j = 0; j <= 2 * target_avail_regs; j++)
4424         fprintf (dump_file, "  %d\t%d\n", j,
4425                  ivopts_global_cost_for_size (data, j));
4426       fprintf (dump_file, "\n");
4427     }
4428 }
4429
4430 /* Returns true if A is a cheaper cost pair than B.  */
4431
4432 static bool
4433 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4434 {
4435   if (!a)
4436     return false;
4437
4438   if (!b)
4439     return true;
4440
4441   if (a->cost < b->cost)
4442     return true;
4443
4444   if (a->cost > b->cost)
4445     return false;
4446
4447   /* In case the costs are the same, prefer the cheaper candidate.  */
4448   if (a->cand->cost < b->cand->cost)
4449     return true;
4450
4451   return false;
4452 }
4453
4454 /* Computes the cost field of IVS structure.  */
4455
4456 static void
4457 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4458 {
4459   unsigned cost = 0;
4460
4461   cost += ivs->cand_use_cost;
4462   cost += ivs->cand_cost;
4463   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4464
4465   ivs->cost = cost;
4466 }
4467
4468 /* Remove invariants in set INVS to set IVS.  */
4469
4470 static void
4471 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4472 {
4473   bitmap_iterator bi;
4474   unsigned iid;
4475
4476   if (!invs)
4477     return;
4478
4479   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4480     {
4481       ivs->n_invariant_uses[iid]--;
4482       if (ivs->n_invariant_uses[iid] == 0)
4483         ivs->n_regs--;
4484     }
4485 }
4486
4487 /* Set USE not to be expressed by any candidate in IVS.  */
4488
4489 static void
4490 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4491                  struct iv_use *use)
4492 {
4493   unsigned uid = use->id, cid;
4494   struct cost_pair *cp;
4495
4496   cp = ivs->cand_for_use[uid];
4497   if (!cp)
4498     return;
4499   cid = cp->cand->id;
4500
4501   ivs->bad_uses++;
4502   ivs->cand_for_use[uid] = NULL;
4503   ivs->n_cand_uses[cid]--;
4504
4505   if (ivs->n_cand_uses[cid] == 0)
4506     {
4507       bitmap_clear_bit (ivs->cands, cid);
4508       /* Do not count the pseudocandidates.  */
4509       if (cp->cand->iv)
4510         ivs->n_regs--;
4511       ivs->n_cands--;
4512       ivs->cand_cost -= cp->cand->cost;
4513
4514       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4515     }
4516
4517   ivs->cand_use_cost -= cp->cost;
4518
4519   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4520   iv_ca_recount_cost (data, ivs);
4521 }
4522
4523 /* Add invariants in set INVS to set IVS.  */
4524
4525 static void
4526 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4527 {
4528   bitmap_iterator bi;
4529   unsigned iid;
4530
4531   if (!invs)
4532     return;
4533
4534   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4535     {
4536       ivs->n_invariant_uses[iid]++;
4537       if (ivs->n_invariant_uses[iid] == 1)
4538         ivs->n_regs++;
4539     }
4540 }
4541
4542 /* Set cost pair for USE in set IVS to CP.  */
4543
4544 static void
4545 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4546               struct iv_use *use, struct cost_pair *cp)
4547 {
4548   unsigned uid = use->id, cid;
4549
4550   if (ivs->cand_for_use[uid] == cp)
4551     return;
4552
4553   if (ivs->cand_for_use[uid])
4554     iv_ca_set_no_cp (data, ivs, use);
4555
4556   if (cp)
4557     {
4558       cid = cp->cand->id;
4559
4560       ivs->bad_uses--;
4561       ivs->cand_for_use[uid] = cp;
4562       ivs->n_cand_uses[cid]++;
4563       if (ivs->n_cand_uses[cid] == 1)
4564         {
4565           bitmap_set_bit (ivs->cands, cid);
4566           /* Do not count the pseudocandidates.  */
4567           if (cp->cand->iv)
4568             ivs->n_regs++;
4569           ivs->n_cands++;
4570           ivs->cand_cost += cp->cand->cost;
4571
4572           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4573         }
4574
4575       ivs->cand_use_cost += cp->cost;
4576       iv_ca_set_add_invariants (ivs, cp->depends_on);
4577       iv_ca_recount_cost (data, ivs);
4578     }
4579 }
4580
4581 /* Extend set IVS by expressing USE by some of the candidates in it
4582    if possible.  */
4583
4584 static void
4585 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4586                struct iv_use *use)
4587 {
4588   struct cost_pair *best_cp = NULL, *cp;
4589   bitmap_iterator bi;
4590   unsigned i;
4591
4592   gcc_assert (ivs->upto >= use->id);
4593
4594   if (ivs->upto == use->id)
4595     {
4596       ivs->upto++;
4597       ivs->bad_uses++;
4598     }
4599
4600   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4601     {
4602       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4603
4604       if (cheaper_cost_pair (cp, best_cp))
4605         best_cp = cp;
4606     }
4607
4608   iv_ca_set_cp (data, ivs, use, best_cp);
4609 }
4610
4611 /* Get cost for assignment IVS.  */
4612
4613 static unsigned
4614 iv_ca_cost (struct iv_ca *ivs)
4615 {
4616   return (ivs->bad_uses ? INFTY : ivs->cost);
4617 }
4618
4619 /* Returns true if all dependences of CP are among invariants in IVS.  */
4620
4621 static bool
4622 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4623 {
4624   unsigned i;
4625   bitmap_iterator bi;
4626
4627   if (!cp->depends_on)
4628     return true;
4629
4630   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4631     {
4632       if (ivs->n_invariant_uses[i] == 0)
4633         return false;
4634     }
4635
4636   return true;
4637 }
4638
4639 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4640    it before NEXT_CHANGE.  */
4641
4642 static struct iv_ca_delta *
4643 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4644                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4645 {
4646   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4647
4648   change->use = use;
4649   change->old_cp = old_cp;
4650   change->new_cp = new_cp;
4651   change->next_change = next_change;
4652
4653   return change;
4654 }
4655
4656 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4657    are rewritten.  */
4658
4659 static struct iv_ca_delta *
4660 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4661 {
4662   struct iv_ca_delta *last;
4663
4664   if (!l2)
4665     return l1;
4666
4667   if (!l1)
4668     return l2;
4669
4670   for (last = l1; last->next_change; last = last->next_change)
4671     continue;
4672   last->next_change = l2;
4673
4674   return l1;
4675 }
4676
4677 /* Returns candidate by that USE is expressed in IVS.  */
4678
4679 static struct cost_pair *
4680 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4681 {
4682   return ivs->cand_for_use[use->id];
4683 }
4684
4685 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4686
4687 static struct iv_ca_delta *
4688 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4689 {
4690   struct iv_ca_delta *act, *next, *prev = NULL;
4691   struct cost_pair *tmp;
4692
4693   for (act = delta; act; act = next)
4694     {
4695       next = act->next_change;
4696       act->next_change = prev;
4697       prev = act;
4698
4699       tmp = act->old_cp;
4700       act->old_cp = act->new_cp;
4701       act->new_cp = tmp;
4702     }
4703
4704   return prev;
4705 }
4706
4707 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4708    reverted instead.  */
4709
4710 static void
4711 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4712                     struct iv_ca_delta *delta, bool forward)
4713 {
4714   struct cost_pair *from, *to;
4715   struct iv_ca_delta *act;
4716
4717   if (!forward)
4718     delta = iv_ca_delta_reverse (delta);
4719
4720   for (act = delta; act; act = act->next_change)
4721     {
4722       from = act->old_cp;
4723       to = act->new_cp;
4724       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4725       iv_ca_set_cp (data, ivs, act->use, to);
4726     }
4727
4728   if (!forward)
4729     iv_ca_delta_reverse (delta);
4730 }
4731
4732 /* Returns true if CAND is used in IVS.  */
4733
4734 static bool
4735 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4736 {
4737   return ivs->n_cand_uses[cand->id] > 0;
4738 }
4739
4740 /* Returns number of induction variable candidates in the set IVS.  */
4741
4742 static unsigned
4743 iv_ca_n_cands (struct iv_ca *ivs)
4744 {
4745   return ivs->n_cands;
4746 }
4747
4748 /* Free the list of changes DELTA.  */
4749
4750 static void
4751 iv_ca_delta_free (struct iv_ca_delta **delta)
4752 {
4753   struct iv_ca_delta *act, *next;
4754
4755   for (act = *delta; act; act = next)
4756     {
4757       next = act->next_change;
4758       free (act);
4759     }
4760
4761   *delta = NULL;
4762 }
4763
4764 /* Allocates new iv candidates assignment.  */
4765
4766 static struct iv_ca *
4767 iv_ca_new (struct ivopts_data *data)
4768 {
4769   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4770
4771   nw->upto = 0;
4772   nw->bad_uses = 0;
4773   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4774   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4775   nw->cands = BITMAP_ALLOC (NULL);
4776   nw->n_cands = 0;
4777   nw->n_regs = 0;
4778   nw->cand_use_cost = 0;
4779   nw->cand_cost = 0;
4780   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4781   nw->cost = 0;
4782
4783   return nw;
4784 }
4785
4786 /* Free memory occupied by the set IVS.  */
4787
4788 static void
4789 iv_ca_free (struct iv_ca **ivs)
4790 {
4791   free ((*ivs)->cand_for_use);
4792   free ((*ivs)->n_cand_uses);
4793   BITMAP_FREE ((*ivs)->cands);
4794   free ((*ivs)->n_invariant_uses);
4795   free (*ivs);
4796   *ivs = NULL;
4797 }
4798
4799 /* Dumps IVS to FILE.  */
4800
4801 static void
4802 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4803 {
4804   const char *pref = "  invariants ";
4805   unsigned i;
4806
4807   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4808   bitmap_print (file, ivs->cands, "  candidates ","\n");
4809
4810   for (i = 1; i <= data->max_inv_id; i++)
4811     if (ivs->n_invariant_uses[i])
4812       {
4813         fprintf (file, "%s%d", pref, i);
4814         pref = ", ";
4815       }
4816   fprintf (file, "\n");
4817 }
4818
4819 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4820    new set, and store differences in DELTA.  Number of induction variables
4821    in the new set is stored to N_IVS.  */
4822
4823 static unsigned
4824 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4825               struct iv_cand *cand, struct iv_ca_delta **delta,
4826               unsigned *n_ivs)
4827 {
4828   unsigned i, cost;
4829   struct iv_use *use;
4830   struct cost_pair *old_cp, *new_cp;
4831
4832   *delta = NULL;
4833   for (i = 0; i < ivs->upto; i++)
4834     {
4835       use = iv_use (data, i);
4836       old_cp = iv_ca_cand_for_use (ivs, use);
4837
4838       if (old_cp
4839           && old_cp->cand == cand)
4840         continue;
4841
4842       new_cp = get_use_iv_cost (data, use, cand);
4843       if (!new_cp)
4844         continue;
4845
4846       if (!iv_ca_has_deps (ivs, new_cp))
4847         continue;
4848
4849       if (!cheaper_cost_pair (new_cp, old_cp))
4850         continue;
4851
4852       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4853     }
4854
4855   iv_ca_delta_commit (data, ivs, *delta, true);
4856   cost = iv_ca_cost (ivs);
4857   if (n_ivs)
4858     *n_ivs = iv_ca_n_cands (ivs);
4859   iv_ca_delta_commit (data, ivs, *delta, false);
4860
4861   return cost;
4862 }
4863
4864 /* Try narrowing set IVS by removing CAND.  Return the cost of
4865    the new set and store the differences in DELTA.  */
4866
4867 static unsigned
4868 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4869               struct iv_cand *cand, struct iv_ca_delta **delta)
4870 {
4871   unsigned i, ci;
4872   struct iv_use *use;
4873   struct cost_pair *old_cp, *new_cp, *cp;
4874   bitmap_iterator bi;
4875   struct iv_cand *cnd;
4876   unsigned cost;
4877
4878   *delta = NULL;
4879   for (i = 0; i < n_iv_uses (data); i++)
4880     {
4881       use = iv_use (data, i);
4882
4883       old_cp = iv_ca_cand_for_use (ivs, use);
4884       if (old_cp->cand != cand)
4885         continue;
4886
4887       new_cp = NULL;
4888
4889       if (data->consider_all_candidates)
4890         {
4891           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4892             {
4893               if (ci == cand->id)
4894                 continue;
4895
4896               cnd = iv_cand (data, ci);
4897
4898               cp = get_use_iv_cost (data, use, cnd);
4899               if (!cp)
4900                 continue;
4901               if (!iv_ca_has_deps (ivs, cp))
4902                 continue;
4903
4904               if (!cheaper_cost_pair (cp, new_cp))
4905                 continue;
4906
4907               new_cp = cp;
4908             }
4909         }
4910       else
4911         {
4912           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4913             {
4914               if (ci == cand->id)
4915                 continue;
4916
4917               cnd = iv_cand (data, ci);
4918
4919               cp = get_use_iv_cost (data, use, cnd);
4920               if (!cp)
4921                 continue;
4922               if (!iv_ca_has_deps (ivs, cp))
4923                 continue;
4924
4925               if (!cheaper_cost_pair (cp, new_cp))
4926                 continue;
4927
4928               new_cp = cp;
4929             }
4930         }
4931
4932       if (!new_cp)
4933         {
4934           iv_ca_delta_free (delta);
4935           return INFTY;
4936         }
4937
4938       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4939     }
4940
4941   iv_ca_delta_commit (data, ivs, *delta, true);
4942   cost = iv_ca_cost (ivs);
4943   iv_ca_delta_commit (data, ivs, *delta, false);
4944
4945   return cost;
4946 }
4947
4948 /* Try optimizing the set of candidates IVS by removing candidates different
4949    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4950    differences in DELTA.  */
4951
4952 static unsigned
4953 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4954              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4955 {
4956   bitmap_iterator bi;
4957   struct iv_ca_delta *act_delta, *best_delta;
4958   unsigned i, best_cost, acost;
4959   struct iv_cand *cand;
4960
4961   best_delta = NULL;
4962   best_cost = iv_ca_cost (ivs);
4963
4964   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4965     {
4966       cand = iv_cand (data, i);
4967
4968       if (cand == except_cand)
4969         continue;
4970
4971       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4972
4973       if (acost < best_cost)
4974         {
4975           best_cost = acost;
4976           iv_ca_delta_free (&best_delta);
4977           best_delta = act_delta;
4978         }
4979       else
4980         iv_ca_delta_free (&act_delta);
4981     }
4982
4983   if (!best_delta)
4984     {
4985       *delta = NULL;
4986       return best_cost;
4987     }
4988
4989   /* Recurse to possibly remove other unnecessary ivs.  */
4990   iv_ca_delta_commit (data, ivs, best_delta, true);
4991   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4992   iv_ca_delta_commit (data, ivs, best_delta, false);
4993   *delta = iv_ca_delta_join (best_delta, *delta);
4994   return best_cost;
4995 }
4996
4997 /* Tries to extend the sets IVS in the best possible way in order
4998    to express the USE.  */
4999
5000 static bool
5001 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5002                   struct iv_use *use)
5003 {
5004   unsigned best_cost, act_cost;
5005   unsigned i;
5006   bitmap_iterator bi;
5007   struct iv_cand *cand;
5008   struct iv_ca_delta *best_delta = NULL, *act_delta;
5009   struct cost_pair *cp;
5010
5011   iv_ca_add_use (data, ivs, use);
5012   best_cost = iv_ca_cost (ivs);
5013
5014   cp = iv_ca_cand_for_use (ivs, use);
5015   if (cp)
5016     {
5017       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5018       iv_ca_set_no_cp (data, ivs, use);
5019     }
5020
5021   /* First try important candidates.  Only if it fails, try the specific ones.
5022      Rationale -- in loops with many variables the best choice often is to use
5023      just one generic biv.  If we added here many ivs specific to the uses,
5024      the optimization algorithm later would be likely to get stuck in a local
5025      minimum, thus causing us to create too many ivs.  The approach from
5026      few ivs to more seems more likely to be successful -- starting from few
5027      ivs, replacing an expensive use by a specific iv should always be a
5028      win.  */
5029   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5030     {
5031       cand = iv_cand (data, i);
5032
5033       if (iv_ca_cand_used_p (ivs, cand))
5034         continue;
5035
5036       cp = get_use_iv_cost (data, use, cand);
5037       if (!cp)
5038         continue;
5039
5040       iv_ca_set_cp (data, ivs, use, cp);
5041       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5042       iv_ca_set_no_cp (data, ivs, use);
5043       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5044
5045       if (act_cost < best_cost)
5046         {
5047           best_cost = act_cost;
5048
5049           iv_ca_delta_free (&best_delta);
5050           best_delta = act_delta;
5051         }
5052       else
5053         iv_ca_delta_free (&act_delta);
5054     }
5055
5056   if (best_cost == INFTY)
5057     {
5058       for (i = 0; i < use->n_map_members; i++)
5059         {
5060           cp = use->cost_map + i;
5061           cand = cp->cand;
5062           if (!cand)
5063             continue;
5064
5065           /* Already tried this.  */
5066           if (cand->important)
5067             continue;
5068
5069           if (iv_ca_cand_used_p (ivs, cand))
5070             continue;
5071
5072           act_delta = NULL;
5073           iv_ca_set_cp (data, ivs, use, cp);
5074           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5075           iv_ca_set_no_cp (data, ivs, use);
5076           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5077                                        cp, act_delta);
5078
5079           if (act_cost < best_cost)
5080             {
5081               best_cost = act_cost;
5082
5083               if (best_delta)
5084                 iv_ca_delta_free (&best_delta);
5085               best_delta = act_delta;
5086             }
5087           else
5088             iv_ca_delta_free (&act_delta);
5089         }
5090     }
5091
5092   iv_ca_delta_commit (data, ivs, best_delta, true);
5093   iv_ca_delta_free (&best_delta);
5094
5095   return (best_cost != INFTY);
5096 }
5097
5098 /* Finds an initial assignment of candidates to uses.  */
5099
5100 static struct iv_ca *
5101 get_initial_solution (struct ivopts_data *data)
5102 {
5103   struct iv_ca *ivs = iv_ca_new (data);
5104   unsigned i;
5105
5106   for (i = 0; i < n_iv_uses (data); i++)
5107     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5108       {
5109         iv_ca_free (&ivs);
5110         return NULL;
5111       }
5112
5113   return ivs;
5114 }
5115
5116 /* Tries to improve set of induction variables IVS.  */
5117
5118 static bool
5119 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5120 {
5121   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5122   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5123   struct iv_cand *cand;
5124
5125   /* Try extending the set of induction variables by one.  */
5126   for (i = 0; i < n_iv_cands (data); i++)
5127     {
5128       cand = iv_cand (data, i);
5129
5130       if (iv_ca_cand_used_p (ivs, cand))
5131         continue;
5132
5133       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5134       if (!act_delta)
5135         continue;
5136
5137       /* If we successfully added the candidate and the set is small enough,
5138          try optimizing it by removing other candidates.  */
5139       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5140         {
5141           iv_ca_delta_commit (data, ivs, act_delta, true);
5142           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5143           iv_ca_delta_commit (data, ivs, act_delta, false);
5144           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5145         }
5146
5147       if (acost < best_cost)
5148         {
5149           best_cost = acost;
5150           iv_ca_delta_free (&best_delta);
5151           best_delta = act_delta;
5152         }
5153       else
5154         iv_ca_delta_free (&act_delta);
5155     }
5156
5157   if (!best_delta)
5158     {
5159       /* Try removing the candidates from the set instead.  */
5160       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5161
5162       /* Nothing more we can do.  */
5163       if (!best_delta)
5164         return false;
5165     }
5166
5167   iv_ca_delta_commit (data, ivs, best_delta, true);
5168   gcc_assert (best_cost == iv_ca_cost (ivs));
5169   iv_ca_delta_free (&best_delta);
5170   return true;
5171 }
5172
5173 /* Attempts to find the optimal set of induction variables.  We do simple
5174    greedy heuristic -- we try to replace at most one candidate in the selected
5175    solution and remove the unused ivs while this improves the cost.  */
5176
5177 static struct iv_ca *
5178 find_optimal_iv_set (struct ivopts_data *data)
5179 {
5180   unsigned i;
5181   struct iv_ca *set;
5182   struct iv_use *use;
5183
5184   /* Get the initial solution.  */
5185   set = get_initial_solution (data);
5186   if (!set)
5187     {
5188       if (dump_file && (dump_flags & TDF_DETAILS))
5189         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5190       return NULL;
5191     }
5192
5193   if (dump_file && (dump_flags & TDF_DETAILS))
5194     {
5195       fprintf (dump_file, "Initial set of candidates:\n");
5196       iv_ca_dump (data, dump_file, set);
5197     }
5198
5199   while (try_improve_iv_set (data, set))
5200     {
5201       if (dump_file && (dump_flags & TDF_DETAILS))
5202         {
5203           fprintf (dump_file, "Improved to:\n");
5204           iv_ca_dump (data, dump_file, set);
5205         }
5206     }
5207
5208   if (dump_file && (dump_flags & TDF_DETAILS))
5209     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5210
5211   for (i = 0; i < n_iv_uses (data); i++)
5212     {
5213       use = iv_use (data, i);
5214       use->selected = iv_ca_cand_for_use (set, use)->cand;
5215     }
5216
5217   return set;
5218 }
5219
5220 /* Creates a new induction variable corresponding to CAND.  */
5221
5222 static void
5223 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5224 {
5225   block_stmt_iterator incr_pos;
5226   tree base;
5227   bool after = false;
5228
5229   if (!cand->iv)
5230     return;
5231
5232   switch (cand->pos)
5233     {
5234     case IP_NORMAL:
5235       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5236       break;
5237
5238     case IP_END:
5239       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5240       after = true;
5241       break;
5242
5243     case IP_ORIGINAL:
5244       /* Mark that the iv is preserved.  */
5245       name_info (data, cand->var_before)->preserve_biv = true;
5246       name_info (data, cand->var_after)->preserve_biv = true;
5247
5248       /* Rewrite the increment so that it uses var_before directly.  */
5249       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5250
5251       return;
5252     }
5253
5254   gimple_add_tmp_var (cand->var_before);
5255   add_referenced_tmp_var (cand->var_before);
5256
5257   base = unshare_expr (cand->iv->base);
5258
5259   create_iv (base, unshare_expr (cand->iv->step),
5260              cand->var_before, data->current_loop,
5261              &incr_pos, after, &cand->var_before, &cand->var_after);
5262 }
5263
5264 /* Creates new induction variables described in SET.  */
5265
5266 static void
5267 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5268 {
5269   unsigned i;
5270   struct iv_cand *cand;
5271   bitmap_iterator bi;
5272
5273   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5274     {
5275       cand = iv_cand (data, i);
5276       create_new_iv (data, cand);
5277     }
5278 }
5279
5280 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5281    is true, remove also the ssa name defined by the statement.  */
5282
5283 static void
5284 remove_statement (tree stmt, bool including_defined_name)
5285 {
5286   if (TREE_CODE (stmt) == PHI_NODE)
5287     {
5288       if (!including_defined_name)
5289         {
5290           /* Prevent the ssa name defined by the statement from being removed.  */
5291           SET_PHI_RESULT (stmt, NULL);
5292         }
5293       remove_phi_node (stmt, NULL_TREE);
5294     }
5295   else
5296     {
5297       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5298
5299       bsi_remove (&bsi);
5300     }
5301 }
5302
5303 /* Rewrites USE (definition of iv used in a nonlinear expression)
5304    using candidate CAND.  */
5305
5306 static void
5307 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5308                             struct iv_use *use, struct iv_cand *cand)
5309 {
5310   tree comp;
5311   tree op, stmts, tgt, ass;
5312   block_stmt_iterator bsi, pbsi;
5313
5314   /* An important special case -- if we are asked to express value of
5315      the original iv by itself, just exit; there is no need to
5316      introduce a new computation (that might also need casting the
5317      variable to unsigned and back).  */
5318   if (cand->pos == IP_ORIGINAL
5319       && TREE_CODE (use->stmt) == MODIFY_EXPR
5320       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5321     {
5322       op = TREE_OPERAND (use->stmt, 1);
5323
5324       /* Be a bit careful.  In case variable is expressed in some
5325          complicated way, rewrite it so that we may get rid of this
5326          complicated expression.  */
5327       if ((TREE_CODE (op) == PLUS_EXPR
5328            || TREE_CODE (op) == MINUS_EXPR)
5329           && TREE_OPERAND (op, 0) == cand->var_before
5330           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5331         return;
5332     }
5333
5334   comp = get_computation (data->current_loop, use, cand);
5335   switch (TREE_CODE (use->stmt))
5336     {
5337     case PHI_NODE:
5338       tgt = PHI_RESULT (use->stmt);
5339
5340       /* If we should keep the biv, do not replace it.  */
5341       if (name_info (data, tgt)->preserve_biv)
5342         return;
5343
5344       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5345       while (!bsi_end_p (pbsi)
5346              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5347         {
5348           bsi = pbsi;
5349           bsi_next (&pbsi);
5350         }
5351       break;
5352
5353     case MODIFY_EXPR:
5354       tgt = TREE_OPERAND (use->stmt, 0);
5355       bsi = bsi_for_stmt (use->stmt);
5356       break;
5357
5358     default:
5359       gcc_unreachable ();
5360     }
5361
5362   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5363
5364   if (TREE_CODE (use->stmt) == PHI_NODE)
5365     {
5366       if (stmts)
5367         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5368       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5369       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5370       remove_statement (use->stmt, false);
5371       SSA_NAME_DEF_STMT (tgt) = ass;
5372     }
5373   else
5374     {
5375       if (stmts)
5376         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5377       TREE_OPERAND (use->stmt, 1) = op;
5378     }
5379 }
5380
5381 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5382    for_each_index.  */
5383
5384 static bool
5385 idx_remove_ssa_names (tree base, tree *idx,
5386                       void *data ATTRIBUTE_UNUSED)
5387 {
5388   tree *op;
5389
5390   if (TREE_CODE (*idx) == SSA_NAME)
5391     *idx = SSA_NAME_VAR (*idx);
5392
5393   if (TREE_CODE (base) == ARRAY_REF)
5394     {
5395       op = &TREE_OPERAND (base, 2);
5396       if (*op
5397           && TREE_CODE (*op) == SSA_NAME)
5398         *op = SSA_NAME_VAR (*op);
5399       op = &TREE_OPERAND (base, 3);
5400       if (*op
5401           && TREE_CODE (*op) == SSA_NAME)
5402         *op = SSA_NAME_VAR (*op);
5403     }
5404
5405   return true;
5406 }
5407
5408 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5409
5410 static tree
5411 unshare_and_remove_ssa_names (tree ref)
5412 {
5413   ref = unshare_expr (ref);
5414   for_each_index (&ref, idx_remove_ssa_names, NULL);
5415
5416   return ref;
5417 }
5418
5419 /* Extract the alias analysis info for the memory reference REF.  There are
5420    several ways how this information may be stored and what precisely is
5421    its semantics depending on the type of the reference, but there always is
5422    somewhere hidden one _DECL node that is used to determine the set of
5423    virtual operands for the reference.  The code below deciphers this jungle
5424    and extracts this single useful piece of information.  */
5425
5426 static tree
5427 get_ref_tag (tree ref)
5428 {
5429   tree var = get_base_address (ref);
5430   tree tag;
5431
5432   if (!var)
5433     return NULL_TREE;
5434
5435   if (TREE_CODE (var) == INDIRECT_REF)
5436     var = TREE_OPERAND (var, 0);
5437   if (TREE_CODE (var) == SSA_NAME)
5438     {
5439       if (SSA_NAME_PTR_INFO (var))
5440         {
5441           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5442           if (tag)
5443             return tag;
5444         }
5445
5446       var = SSA_NAME_VAR (var);
5447     }
5448
5449   if (DECL_P (var))
5450     {
5451       tag = var_ann (var)->type_mem_tag;
5452       if (tag)
5453         return tag;
5454
5455       return var;
5456     }
5457
5458   return NULL_TREE;
5459 }
5460
5461 /* Copies the reference information from OLD_REF to NEW_REF.  */
5462
5463 static void
5464 copy_ref_info (tree new_ref, tree old_ref)
5465 {
5466   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5467     copy_mem_ref_info (new_ref, old_ref);
5468   else
5469     {
5470       TMR_TAG (new_ref) = get_ref_tag (old_ref);
5471       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5472     }
5473 }
5474
5475 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5476
5477 static void
5478 rewrite_use_address (struct ivopts_data *data,
5479                      struct iv_use *use, struct iv_cand *cand)
5480 {
5481   struct affine_tree_combination aff;
5482   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5483   tree ref;
5484
5485   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5486   unshare_aff_combination (&aff);
5487
5488   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5489   copy_ref_info (ref, *use->op_p);
5490   *use->op_p = ref;
5491 }
5492
5493 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5494    candidate CAND.  */
5495
5496 static void
5497 rewrite_use_compare (struct ivopts_data *data,
5498                      struct iv_use *use, struct iv_cand *cand)
5499 {
5500   tree comp;
5501   tree *op_p, cond, op, stmts, bound;
5502   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5503   enum tree_code compare;
5504   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5505
5506   bound = cp->value;
5507   if (bound)
5508     {
5509       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5510       tree var_type = TREE_TYPE (var);
5511
5512       compare = iv_elimination_compare (data, use);
5513       bound = fold_convert (var_type, bound);
5514       op = force_gimple_operand (unshare_expr (bound), &stmts,
5515                                  true, NULL_TREE);
5516
5517       if (stmts)
5518         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5519
5520       *use->op_p = build2 (compare, boolean_type_node, var, op);
5521       update_stmt (use->stmt);
5522       return;
5523     }
5524
5525   /* The induction variable elimination failed; just express the original
5526      giv.  */
5527   comp = get_computation (data->current_loop, use, cand);
5528
5529   cond = *use->op_p;
5530   op_p = &TREE_OPERAND (cond, 0);
5531   if (TREE_CODE (*op_p) != SSA_NAME
5532       || zero_p (get_iv (data, *op_p)->step))
5533     op_p = &TREE_OPERAND (cond, 1);
5534
5535   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5536   if (stmts)
5537     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5538
5539   *op_p = op;
5540 }
5541
5542 /* Ensure that operand *OP_P may be used at the end of EXIT without
5543    violating loop closed ssa form.  */
5544
5545 static void
5546 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5547 {
5548   basic_block def_bb;
5549   struct loop *def_loop;
5550   tree phi, use;
5551
5552   use = USE_FROM_PTR (op_p);
5553   if (TREE_CODE (use) != SSA_NAME)
5554     return;
5555
5556   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5557   if (!def_bb)
5558     return;
5559
5560   def_loop = def_bb->loop_father;
5561   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5562     return;
5563
5564   /* Try finding a phi node that copies the value out of the loop.  */
5565   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5566     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5567       break;
5568
5569   if (!phi)
5570     {
5571       /* Create such a phi node.  */
5572       tree new_name = duplicate_ssa_name (use, NULL);
5573
5574       phi = create_phi_node (new_name, exit->dest);
5575       SSA_NAME_DEF_STMT (new_name) = phi;
5576       add_phi_arg (phi, use, exit);
5577     }
5578
5579   SET_USE (op_p, PHI_RESULT (phi));
5580 }
5581
5582 /* Ensure that operands of STMT may be used at the end of EXIT without
5583    violating loop closed ssa form.  */
5584
5585 static void
5586 protect_loop_closed_ssa_form (edge exit, tree stmt)
5587 {
5588   ssa_op_iter iter;
5589   use_operand_p use_p;
5590
5591   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5592     protect_loop_closed_ssa_form_use (exit, use_p);
5593 }
5594
5595 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5596    so that they are emitted on the correct place, and so that the loop closed
5597    ssa form is preserved.  */
5598
5599 static void
5600 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5601 {
5602   tree_stmt_iterator tsi;
5603   block_stmt_iterator bsi;
5604   tree phi, stmt, def, next;
5605
5606   if (!single_pred_p (exit->dest))
5607     split_loop_exit_edge (exit);
5608
5609   /* Ensure there is label in exit->dest, so that we can
5610      insert after it.  */
5611   tree_block_label (exit->dest);
5612   bsi = bsi_after_labels (exit->dest);
5613
5614   if (TREE_CODE (stmts) == STATEMENT_LIST)
5615     {
5616       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5617         {
5618           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5619           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5620         }
5621     }
5622   else
5623     {
5624       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5625       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5626     }
5627
5628   if (!op)
5629     return;
5630
5631   for (phi = phi_nodes (exit->dest); phi; phi = next)
5632     {
5633       next = PHI_CHAIN (phi);
5634
5635       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5636         {
5637           def = PHI_RESULT (phi);
5638           remove_statement (phi, false);
5639           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5640                         def, op);
5641           SSA_NAME_DEF_STMT (def) = stmt;
5642           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5643         }
5644     }
5645 }
5646
5647 /* Rewrites the final value of USE (that is only needed outside of the loop)
5648    using candidate CAND.  */
5649
5650 static void
5651 rewrite_use_outer (struct ivopts_data *data,
5652                    struct iv_use *use, struct iv_cand *cand)
5653 {
5654   edge exit;
5655   tree value, op, stmts, tgt;
5656   tree phi;
5657
5658   switch (TREE_CODE (use->stmt))
5659     {
5660     case PHI_NODE:
5661       tgt = PHI_RESULT (use->stmt);
5662       break;
5663     case MODIFY_EXPR:
5664       tgt = TREE_OPERAND (use->stmt, 0);
5665       break;
5666     default:
5667       gcc_unreachable ();
5668     }
5669
5670   exit = single_dom_exit (data->current_loop);
5671
5672   if (exit)
5673     {
5674       if (!cand->iv)
5675         {
5676           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5677           value = cp->value;
5678         }
5679       else
5680         value = get_computation_at (data->current_loop,
5681                                     use, cand, last_stmt (exit->src));
5682
5683       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5684
5685       /* If we will preserve the iv anyway and we would need to perform
5686          some computation to replace the final value, do nothing.  */
5687       if (stmts && name_info (data, tgt)->preserve_biv)
5688         return;
5689
5690       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5691         {
5692           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5693
5694           if (USE_FROM_PTR (use_p) == tgt)
5695             SET_USE (use_p, op);
5696         }
5697
5698       if (stmts)
5699         compute_phi_arg_on_exit (exit, stmts, op);
5700
5701       /* Enable removal of the statement.  We cannot remove it directly,
5702          since we may still need the aliasing information attached to the
5703          ssa name defined by it.  */
5704       name_info (data, tgt)->iv->have_use_for = false;
5705       return;
5706     }
5707
5708   /* If the variable is going to be preserved anyway, there is nothing to
5709      do.  */
5710   if (name_info (data, tgt)->preserve_biv)
5711     return;
5712
5713   /* Otherwise we just need to compute the iv.  */
5714   rewrite_use_nonlinear_expr (data, use, cand);
5715 }
5716
5717 /* Rewrites USE using candidate CAND.  */
5718
5719 static void
5720 rewrite_use (struct ivopts_data *data,
5721              struct iv_use *use, struct iv_cand *cand)
5722 {
5723   switch (use->type)
5724     {
5725       case USE_NONLINEAR_EXPR:
5726         rewrite_use_nonlinear_expr (data, use, cand);
5727         break;
5728
5729       case USE_OUTER:
5730         rewrite_use_outer (data, use, cand);
5731         break;
5732
5733       case USE_ADDRESS:
5734         rewrite_use_address (data, use, cand);
5735         break;
5736
5737       case USE_COMPARE:
5738         rewrite_use_compare (data, use, cand);
5739         break;
5740
5741       default:
5742         gcc_unreachable ();
5743     }
5744   update_stmt (use->stmt);
5745 }
5746
5747 /* Rewrite the uses using the selected induction variables.  */
5748
5749 static void
5750 rewrite_uses (struct ivopts_data *data)
5751 {
5752   unsigned i;
5753   struct iv_cand *cand;
5754   struct iv_use *use;
5755
5756   for (i = 0; i < n_iv_uses (data); i++)
5757     {
5758       use = iv_use (data, i);
5759       cand = use->selected;
5760       gcc_assert (cand);
5761
5762       rewrite_use (data, use, cand);
5763     }
5764 }
5765
5766 /* Removes the ivs that are not used after rewriting.  */
5767
5768 static void
5769 remove_unused_ivs (struct ivopts_data *data)
5770 {
5771   unsigned j;
5772   bitmap_iterator bi;
5773
5774   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5775     {
5776       struct version_info *info;
5777
5778       info = ver_info (data, j);
5779       if (info->iv
5780           && !zero_p (info->iv->step)
5781           && !info->inv_id
5782           && !info->iv->have_use_for
5783           && !info->preserve_biv)
5784         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5785     }
5786 }
5787
5788 /* Frees data allocated by the optimization of a single loop.  */
5789
5790 static void
5791 free_loop_data (struct ivopts_data *data)
5792 {
5793   unsigned i, j;
5794   bitmap_iterator bi;
5795   tree obj;
5796
5797   htab_empty (data->niters);
5798
5799   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5800     {
5801       struct version_info *info;
5802
5803       info = ver_info (data, i);
5804       if (info->iv)
5805         free (info->iv);
5806       info->iv = NULL;
5807       info->has_nonlin_use = false;
5808       info->preserve_biv = false;
5809       info->inv_id = 0;
5810     }
5811   bitmap_clear (data->relevant);
5812   bitmap_clear (data->important_candidates);
5813
5814   for (i = 0; i < n_iv_uses (data); i++)
5815     {
5816       struct iv_use *use = iv_use (data, i);
5817
5818       free (use->iv);
5819       BITMAP_FREE (use->related_cands);
5820       for (j = 0; j < use->n_map_members; j++)
5821         if (use->cost_map[j].depends_on)
5822           BITMAP_FREE (use->cost_map[j].depends_on);
5823       free (use->cost_map);
5824       free (use);
5825     }
5826   VEC_truncate (iv_use_p, data->iv_uses, 0);
5827
5828   for (i = 0; i < n_iv_cands (data); i++)
5829     {
5830       struct iv_cand *cand = iv_cand (data, i);
5831
5832       if (cand->iv)
5833         free (cand->iv);
5834       if (cand->depends_on)
5835         BITMAP_FREE (cand->depends_on);
5836       free (cand);
5837     }
5838   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5839
5840   if (data->version_info_size < num_ssa_names)
5841     {
5842       data->version_info_size = 2 * num_ssa_names;
5843       free (data->version_info);
5844       data->version_info = xcalloc (data->version_info_size,
5845                                     sizeof (struct version_info));
5846     }
5847
5848   data->max_inv_id = 0;
5849
5850   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5851     SET_DECL_RTL (obj, NULL_RTX);
5852
5853   VEC_truncate (tree, decl_rtl_to_reset, 0);
5854 }
5855
5856 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5857    loop tree.  */
5858
5859 static void
5860 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5861 {
5862   unsigned i;
5863
5864   for (i = 1; i < loops->num; i++)
5865     if (loops->parray[i])
5866       {
5867         free (loops->parray[i]->aux);
5868         loops->parray[i]->aux = NULL;
5869       }
5870
5871   free_loop_data (data);
5872   free (data->version_info);
5873   BITMAP_FREE (data->relevant);
5874   BITMAP_FREE (data->important_candidates);
5875   htab_delete (data->niters);
5876
5877   VEC_free (tree, heap, decl_rtl_to_reset);
5878   VEC_free (iv_use_p, heap, data->iv_uses);
5879   VEC_free (iv_cand_p, heap, data->iv_candidates);
5880 }
5881
5882 /* Optimizes the LOOP.  Returns true if anything changed.  */
5883
5884 static bool
5885 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5886 {
5887   bool changed = false;
5888   struct iv_ca *iv_ca;
5889   edge exit;
5890
5891   data->current_loop = loop;
5892
5893   if (dump_file && (dump_flags & TDF_DETAILS))
5894     {
5895       fprintf (dump_file, "Processing loop %d\n", loop->num);
5896
5897       exit = single_dom_exit (loop);
5898       if (exit)
5899         {
5900           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5901                    exit->src->index, exit->dest->index);
5902           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5903           fprintf (dump_file, "\n");
5904         }
5905
5906       fprintf (dump_file, "\n");
5907     }
5908
5909   /* For each ssa name determines whether it behaves as an induction variable
5910      in some loop.  */
5911   if (!find_induction_variables (data))
5912     goto finish;
5913
5914   /* Finds interesting uses (item 1).  */
5915   find_interesting_uses (data);
5916   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5917     goto finish;
5918
5919   /* Finds candidates for the induction variables (item 2).  */
5920   find_iv_candidates (data);
5921
5922   /* Calculates the costs (item 3, part 1).  */
5923   determine_use_iv_costs (data);
5924   determine_iv_costs (data);
5925   determine_set_costs (data);
5926
5927   /* Find the optimal set of induction variables (item 3, part 2).  */
5928   iv_ca = find_optimal_iv_set (data);
5929   if (!iv_ca)
5930     goto finish;
5931   changed = true;
5932
5933   /* Create the new induction variables (item 4, part 1).  */
5934   create_new_ivs (data, iv_ca);
5935   iv_ca_free (&iv_ca);
5936
5937   /* Rewrite the uses (item 4, part 2).  */
5938   rewrite_uses (data);
5939
5940   /* Remove the ivs that are unused after rewriting.  */
5941   remove_unused_ivs (data);
5942
5943   /* We have changed the structure of induction variables; it might happen
5944      that definitions in the scev database refer to some of them that were
5945      eliminated.  */
5946   scev_reset ();
5947
5948 finish:
5949   free_loop_data (data);
5950
5951   return changed;
5952 }
5953
5954 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5955
5956 void
5957 tree_ssa_iv_optimize (struct loops *loops)
5958 {
5959   struct loop *loop;
5960   struct ivopts_data data;
5961
5962   tree_ssa_iv_optimize_init (loops, &data);
5963
5964   /* Optimize the loops starting with the innermost ones.  */
5965   loop = loops->tree_root;
5966   while (loop->inner)
5967     loop = loop->inner;
5968
5969   /* Scan the loops, inner ones first.  */
5970   while (loop != loops->tree_root)
5971     {
5972       if (dump_file && (dump_flags & TDF_DETAILS))
5973         flow_loop_dump (loop, dump_file, NULL, 1);
5974
5975       tree_ssa_iv_optimize_loop (&data, loop);
5976
5977       if (loop->next)
5978         {
5979           loop = loop->next;
5980           while (loop->inner)
5981             loop = loop->inner;
5982         }
5983       else
5984         loop = loop->outer;
5985     }
5986
5987   tree_ssa_iv_optimize_finalize (loops, &data);
5988 }