gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194   bitmap depends_on;    /* The list of invariants that are used in step of the
 195                            biv.  */
 196 };
 197
 198 /* The data used by the induction variable optimizations.  */
 199
 200 typedef struct iv_use *iv_use_p;
 201 DEF_VEC_P(iv_use_p);
 202 DEF_VEC_ALLOC_P(iv_use_p,heap);
 203
 204 typedef struct iv_cand *iv_cand_p;
 205 DEF_VEC_P(iv_cand_p);
 206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 207
 208 struct ivopts_data
 209 {
 210   /* The currently optimized loop.  */
 211   struct loop *current_loop;
 212
 213   /* Numbers of iterations for all exits of the current loop.  */
 214   htab_t niters;
 215
 216   /* The size of version_info array allocated.  */
 217   unsigned version_info_size;
 218
 219   /* The array of information for the ssa names.  */
 220   struct version_info *version_info;
 221
 222   /* The bitmap of indices in version_info whose value was changed.  */
 223   bitmap relevant;
 224
 225   /* The maximum invariant id.  */
 226   unsigned max_inv_id;
 227
 228   /* The uses of induction variables.  */
 229   VEC(iv_use_p,heap) *iv_uses;
 230
 231   /* The candidates.  */
 232   VEC(iv_cand_p,heap) *iv_candidates;
 233
 234   /* A bitmap of important candidates.  */
 235   bitmap important_candidates;
 236
 237   /* Whether to consider just related and important candidates when replacing a
 238      use.  */
 239   bool consider_all_candidates;
 240 };
 241
 242 /* An assignment of iv candidates to uses.  */
 243
 244 struct iv_ca
 245 {
 246   /* The number of uses covered by the assignment.  */
 247   unsigned upto;
 248
 249   /* Number of uses that cannot be expressed by the candidates in the set.  */
 250   unsigned bad_uses;
 251
 252   /* Candidate assigned to a use, together with the related costs.  */
 253   struct cost_pair **cand_for_use;
 254
 255   /* Number of times each candidate is used.  */
 256   unsigned *n_cand_uses;
 257
 258   /* The candidates used.  */
 259   bitmap cands;
 260
 261   /* The number of candidates in the set.  */
 262   unsigned n_cands;
 263
 264   /* Total number of registers needed.  */
 265   unsigned n_regs;
 266
 267   /* Total cost of expressing uses.  */
 268   unsigned cand_use_cost;
 269
 270   /* Total cost of candidates.  */
 271   unsigned cand_cost;
 272
 273   /* Number of times each invariant is used.  */
 274   unsigned *n_invariant_uses;
 275
 276   /* Total cost of the assignment.  */
 277   unsigned cost;
 278 };
 279
 280 /* Difference of two iv candidate assignments.  */
 281
 282 struct iv_ca_delta
 283 {
 284   /* Changed use.  */
 285   struct iv_use *use;
 286
 287   /* An old assignment (for rollback purposes).  */
 288   struct cost_pair *old_cp;
 289
 290   /* A new assignment.  */
 291   struct cost_pair *new_cp;
 292
 293   /* Next change in the list.  */
 294   struct iv_ca_delta *next_change;
 295 };
 296
 297 /* Bound on number of candidates below that all candidates are considered.  */
 298
 299 #define CONSIDER_ALL_CANDIDATES_BOUND \
 300   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 301
 302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 303    optimizing such a loop would help, and it would take ages).  */
 304
 305 #define MAX_CONSIDERED_USES \
 306   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 307
 308 /* If there are at most this number of ivs in the set, try removing unnecessary
 309    ivs from the set always.  */
 310
 311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 313
 314 /* The list of trees for that the decl_rtl field must be reset is stored
 315    here.  */
 316
 317 static VEC(tree,heap) *decl_rtl_to_reset;
 318
 319 /* Number of uses recorded in DATA.  */
 320
 321 static inline unsigned
 322 n_iv_uses (struct ivopts_data *data)
 323 {
 324   return VEC_length (iv_use_p, data->iv_uses);
 325 }
 326
 327 /* Ith use recorded in DATA.  */
 328
 329 static inline struct iv_use *
 330 iv_use (struct ivopts_data *data, unsigned i)
 331 {
 332   return VEC_index (iv_use_p, data->iv_uses, i);
 333 }
 334
 335 /* Number of candidates recorded in DATA.  */
 336
 337 static inline unsigned
 338 n_iv_cands (struct ivopts_data *data)
 339 {
 340   return VEC_length (iv_cand_p, data->iv_candidates);
 341 }
 342
 343 /* Ith candidate recorded in DATA.  */
 344
 345 static inline struct iv_cand *
 346 iv_cand (struct ivopts_data *data, unsigned i)
 347 {
 348   return VEC_index (iv_cand_p, data->iv_candidates, i);
 349 }
 350
 351 /* The data for LOOP.  */
 352
 353 static inline struct loop_data *
 354 loop_data (struct loop *loop)
 355 {
 356   return loop->aux;
 357 }
 358
 359 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 360
 361 edge
 362 single_dom_exit (struct loop *loop)
 363 {
 364   edge exit = loop->single_exit;
 365
 366   if (!exit)
 367     return NULL;
 368
 369   if (!just_once_each_iteration_p (loop, exit->src))
 370     return NULL;
 371
 372   return exit;
 373 }
 374
 375 /* Dumps information about the induction variable IV to FILE.  */
 376
 377 extern void dump_iv (FILE *, struct iv *);
 378 void
 379 dump_iv (FILE *file, struct iv *iv)
 380 {
 381   if (iv->ssa_name)
 382     {
 383       fprintf (file, "ssa name ");
 384       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 385       fprintf (file, "\n");
 386     }
 387
 388   fprintf (file, "  type ");
 389   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 390   fprintf (file, "\n");
 391
 392   if (iv->step)
 393     {
 394       fprintf (file, "  base ");
 395       print_generic_expr (file, iv->base, TDF_SLIM);
 396       fprintf (file, "\n");
 397
 398       fprintf (file, "  step ");
 399       print_generic_expr (file, iv->step, TDF_SLIM);
 400       fprintf (file, "\n");
 401     }
 402   else
 403     {
 404       fprintf (file, "  invariant ");
 405       print_generic_expr (file, iv->base, TDF_SLIM);
 406       fprintf (file, "\n");
 407     }
 408
 409   if (iv->base_object)
 410     {
 411       fprintf (file, "  base object ");
 412       print_generic_expr (file, iv->base_object, TDF_SLIM);
 413       fprintf (file, "\n");
 414     }
 415
 416   if (iv->biv_p)
 417     fprintf (file, "  is a biv\n");
 418 }
 419
 420 /* Dumps information about the USE to FILE.  */
 421
 422 extern void dump_use (FILE *, struct iv_use *);
 423 void
 424 dump_use (FILE *file, struct iv_use *use)
 425 {
 426   fprintf (file, "use %d\n", use->id);
 427
 428   switch (use->type)
 429     {
 430     case USE_NONLINEAR_EXPR:
 431       fprintf (file, "  generic\n");
 432       break;
 433
 434     case USE_OUTER:
 435       fprintf (file, "  outside\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Checks whether there exists number X such that X * B = A, counting modulo
 544    2^BITS.  */
 545
 546 static bool
 547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 548         HOST_WIDE_INT *x)
 549 {
 550   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 551   unsigned HOST_WIDE_INT inv, ex, val;
 552   unsigned i;
 553
 554   a &= mask;
 555   b &= mask;
 556
 557   /* First divide the whole equation by 2 as long as possible.  */
 558   while (!(a & 1) && !(b & 1))
 559     {
 560       a >>= 1;
 561       b >>= 1;
 562       bits--;
 563       mask >>= 1;
 564     }
 565
 566   if (!(b & 1))
 567     {
 568       /* If b is still even, a is odd and there is no such x.  */
 569       return false;
 570     }
 571
 572   /* Find the inverse of b.  We compute it as
 573      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 574   inv = 1;
 575   ex = b;
 576   for (i = 0; i < bits - 1; i++)
 577     {
 578       inv = (inv * ex) & mask;
 579       ex = (ex * ex) & mask;
 580     }
 581
 582   val = (a * inv) & mask;
 583
 584   gcc_assert (((val * b) & mask) == a);
 585
 586   if ((val >> (bits - 1)) & 1)
 587     val |= ~mask;
 588
 589   *x = val;
 590
 591   return true;
 592 }
 593
 594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 595    emitted in LOOP.  */
 596
 597 static bool
 598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 599 {
 600   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 601
 602   gcc_assert (bb);
 603
 604   if (sbb == loop->latch)
 605     return true;
 606
 607   if (sbb != bb)
 608     return false;
 609
 610   return stmt == last_stmt (bb);
 611 }
 612
 613 /* Returns true if STMT if after the place where the original induction
 614    variable CAND is incremented.  */
 615
 616 static bool
 617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 618 {
 619   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 620   basic_block stmt_bb = bb_for_stmt (stmt);
 621   block_stmt_iterator bsi;
 622
 623   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 624     return false;
 625
 626   if (stmt_bb != cand_bb)
 627     return true;
 628
 629   /* Scan the block from the end, since the original ivs are usually
 630      incremented at the end of the loop body.  */
 631   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 632     {
 633       if (bsi_stmt (bsi) == cand->incremented_at)
 634         return false;
 635       if (bsi_stmt (bsi) == stmt)
 636         return true;
 637     }
 638 }
 639
 640 /* Returns true if STMT if after the place where the induction variable
 641    CAND is incremented in LOOP.  */
 642
 643 static bool
 644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 645 {
 646   switch (cand->pos)
 647     {
 648     case IP_END:
 649       return false;
 650
 651     case IP_NORMAL:
 652       return stmt_after_ip_normal_pos (loop, stmt);
 653
 654     case IP_ORIGINAL:
 655       return stmt_after_ip_original_pos (cand, stmt);
 656
 657     default:
 658       gcc_unreachable ();
 659     }
 660 }
 661
 662 /* Element of the table in that we cache the numbers of iterations obtained
 663    from exits of the loop.  */
 664
 665 struct nfe_cache_elt
 666 {
 667   /* The edge for that the number of iterations is cached.  */
 668   edge exit;
 669
 670   /* True if the # of iterations was successfully determined.  */
 671   bool valid_p;
 672
 673   /* Description of # of iterations.  */
 674   struct tree_niter_desc niter;
 675 };
 676
 677 /* Hash function for nfe_cache_elt E.  */
 678
 679 static hashval_t
 680 nfe_hash (const void *e)
 681 {
 682   const struct nfe_cache_elt *elt = e;
 683
 684   return htab_hash_pointer (elt->exit);
 685 }
 686
 687 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 688
 689 static int
 690 nfe_eq (const void *e1, const void *e2)
 691 {
 692   const struct nfe_cache_elt *elt1 = e1;
 693
 694   return elt1->exit == e2;
 695 }
 696
 697 /*  Returns structure describing number of iterations determined from
 698     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 699
 700 static struct tree_niter_desc *
 701 niter_for_exit (struct ivopts_data *data, edge exit)
 702 {
 703   struct nfe_cache_elt *nfe_desc;
 704   PTR *slot;
 705
 706   slot = htab_find_slot_with_hash (data->niters, exit,
 707                                    htab_hash_pointer (exit),
 708                                    INSERT);
 709
 710   if (!*slot)
 711     {
 712       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 713       nfe_desc->exit = exit;
 714       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 715                                                      exit, &nfe_desc->niter,
 716                                                      true);
 717       *slot = nfe_desc;
 718     }
 719   else
 720     nfe_desc = *slot;
 721
 722   if (!nfe_desc->valid_p)
 723     return NULL;
 724
 725   return &nfe_desc->niter;
 726 }
 727
 728 /* Returns structure describing number of iterations determined from
 729    single dominating exit of DATA->current_loop, or NULL if something
 730    goes wrong.  */
 731
 732 static struct tree_niter_desc *
 733 niter_for_single_dom_exit (struct ivopts_data *data)
 734 {
 735   edge exit = single_dom_exit (data->current_loop);
 736
 737   if (!exit)
 738     return NULL;
 739
 740   return niter_for_exit (data, exit);
 741 }
 742
 743 /* Initializes data structures used by the iv optimization pass, stored
 744    in DATA.  LOOPS is the loop tree.  */
 745
 746 static void
 747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 748 {
 749   unsigned i;
 750
 751   data->version_info_size = 2 * num_ssa_names;
 752   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 753   data->relevant = BITMAP_ALLOC (NULL);
 754   data->important_candidates = BITMAP_ALLOC (NULL);
 755   data->max_inv_id = 0;
 756   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 757
 758   for (i = 1; i < loops->num; i++)
 759     if (loops->parray[i])
 760       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 761
 762   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 763   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 764   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 765 }
 766
 767 /* Returns a memory object to that EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
 769
 770 static tree
 771 determine_base_object (tree expr)
 772 {
 773   enum tree_code code = TREE_CODE (expr);
 774   tree base, obj, op0, op1;
 775
 776   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 777     return NULL_TREE;
 778
 779   switch (code)
 780     {
 781     case INTEGER_CST:
 782       return NULL_TREE;
 783
 784     case ADDR_EXPR:
 785       obj = TREE_OPERAND (expr, 0);
 786       base = get_base_address (obj);
 787
 788       if (!base)
 789         return expr;
 790
 791       if (TREE_CODE (base) == INDIRECT_REF)
 792         return determine_base_object (TREE_OPERAND (base, 0));
 793
 794       return fold_convert (ptr_type_node,
 795                            build_fold_addr_expr (base));
 796
 797     case PLUS_EXPR:
 798     case MINUS_EXPR:
 799       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 800       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 801
 802       if (!op1)
 803         return op0;
 804
 805       if (!op0)
 806         return (code == PLUS_EXPR
 807                 ? op1
 808                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 809
 810       return fold_build2 (code, ptr_type_node, op0, op1);
 811
 812     case NOP_EXPR:
 813     case CONVERT_EXPR:
 814       return determine_base_object (TREE_OPERAND (expr, 0));
 815
 816     default:
 817       return fold_convert (ptr_type_node, expr);
 818     }
 819 }
 820
 821 /* Allocates an induction variable with given initial value BASE and step STEP
 822    for loop LOOP.  */
 823
 824 static struct iv *
 825 alloc_iv (tree base, tree step)
 826 {
 827   struct iv *iv = XCNEW (struct iv);
 828
 829   if (step && integer_zerop (step))
 830     step = NULL_TREE;
 831
 832   iv->base = base;
 833   iv->base_object = determine_base_object (base);
 834   iv->step = step;
 835   iv->biv_p = false;
 836   iv->have_use_for = false;
 837   iv->use_id = 0;
 838   iv->ssa_name = NULL_TREE;
 839
 840   return iv;
 841 }
 842
 843 /* Sets STEP and BASE for induction variable IV.  */
 844
 845 static void
 846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 847 {
 848   struct version_info *info = name_info (data, iv);
 849
 850   gcc_assert (!info->iv);
 851
 852   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 853   info->iv = alloc_iv (base, step);
 854   info->iv->ssa_name = iv;
 855 }
 856
 857 /* Finds induction variable declaration for VAR.  */
 858
 859 static struct iv *
 860 get_iv (struct ivopts_data *data, tree var)
 861 {
 862   basic_block bb;
 863
 864   if (!name_info (data, var)->iv)
 865     {
 866       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 867
 868       if (!bb
 869           || !flow_bb_inside_loop_p (data->current_loop, bb))
 870         set_iv (data, var, var, NULL_TREE);
 871     }
 872
 873   return name_info (data, var)->iv;
 874 }
 875
 876 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 877    not define a simple affine biv with nonzero step.  */
 878
 879 static tree
 880 determine_biv_step (tree phi)
 881 {
 882   struct loop *loop = bb_for_stmt (phi)->loop_father;
 883   tree name = PHI_RESULT (phi);
 884   affine_iv iv;
 885
 886   if (!is_gimple_reg (name))
 887     return NULL_TREE;
 888
 889   if (!simple_iv (loop, phi, name, &iv, true))
 890     return NULL_TREE;
 891
 892   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 893 }
 894
 895 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 896
 897 static bool
 898 abnormal_ssa_name_p (tree exp)
 899 {
 900   if (!exp)
 901     return false;
 902
 903   if (TREE_CODE (exp) != SSA_NAME)
 904     return false;
 905
 906   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 907 }
 908
 909 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 910    abnormal phi node.  Callback for for_each_index.  */
 911
 912 static bool
 913 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 914                                   void *data ATTRIBUTE_UNUSED)
 915 {
 916   if (TREE_CODE (base) == ARRAY_REF)
 917     {
 918       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 919         return false;
 920       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 921         return false;
 922     }
 923
 924   return !abnormal_ssa_name_p (*index);
 925 }
 926
 927 /* Returns true if EXPR contains a ssa name that occurs in an
 928    abnormal phi node.  */
 929
 930 static bool
 931 contains_abnormal_ssa_name_p (tree expr)
 932 {
 933   enum tree_code code;
 934   enum tree_code_class class;
 935
 936   if (!expr)
 937     return false;
 938
 939   code = TREE_CODE (expr);
 940   class = TREE_CODE_CLASS (code);
 941
 942   if (code == SSA_NAME)
 943     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 944
 945   if (code == INTEGER_CST
 946       || is_gimple_min_invariant (expr))
 947     return false;
 948
 949   if (code == ADDR_EXPR)
 950     return !for_each_index (&TREE_OPERAND (expr, 0),
 951                             idx_contains_abnormal_ssa_name_p,
 952                             NULL);
 953
 954   switch (class)
 955     {
 956     case tcc_binary:
 957     case tcc_comparison:
 958       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 959         return true;
 960
 961       /* Fallthru.  */
 962     case tcc_unary:
 963       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 964         return true;
 965
 966       break;
 967
 968     default:
 969       gcc_unreachable ();
 970     }
 971
 972   return false;
 973 }
 974
 975 /* Finds basic ivs.  */
 976
 977 static bool
 978 find_bivs (struct ivopts_data *data)
 979 {
 980   tree phi, step, type, base;
 981   bool found = false;
 982   struct loop *loop = data->current_loop;
 983
 984   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 985     {
 986       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 987         continue;
 988
 989       step = determine_biv_step (phi);
 990       if (!step)
 991         continue;
 992
 993       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 994       base = expand_simple_operations (base);
 995       if (contains_abnormal_ssa_name_p (base)
 996           || contains_abnormal_ssa_name_p (step))
 997         continue;
 998
 999       type = TREE_TYPE (PHI_RESULT (phi));
1000       base = fold_convert (type, base);
1001       if (step)
1002         step = fold_convert (type, step);
1003
1004       set_iv (data, PHI_RESULT (phi), base, step);
1005       found = true;
1006     }
1007
1008   return found;
1009 }
1010
1011 /* Marks basic ivs.  */
1012
1013 static void
1014 mark_bivs (struct ivopts_data *data)
1015 {
1016   tree phi, var;
1017   struct iv *iv, *incr_iv;
1018   struct loop *loop = data->current_loop;
1019   basic_block incr_bb;
1020
1021   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1022     {
1023       iv = get_iv (data, PHI_RESULT (phi));
1024       if (!iv)
1025         continue;
1026
1027       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1028       incr_iv = get_iv (data, var);
1029       if (!incr_iv)
1030         continue;
1031
1032       /* If the increment is in the subloop, ignore it.  */
1033       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1034       if (incr_bb->loop_father != data->current_loop
1035           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1036         continue;
1037
1038       iv->biv_p = true;
1039       incr_iv->biv_p = true;
1040     }
1041 }
1042
1043 /* Checks whether STMT defines a linear induction variable and stores its
1044    parameters to IV.  */
1045
1046 static bool
1047 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1048 {
1049   tree lhs;
1050   struct loop *loop = data->current_loop;
1051
1052   iv->base = NULL_TREE;
1053   iv->step = NULL_TREE;
1054
1055   if (TREE_CODE (stmt) != MODIFY_EXPR)
1056     return false;
1057
1058   lhs = TREE_OPERAND (stmt, 0);
1059   if (TREE_CODE (lhs) != SSA_NAME)
1060     return false;
1061
1062   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1063     return false;
1064   iv->base = expand_simple_operations (iv->base);
1065
1066   if (contains_abnormal_ssa_name_p (iv->base)
1067       || contains_abnormal_ssa_name_p (iv->step))
1068     return false;
1069
1070   return true;
1071 }
1072
1073 /* Finds general ivs in statement STMT.  */
1074
1075 static void
1076 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1077 {
1078   affine_iv iv;
1079
1080   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1081     return;
1082
1083   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1084 }
1085
1086 /* Finds general ivs in basic block BB.  */
1087
1088 static void
1089 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1090 {
1091   block_stmt_iterator bsi;
1092
1093   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1094     find_givs_in_stmt (data, bsi_stmt (bsi));
1095 }
1096
1097 /* Finds general ivs.  */
1098
1099 static void
1100 find_givs (struct ivopts_data *data)
1101 {
1102   struct loop *loop = data->current_loop;
1103   basic_block *body = get_loop_body_in_dom_order (loop);
1104   unsigned i;
1105
1106   for (i = 0; i < loop->num_nodes; i++)
1107     find_givs_in_bb (data, body[i]);
1108   free (body);
1109 }
1110
1111 /* For each ssa name defined in LOOP determines whether it is an induction
1112    variable and if so, its initial value and step.  */
1113
1114 static bool
1115 find_induction_variables (struct ivopts_data *data)
1116 {
1117   unsigned i;
1118   bitmap_iterator bi;
1119
1120   if (!find_bivs (data))
1121     return false;
1122
1123   find_givs (data);
1124   mark_bivs (data);
1125
1126   if (dump_file && (dump_flags & TDF_DETAILS))
1127     {
1128       struct tree_niter_desc *niter;
1129
1130       niter = niter_for_single_dom_exit (data);
1131
1132       if (niter)
1133         {
1134           fprintf (dump_file, "  number of iterations ");
1135           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1136           fprintf (dump_file, "\n");
1137
1138           fprintf (dump_file, "  may be zero if ");
1139           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1140           fprintf (dump_file, "\n");
1141           fprintf (dump_file, "\n");
1142         };
1143
1144       fprintf (dump_file, "Induction variables:\n\n");
1145
1146       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147         {
1148           if (ver_info (data, i)->iv)
1149             dump_iv (dump_file, ver_info (data, i)->iv);
1150         }
1151     }
1152
1153   return true;
1154 }
1155
1156 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1157
1158 static struct iv_use *
1159 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1160             tree stmt, enum use_type use_type)
1161 {
1162   struct iv_use *use = XCNEW (struct iv_use);
1163
1164   use->id = n_iv_uses (data);
1165   use->type = use_type;
1166   use->iv = iv;
1167   use->stmt = stmt;
1168   use->op_p = use_p;
1169   use->related_cands = BITMAP_ALLOC (NULL);
1170
1171   /* To avoid showing ssa name in the dumps, if it was not reset by the
1172      caller.  */
1173   iv->ssa_name = NULL_TREE;
1174
1175   if (dump_file && (dump_flags & TDF_DETAILS))
1176     dump_use (dump_file, use);
1177
1178   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1179
1180   return use;
1181 }
1182
1183 /* Checks whether OP is a loop-level invariant and if so, records it.
1184    NONLINEAR_USE is true if the invariant is used in a way we do not
1185    handle specially.  */
1186
1187 static void
1188 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1189 {
1190   basic_block bb;
1191   struct version_info *info;
1192
1193   if (TREE_CODE (op) != SSA_NAME
1194       || !is_gimple_reg (op))
1195     return;
1196
1197   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1198   if (bb
1199       && flow_bb_inside_loop_p (data->current_loop, bb))
1200     return;
1201
1202   info = name_info (data, op);
1203   info->name = op;
1204   info->has_nonlin_use |= nonlinear_use;
1205   if (!info->inv_id)
1206     info->inv_id = ++data->max_inv_id;
1207   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 }
1209
1210 /* Checks whether the use OP is interesting and if so, records it
1211    as TYPE.  */
1212
1213 static struct iv_use *
1214 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1215                                        enum use_type type)
1216 {
1217   struct iv *iv;
1218   struct iv *civ;
1219   tree stmt;
1220   struct iv_use *use;
1221
1222   if (TREE_CODE (op) != SSA_NAME)
1223     return NULL;
1224
1225   iv = get_iv (data, op);
1226   if (!iv)
1227     return NULL;
1228
1229   if (iv->have_use_for)
1230     {
1231       use = iv_use (data, iv->use_id);
1232
1233       gcc_assert (use->type == USE_NONLINEAR_EXPR
1234                   || use->type == USE_OUTER);
1235
1236       if (type == USE_NONLINEAR_EXPR)
1237         use->type = USE_NONLINEAR_EXPR;
1238       return use;
1239     }
1240
1241   if (zero_p (iv->step))
1242     {
1243       record_invariant (data, op, true);
1244       return NULL;
1245     }
1246   iv->have_use_for = true;
1247
1248   civ = XNEW (struct iv);
1249   *civ = *iv;
1250
1251   stmt = SSA_NAME_DEF_STMT (op);
1252   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1253               || TREE_CODE (stmt) == MODIFY_EXPR);
1254
1255   use = record_use (data, NULL, civ, stmt, type);
1256   iv->use_id = use->id;
1257
1258   return use;
1259 }
1260
1261 /* Checks whether the use OP is interesting and if so, records it.  */
1262
1263 static struct iv_use *
1264 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 {
1266   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 }
1268
1269 /* Records a definition of induction variable OP that is used outside of the
1270    loop.  */
1271
1272 static struct iv_use *
1273 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 {
1275   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 }
1277
1278 /* Checks whether the condition *COND_P in STMT is interesting
1279    and if so, records it.  */
1280
1281 static void
1282 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1283 {
1284   tree *op0_p;
1285   tree *op1_p;
1286   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1287   struct iv const_iv;
1288   tree zero = integer_zero_node;
1289
1290   const_iv.step = NULL_TREE;
1291
1292   if (TREE_CODE (*cond_p) != SSA_NAME
1293       && !COMPARISON_CLASS_P (*cond_p))
1294     return;
1295
1296   if (TREE_CODE (*cond_p) == SSA_NAME)
1297     {
1298       op0_p = cond_p;
1299       op1_p = &zero;
1300     }
1301   else
1302     {
1303       op0_p = &TREE_OPERAND (*cond_p, 0);
1304       op1_p = &TREE_OPERAND (*cond_p, 1);
1305     }
1306
1307   if (TREE_CODE (*op0_p) == SSA_NAME)
1308     iv0 = get_iv (data, *op0_p);
1309   else
1310     iv0 = &const_iv;
1311
1312   if (TREE_CODE (*op1_p) == SSA_NAME)
1313     iv1 = get_iv (data, *op1_p);
1314   else
1315     iv1 = &const_iv;
1316
1317   if (/* When comparing with non-invariant value, we may not do any senseful
1318          induction variable elimination.  */
1319       (!iv0 || !iv1)
1320       /* Eliminating condition based on two ivs would be nontrivial.
1321          ??? TODO -- it is not really important to handle this case.  */
1322       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323     {
1324       find_interesting_uses_op (data, *op0_p);
1325       find_interesting_uses_op (data, *op1_p);
1326       return;
1327     }
1328
1329   if (zero_p (iv0->step) && zero_p (iv1->step))
1330     {
1331       /* If both are invariants, this is a work for unswitching.  */
1332       return;
1333     }
1334
1335   civ = XNEW (struct iv);
1336   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1337   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 }
1339
1340 /* Returns true if expression EXPR is obviously invariant in LOOP,
1341    i.e. if all its operands are defined outside of the LOOP.  */
1342
1343 bool
1344 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1345 {
1346   basic_block def_bb;
1347   unsigned i, len;
1348
1349   if (is_gimple_min_invariant (expr))
1350     return true;
1351
1352   if (TREE_CODE (expr) == SSA_NAME)
1353     {
1354       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1355       if (def_bb
1356           && flow_bb_inside_loop_p (loop, def_bb))
1357         return false;
1358
1359       return true;
1360     }
1361
1362   if (!EXPR_P (expr))
1363     return false;
1364
1365   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1366   for (i = 0; i < len; i++)
1367     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1368       return false;
1369
1370   return true;
1371 }
1372
1373 /* Cumulates the steps of indices into DATA and replaces their values with the
1374    initial ones.  Returns false when the value of the index cannot be determined.
1375    Callback for for_each_index.  */
1376
1377 struct ifs_ivopts_data
1378 {
1379   struct ivopts_data *ivopts_data;
1380   tree stmt;
1381   tree *step_p;
1382 };
1383
1384 static bool
1385 idx_find_step (tree base, tree *idx, void *data)
1386 {
1387   struct ifs_ivopts_data *dta = data;
1388   struct iv *iv;
1389   tree step, iv_step, lbound, off;
1390   struct loop *loop = dta->ivopts_data->current_loop;
1391
1392   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1393       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1394     return false;
1395
1396   /* If base is a component ref, require that the offset of the reference
1397      be invariant.  */
1398   if (TREE_CODE (base) == COMPONENT_REF)
1399     {
1400       off = component_ref_field_offset (base);
1401       return expr_invariant_in_loop_p (loop, off);
1402     }
1403
1404   /* If base is array, first check whether we will be able to move the
1405      reference out of the loop (in order to take its address in strength
1406      reduction).  In order for this to work we need both lower bound
1407      and step to be loop invariants.  */
1408   if (TREE_CODE (base) == ARRAY_REF)
1409     {
1410       step = array_ref_element_size (base);
1411       lbound = array_ref_low_bound (base);
1412
1413       if (!expr_invariant_in_loop_p (loop, step)
1414           || !expr_invariant_in_loop_p (loop, lbound))
1415         return false;
1416     }
1417
1418   if (TREE_CODE (*idx) != SSA_NAME)
1419     return true;
1420
1421   iv = get_iv (dta->ivopts_data, *idx);
1422   if (!iv)
1423     return false;
1424
1425   *idx = iv->base;
1426
1427   if (!iv->step)
1428     return true;
1429
1430   if (TREE_CODE (base) == ARRAY_REF)
1431     {
1432       step = array_ref_element_size (base);
1433
1434       /* We only handle addresses whose step is an integer constant.  */
1435       if (TREE_CODE (step) != INTEGER_CST)
1436         return false;
1437     }
1438   else
1439     /* The step for pointer arithmetics already is 1 byte.  */
1440     step = build_int_cst (sizetype, 1);
1441
1442   /* FIXME: convert_step should not be used outside chrec_convert: fix
1443      this by calling chrec_convert.  */
1444   iv_step = convert_step (dta->ivopts_data->current_loop,
1445                           sizetype, iv->base, iv->step, dta->stmt);
1446
1447   if (!iv_step)
1448     {
1449       /* The index might wrap.  */
1450       return false;
1451     }
1452
1453   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1454
1455   if (!*dta->step_p)
1456     *dta->step_p = step;
1457   else
1458     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1459
1460   return true;
1461 }
1462
1463 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1464    object is passed to it in DATA.  */
1465
1466 static bool
1467 idx_record_use (tree base, tree *idx,
1468                 void *data)
1469 {
1470   find_interesting_uses_op (data, *idx);
1471   if (TREE_CODE (base) == ARRAY_REF)
1472     {
1473       find_interesting_uses_op (data, array_ref_element_size (base));
1474       find_interesting_uses_op (data, array_ref_low_bound (base));
1475     }
1476   return true;
1477 }
1478
1479 /* Returns true if memory reference REF may be unaligned.  */
1480
1481 static bool
1482 may_be_unaligned_p (tree ref)
1483 {
1484   tree base;
1485   tree base_type;
1486   HOST_WIDE_INT bitsize;
1487   HOST_WIDE_INT bitpos;
1488   tree toffset;
1489   enum machine_mode mode;
1490   int unsignedp, volatilep;
1491   unsigned base_align;
1492
1493   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1494      thus they are not misaligned.  */
1495   if (TREE_CODE (ref) == TARGET_MEM_REF)
1496     return false;
1497
1498   /* The test below is basically copy of what expr.c:normal_inner_ref
1499      does to check whether the object must be loaded by parts when
1500      STRICT_ALIGNMENT is true.  */
1501   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1502                               &unsignedp, &volatilep, true);
1503   base_type = TREE_TYPE (base);
1504   base_align = TYPE_ALIGN (base_type);
1505
1506   if (mode != BLKmode
1507       && (base_align < GET_MODE_ALIGNMENT (mode)
1508           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1509           || bitpos % BITS_PER_UNIT != 0))
1510     return true;
1511
1512   return false;
1513 }
1514
1515 /* Finds addresses in *OP_P inside STMT.  */
1516
1517 static void
1518 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1519 {
1520   tree base = *op_p, step = NULL;
1521   struct iv *civ;
1522   struct ifs_ivopts_data ifs_ivopts_data;
1523
1524   /* Do not play with volatile memory references.  A bit too conservative,
1525      perhaps, but safe.  */
1526   if (stmt_ann (stmt)->has_volatile_ops)
1527     goto fail;
1528
1529   /* Ignore bitfields for now.  Not really something terribly complicated
1530      to handle.  TODO.  */
1531   if (TREE_CODE (base) == COMPONENT_REF
1532       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1533     goto fail;
1534
1535   if (STRICT_ALIGNMENT
1536       && may_be_unaligned_p (base))
1537     goto fail;
1538
1539   base = unshare_expr (base);
1540
1541   if (TREE_CODE (base) == TARGET_MEM_REF)
1542     {
1543       tree type = build_pointer_type (TREE_TYPE (base));
1544       tree astep;
1545
1546       if (TMR_BASE (base)
1547           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1548         {
1549           civ = get_iv (data, TMR_BASE (base));
1550           if (!civ)
1551             goto fail;
1552
1553           TMR_BASE (base) = civ->base;
1554           step = civ->step;
1555         }
1556       if (TMR_INDEX (base)
1557           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1558         {
1559           civ = get_iv (data, TMR_INDEX (base));
1560           if (!civ)
1561             goto fail;
1562
1563           TMR_INDEX (base) = civ->base;
1564           astep = civ->step;
1565
1566           if (astep)
1567             {
1568               if (TMR_STEP (base))
1569                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1570
1571               if (step)
1572                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1573               else
1574                 step = astep;
1575             }
1576         }
1577
1578       if (zero_p (step))
1579         goto fail;
1580       base = tree_mem_ref_addr (type, base);
1581     }
1582   else
1583     {
1584       ifs_ivopts_data.ivopts_data = data;
1585       ifs_ivopts_data.stmt = stmt;
1586       ifs_ivopts_data.step_p = &step;
1587       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1588           || zero_p (step))
1589         goto fail;
1590
1591       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1592       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1593
1594       base = build_fold_addr_expr (base);
1595     }
1596
1597   civ = alloc_iv (base, step);
1598   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599   return;
1600
1601 fail:
1602   for_each_index (op_p, idx_record_use, data);
1603 }
1604
1605 /* Finds and records invariants used in STMT.  */
1606
1607 static void
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 {
1610   ssa_op_iter iter;
1611   use_operand_p use_p;
1612   tree op;
1613
1614   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615     {
1616       op = USE_FROM_PTR (use_p);
1617       record_invariant (data, op, false);
1618     }
1619 }
1620
1621 /* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623 static void
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 {
1626   struct iv *iv;
1627   tree op, lhs, rhs;
1628   ssa_op_iter iter;
1629   use_operand_p use_p;
1630
1631   find_invariants_stmt (data, stmt);
1632
1633   if (TREE_CODE (stmt) == COND_EXPR)
1634     {
1635       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636       return;
1637     }
1638
1639   if (TREE_CODE (stmt) == MODIFY_EXPR)
1640     {
1641       lhs = TREE_OPERAND (stmt, 0);
1642       rhs = TREE_OPERAND (stmt, 1);
1643
1644       if (TREE_CODE (lhs) == SSA_NAME)
1645         {
1646           /* If the statement defines an induction variable, the uses are not
1647              interesting by themselves.  */
1648
1649           iv = get_iv (data, lhs);
1650
1651           if (iv && !zero_p (iv->step))
1652             return;
1653         }
1654
1655       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656         {
1657         case tcc_comparison:
1658           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659           return;
1660
1661         case tcc_reference:
1662           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663           if (REFERENCE_CLASS_P (lhs))
1664             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665           return;
1666
1667         default: ;
1668         }
1669
1670       if (REFERENCE_CLASS_P (lhs)
1671           && is_gimple_val (rhs))
1672         {
1673           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674           find_interesting_uses_op (data, rhs);
1675           return;
1676         }
1677
1678       /* TODO -- we should also handle address uses of type
1679
1680          memory = call (whatever);
1681
1682          and
1683
1684          call (memory).  */
1685     }
1686
1687   if (TREE_CODE (stmt) == PHI_NODE
1688       && bb_for_stmt (stmt) == data->current_loop->header)
1689     {
1690       lhs = PHI_RESULT (stmt);
1691       iv = get_iv (data, lhs);
1692
1693       if (iv && !zero_p (iv->step))
1694         return;
1695     }
1696
1697   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698     {
1699       op = USE_FROM_PTR (use_p);
1700
1701       if (TREE_CODE (op) != SSA_NAME)
1702         continue;
1703
1704       iv = get_iv (data, op);
1705       if (!iv)
1706         continue;
1707
1708       find_interesting_uses_op (data, op);
1709     }
1710 }
1711
1712 /* Finds interesting uses of induction variables outside of loops
1713    on loop exit edge EXIT.  */
1714
1715 static void
1716 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717 {
1718   tree phi, def;
1719
1720   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721     {
1722       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723       find_interesting_uses_outer (data, def);
1724     }
1725 }
1726
1727 /* Finds uses of the induction variables that are interesting.  */
1728
1729 static void
1730 find_interesting_uses (struct ivopts_data *data)
1731 {
1732   basic_block bb;
1733   block_stmt_iterator bsi;
1734   tree phi;
1735   basic_block *body = get_loop_body (data->current_loop);
1736   unsigned i;
1737   struct version_info *info;
1738   edge e;
1739
1740   if (dump_file && (dump_flags & TDF_DETAILS))
1741     fprintf (dump_file, "Uses:\n\n");
1742
1743   for (i = 0; i < data->current_loop->num_nodes; i++)
1744     {
1745       edge_iterator ei;
1746       bb = body[i];
1747
1748       FOR_EACH_EDGE (e, ei, bb->succs)
1749         if (e->dest != EXIT_BLOCK_PTR
1750             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751           find_interesting_uses_outside (data, e);
1752
1753       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754         find_interesting_uses_stmt (data, phi);
1755       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757     }
1758
1759   if (dump_file && (dump_flags & TDF_DETAILS))
1760     {
1761       bitmap_iterator bi;
1762
1763       fprintf (dump_file, "\n");
1764
1765       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766         {
1767           info = ver_info (data, i);
1768           if (info->inv_id)
1769             {
1770               fprintf (dump_file, "  ");
1771               print_generic_expr (dump_file, info->name, TDF_SLIM);
1772               fprintf (dump_file, " is invariant (%d)%s\n",
1773                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774             }
1775         }
1776
1777       fprintf (dump_file, "\n");
1778     }
1779
1780   free (body);
1781 }
1782
1783 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785    we are at the top-level of the processed address.  */
1786
1787 static tree
1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789                 unsigned HOST_WIDE_INT *offset)
1790 {
1791   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792   enum tree_code code;
1793   tree type, orig_type = TREE_TYPE (expr);
1794   unsigned HOST_WIDE_INT off0, off1, st;
1795   tree orig_expr = expr;
1796
1797   STRIP_NOPS (expr);
1798
1799   type = TREE_TYPE (expr);
1800   code = TREE_CODE (expr);
1801   *offset = 0;
1802
1803   switch (code)
1804     {
1805     case INTEGER_CST:
1806       if (!cst_and_fits_in_hwi (expr)
1807           || zero_p (expr))
1808         return orig_expr;
1809
1810       *offset = int_cst_value (expr);
1811       return build_int_cst_type (orig_type, 0);
1812
1813     case PLUS_EXPR:
1814     case MINUS_EXPR:
1815       op0 = TREE_OPERAND (expr, 0);
1816       op1 = TREE_OPERAND (expr, 1);
1817
1818       op0 = strip_offset_1 (op0, false, false, &off0);
1819       op1 = strip_offset_1 (op1, false, false, &off1);
1820
1821       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822       if (op0 == TREE_OPERAND (expr, 0)
1823           && op1 == TREE_OPERAND (expr, 1))
1824         return orig_expr;
1825
1826       if (zero_p (op1))
1827         expr = op0;
1828       else if (zero_p (op0))
1829         {
1830           if (code == PLUS_EXPR)
1831             expr = op1;
1832           else
1833             expr = fold_build1 (NEGATE_EXPR, type, op1);
1834         }
1835       else
1836         expr = fold_build2 (code, type, op0, op1);
1837
1838       return fold_convert (orig_type, expr);
1839
1840     case ARRAY_REF:
1841       if (!inside_addr)
1842         return orig_expr;
1843
1844       step = array_ref_element_size (expr);
1845       if (!cst_and_fits_in_hwi (step))
1846         break;
1847
1848       st = int_cst_value (step);
1849       op1 = TREE_OPERAND (expr, 1);
1850       op1 = strip_offset_1 (op1, false, false, &off1);
1851       *offset = off1 * st;
1852
1853       if (top_compref
1854           && zero_p (op1))
1855         {
1856           /* Strip the component reference completely.  */
1857           op0 = TREE_OPERAND (expr, 0);
1858           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859           *offset += off0;
1860           return op0;
1861         }
1862       break;
1863
1864     case COMPONENT_REF:
1865       if (!inside_addr)
1866         return orig_expr;
1867
1868       tmp = component_ref_field_offset (expr);
1869       if (top_compref
1870           && cst_and_fits_in_hwi (tmp))
1871         {
1872           /* Strip the component reference completely.  */
1873           op0 = TREE_OPERAND (expr, 0);
1874           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875           *offset = off0 + int_cst_value (tmp);
1876           return op0;
1877         }
1878       break;
1879
1880     case ADDR_EXPR:
1881       op0 = TREE_OPERAND (expr, 0);
1882       op0 = strip_offset_1 (op0, true, true, &off0);
1883       *offset += off0;
1884
1885       if (op0 == TREE_OPERAND (expr, 0))
1886         return orig_expr;
1887
1888       expr = build_fold_addr_expr (op0);
1889       return fold_convert (orig_type, expr);
1890
1891     case INDIRECT_REF:
1892       inside_addr = false;
1893       break;
1894
1895     default:
1896       return orig_expr;
1897     }
1898
1899   /* Default handling of expressions for that we want to recurse into
1900      the first operand.  */
1901   op0 = TREE_OPERAND (expr, 0);
1902   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903   *offset += off0;
1904
1905   if (op0 == TREE_OPERAND (expr, 0)
1906       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907     return orig_expr;
1908
1909   expr = copy_node (expr);
1910   TREE_OPERAND (expr, 0) = op0;
1911   if (op1)
1912     TREE_OPERAND (expr, 1) = op1;
1913
1914   /* Inside address, we might strip the top level component references,
1915      thus changing type of the expression.  Handling of ADDR_EXPR
1916      will fix that.  */
1917   expr = fold_convert (orig_type, expr);
1918
1919   return expr;
1920 }
1921
1922 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
1924 static tree
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 {
1927   return strip_offset_1 (expr, false, false, offset);
1928 }
1929
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931    For integer types, we return unsigned variant of the type, which
1932    avoids problems with overflows.  For pointer types, we return void *.  */
1933
1934 static tree
1935 generic_type_for (tree type)
1936 {
1937   if (POINTER_TYPE_P (type))
1938     return ptr_type_node;
1939
1940   if (TYPE_UNSIGNED (type))
1941     return type;
1942
1943   return unsigned_type_for (type);
1944 }
1945
1946 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947    the bitmap to that we should store it.  */
1948
1949 static struct ivopts_data *fd_ivopts_data;
1950 static tree
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 {
1953   bitmap *depends_on = data;
1954   struct version_info *info;
1955
1956   if (TREE_CODE (*expr_p) != SSA_NAME)
1957     return NULL_TREE;
1958   info = name_info (fd_ivopts_data, *expr_p);
1959
1960   if (!info->inv_id || info->has_nonlin_use)
1961     return NULL_TREE;
1962
1963   if (!*depends_on)
1964     *depends_on = BITMAP_ALLOC (NULL);
1965   bitmap_set_bit (*depends_on, info->inv_id);
1966
1967   return NULL_TREE;
1968 }
1969
1970 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971    position to POS.  If USE is not NULL, the candidate is set as related to
1972    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973    replacement of the final value of the iv by a direct computation.  */
1974
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977                  tree base, tree step, bool important, enum iv_position pos,
1978                  struct iv_use *use, tree incremented_at)
1979 {
1980   unsigned i;
1981   struct iv_cand *cand = NULL;
1982   tree type, orig_type;
1983
1984   if (base)
1985     {
1986       orig_type = TREE_TYPE (base);
1987       type = generic_type_for (orig_type);
1988       if (type != orig_type)
1989         {
1990           base = fold_convert (type, base);
1991           if (step)
1992             step = fold_convert (type, step);
1993         }
1994     }
1995
1996   for (i = 0; i < n_iv_cands (data); i++)
1997     {
1998       cand = iv_cand (data, i);
1999
2000       if (cand->pos != pos)
2001         continue;
2002
2003       if (cand->incremented_at != incremented_at)
2004         continue;
2005
2006       if (!cand->iv)
2007         {
2008           if (!base && !step)
2009             break;
2010
2011           continue;
2012         }
2013
2014       if (!base && !step)
2015         continue;
2016
2017       if (!operand_equal_p (base, cand->iv->base, 0))
2018         continue;
2019
2020       if (zero_p (cand->iv->step))
2021         {
2022           if (zero_p (step))
2023             break;
2024         }
2025       else
2026         {
2027           if (step && operand_equal_p (step, cand->iv->step, 0))
2028             break;
2029         }
2030     }
2031
2032   if (i == n_iv_cands (data))
2033     {
2034       cand = XCNEW (struct iv_cand);
2035       cand->id = i;
2036
2037       if (!base && !step)
2038         cand->iv = NULL;
2039       else
2040         cand->iv = alloc_iv (base, step);
2041
2042       cand->pos = pos;
2043       if (pos != IP_ORIGINAL && cand->iv)
2044         {
2045           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046           cand->var_after = cand->var_before;
2047         }
2048       cand->important = important;
2049       cand->incremented_at = incremented_at;
2050       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
2052       if (step
2053           && TREE_CODE (step) != INTEGER_CST)
2054         {
2055           fd_ivopts_data = data;
2056           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057         }
2058
2059       if (dump_file && (dump_flags & TDF_DETAILS))
2060         dump_cand (dump_file, cand);
2061     }
2062
2063   if (important && !cand->important)
2064     {
2065       cand->important = true;
2066       if (dump_file && (dump_flags & TDF_DETAILS))
2067         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068     }
2069
2070   if (use)
2071     {
2072       bitmap_set_bit (use->related_cands, i);
2073       if (dump_file && (dump_flags & TDF_DETAILS))
2074         fprintf (dump_file, "Candidate %d is related to use %d\n",
2075                  cand->id, use->id);
2076     }
2077
2078   return cand;
2079 }
2080
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082    is allowed.
2083
2084    The purpose is to avoid splitting latch edge with a biv increment, thus
2085    creating a jump, possibly confusing other optimization passes and leaving
2086    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087    is not available (so we do not have a better alternative), or if the latch
2088    edge is already nonempty.  */
2089
2090 static bool
2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093   if (!ip_normal_pos (loop))
2094     return true;
2095
2096   if (!empty_block_p (ip_end_pos (loop)))
2097     return true;
2098
2099   return false;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103    position to POS.  If USE is not NULL, the candidate is set as related to
2104    it.  The candidate computation is scheduled on all available positions.  */
2105
2106 static void
2107 add_candidate (struct ivopts_data *data,
2108                tree base, tree step, bool important, struct iv_use *use)
2109 {
2110   if (ip_normal_pos (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112   if (ip_end_pos (data->current_loop)
2113       && allow_ip_end_pos_p (data->current_loop))
2114     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115 }
2116
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118    type with SIZE bits.  */
2119
2120 static void
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122                                      unsigned int size)
2123 {
2124   tree type = lang_hooks.types.type_for_size (size, true);
2125   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126                  true, NULL);
2127 }
2128
2129 /* Adds standard iv candidates.  */
2130
2131 static void
2132 add_standard_iv_candidates (struct ivopts_data *data)
2133 {
2134   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
2136   /* The same for a double-integer type if it is still fast enough.  */
2137   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 }
2140
2141
2142 /* Adds candidates bases on the old induction variable IV.  */
2143
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147   tree phi, def;
2148   struct iv_cand *cand;
2149
2150   add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152   /* The same, but with initial value zero.  */
2153   add_candidate (data,
2154                  build_int_cst (TREE_TYPE (iv->base), 0),
2155                  iv->step, true, NULL);
2156
2157   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158   if (TREE_CODE (phi) == PHI_NODE)
2159     {
2160       /* Additionally record the possibility of leaving the original iv
2161          untouched.  */
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163       cand = add_candidate_1 (data,
2164                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165                               SSA_NAME_DEF_STMT (def));
2166       cand->var_before = iv->ssa_name;
2167       cand->var_after = def;
2168     }
2169 }
2170
2171 /* Adds candidates based on the old induction variables.  */
2172
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176   unsigned i;
2177   struct iv *iv;
2178   bitmap_iterator bi;
2179
2180   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181     {
2182       iv = ver_info (data, i)->iv;
2183       if (iv && iv->biv_p && !zero_p (iv->step))
2184         add_old_iv_candidates (data, iv);
2185     }
2186 }
2187
2188 /* Adds candidates based on the value of the induction variable IV and USE.  */
2189
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192                          struct iv *iv, struct iv_use *use)
2193 {
2194   unsigned HOST_WIDE_INT offset;
2195   tree base;
2196
2197   add_candidate (data, iv->base, iv->step, false, use);
2198
2199   /* The same, but with initial value zero.  Make such variable important,
2200      since it is generic enough so that possibly many uses may be based
2201      on it.  */
2202   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203                  iv->step, true, use);
2204
2205   /* Third, try removing the constant offset.  */
2206   base = strip_offset (iv->base, &offset);
2207   if (offset)
2208     add_candidate (data, base, iv->step, false, use);
2209 }
2210
2211 /* Possibly adds pseudocandidate for replacing the final value of USE by
2212    a direct computation.  */
2213
2214 static void
2215 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2216 {
2217   struct tree_niter_desc *niter;
2218
2219   /* We must know where we exit the loop and how many times does it roll.  */
2220   niter = niter_for_single_dom_exit (data);
2221   if (!niter
2222       || !zero_p (niter->may_be_zero))
2223     return;
2224
2225   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2226 }
2227
2228 /* Adds candidates based on the uses.  */
2229
2230 static void
2231 add_derived_ivs_candidates (struct ivopts_data *data)
2232 {
2233   unsigned i;
2234
2235   for (i = 0; i < n_iv_uses (data); i++)
2236     {
2237       struct iv_use *use = iv_use (data, i);
2238
2239       if (!use)
2240         continue;
2241
2242       switch (use->type)
2243         {
2244         case USE_NONLINEAR_EXPR:
2245         case USE_COMPARE:
2246         case USE_ADDRESS:
2247           /* Just add the ivs based on the value of the iv used here.  */
2248           add_iv_value_candidates (data, use->iv, use);
2249           break;
2250
2251         case USE_OUTER:
2252           add_iv_value_candidates (data, use->iv, use);
2253
2254           /* Additionally, add the pseudocandidate for the possibility to
2255              replace the final value by a direct computation.  */
2256           add_iv_outer_candidates (data, use);
2257           break;
2258
2259         default:
2260           gcc_unreachable ();
2261         }
2262     }
2263 }
2264
2265 /* Record important candidates and add them to related_cands bitmaps
2266    if needed.  */
2267
2268 static void
2269 record_important_candidates (struct ivopts_data *data)
2270 {
2271   unsigned i;
2272   struct iv_use *use;
2273
2274   for (i = 0; i < n_iv_cands (data); i++)
2275     {
2276       struct iv_cand *cand = iv_cand (data, i);
2277
2278       if (cand->important)
2279         bitmap_set_bit (data->important_candidates, i);
2280     }
2281
2282   data->consider_all_candidates = (n_iv_cands (data)
2283                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2284
2285   if (data->consider_all_candidates)
2286     {
2287       /* We will not need "related_cands" bitmaps in this case,
2288          so release them to decrease peak memory consumption.  */
2289       for (i = 0; i < n_iv_uses (data); i++)
2290         {
2291           use = iv_use (data, i);
2292           BITMAP_FREE (use->related_cands);
2293         }
2294     }
2295   else
2296     {
2297       /* Add important candidates to the related_cands bitmaps.  */
2298       for (i = 0; i < n_iv_uses (data); i++)
2299         bitmap_ior_into (iv_use (data, i)->related_cands,
2300                          data->important_candidates);
2301     }
2302 }
2303
2304 /* Finds the candidates for the induction variables.  */
2305
2306 static void
2307 find_iv_candidates (struct ivopts_data *data)
2308 {
2309   /* Add commonly used ivs.  */
2310   add_standard_iv_candidates (data);
2311
2312   /* Add old induction variables.  */
2313   add_old_ivs_candidates (data);
2314
2315   /* Add induction variables derived from uses.  */
2316   add_derived_ivs_candidates (data);
2317
2318   /* Record the important candidates.  */
2319   record_important_candidates (data);
2320 }
2321
2322 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2323    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2324    we allocate a simple list to every use.  */
2325
2326 static void
2327 alloc_use_cost_map (struct ivopts_data *data)
2328 {
2329   unsigned i, size, s, j;
2330
2331   for (i = 0; i < n_iv_uses (data); i++)
2332     {
2333       struct iv_use *use = iv_use (data, i);
2334       bitmap_iterator bi;
2335
2336       if (data->consider_all_candidates)
2337         size = n_iv_cands (data);
2338       else
2339         {
2340           s = 0;
2341           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2342             {
2343               s++;
2344             }
2345
2346           /* Round up to the power of two, so that moduling by it is fast.  */
2347           for (size = 1; size < s; size <<= 1)
2348             continue;
2349         }
2350
2351       use->n_map_members = size;
2352       use->cost_map = XCNEWVEC (struct cost_pair, size);
2353     }
2354 }
2355
2356 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2357    on invariants DEPENDS_ON and that the value used in expressing it
2358    is VALUE.*/
2359
2360 static void
2361 set_use_iv_cost (struct ivopts_data *data,
2362                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2363                  bitmap depends_on, tree value)
2364 {
2365   unsigned i, s;
2366
2367   if (cost == INFTY)
2368     {
2369       BITMAP_FREE (depends_on);
2370       return;
2371     }
2372
2373   if (data->consider_all_candidates)
2374     {
2375       use->cost_map[cand->id].cand = cand;
2376       use->cost_map[cand->id].cost = cost;
2377       use->cost_map[cand->id].depends_on = depends_on;
2378       use->cost_map[cand->id].value = value;
2379       return;
2380     }
2381
2382   /* n_map_members is a power of two, so this computes modulo.  */
2383   s = cand->id & (use->n_map_members - 1);
2384   for (i = s; i < use->n_map_members; i++)
2385     if (!use->cost_map[i].cand)
2386       goto found;
2387   for (i = 0; i < s; i++)
2388     if (!use->cost_map[i].cand)
2389       goto found;
2390
2391   gcc_unreachable ();
2392
2393 found:
2394   use->cost_map[i].cand = cand;
2395   use->cost_map[i].cost = cost;
2396   use->cost_map[i].depends_on = depends_on;
2397   use->cost_map[i].value = value;
2398 }
2399
2400 /* Gets cost of (USE, CANDIDATE) pair.  */
2401
2402 static struct cost_pair *
2403 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2404                  struct iv_cand *cand)
2405 {
2406   unsigned i, s;
2407   struct cost_pair *ret;
2408
2409   if (!cand)
2410     return NULL;
2411
2412   if (data->consider_all_candidates)
2413     {
2414       ret = use->cost_map + cand->id;
2415       if (!ret->cand)
2416         return NULL;
2417
2418       return ret;
2419     }
2420
2421   /* n_map_members is a power of two, so this computes modulo.  */
2422   s = cand->id & (use->n_map_members - 1);
2423   for (i = s; i < use->n_map_members; i++)
2424     if (use->cost_map[i].cand == cand)
2425       return use->cost_map + i;
2426
2427   for (i = 0; i < s; i++)
2428     if (use->cost_map[i].cand == cand)
2429       return use->cost_map + i;
2430
2431   return NULL;
2432 }
2433
2434 /* Returns estimate on cost of computing SEQ.  */
2435
2436 static unsigned
2437 seq_cost (rtx seq)
2438 {
2439   unsigned cost = 0;
2440   rtx set;
2441
2442   for (; seq; seq = NEXT_INSN (seq))
2443     {
2444       set = single_set (seq);
2445       if (set)
2446         cost += rtx_cost (set, SET);
2447       else
2448         cost++;
2449     }
2450
2451   return cost;
2452 }
2453
2454 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2455 static rtx
2456 produce_memory_decl_rtl (tree obj, int *regno)
2457 {
2458   rtx x;
2459
2460   gcc_assert (obj);
2461   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2462     {
2463       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2464       x = gen_rtx_SYMBOL_REF (Pmode, name);
2465     }
2466   else
2467     x = gen_raw_REG (Pmode, (*regno)++);
2468
2469   return gen_rtx_MEM (DECL_MODE (obj), x);
2470 }
2471
2472 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2473    walk_tree.  DATA contains the actual fake register number.  */
2474
2475 static tree
2476 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2477 {
2478   tree obj = NULL_TREE;
2479   rtx x = NULL_RTX;
2480   int *regno = data;
2481
2482   switch (TREE_CODE (*expr_p))
2483     {
2484     case ADDR_EXPR:
2485       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2486            handled_component_p (*expr_p);
2487            expr_p = &TREE_OPERAND (*expr_p, 0))
2488         continue;
2489       obj = *expr_p;
2490       if (DECL_P (obj))
2491         x = produce_memory_decl_rtl (obj, regno);
2492       break;
2493
2494     case SSA_NAME:
2495       *ws = 0;
2496       obj = SSA_NAME_VAR (*expr_p);
2497       if (!DECL_RTL_SET_P (obj))
2498         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2499       break;
2500
2501     case VAR_DECL:
2502     case PARM_DECL:
2503     case RESULT_DECL:
2504       *ws = 0;
2505       obj = *expr_p;
2506
2507       if (DECL_RTL_SET_P (obj))
2508         break;
2509
2510       if (DECL_MODE (obj) == BLKmode)
2511         x = produce_memory_decl_rtl (obj, regno);
2512       else
2513         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2514
2515       break;
2516
2517     default:
2518       break;
2519     }
2520
2521   if (x)
2522     {
2523       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2524       SET_DECL_RTL (obj, x);
2525     }
2526
2527   return NULL_TREE;
2528 }
2529
2530 /* Determines cost of the computation of EXPR.  */
2531
2532 static unsigned
2533 computation_cost (tree expr)
2534 {
2535   rtx seq, rslt;
2536   tree type = TREE_TYPE (expr);
2537   unsigned cost;
2538   /* Avoid using hard regs in ways which may be unsupported.  */
2539   int regno = LAST_VIRTUAL_REGISTER + 1;
2540
2541   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2542   start_sequence ();
2543   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2544   seq = get_insns ();
2545   end_sequence ();
2546
2547   cost = seq_cost (seq);
2548   if (MEM_P (rslt))
2549     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2550
2551   return cost;
2552 }
2553
2554 /* Returns variable containing the value of candidate CAND at statement AT.  */
2555
2556 static tree
2557 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2558 {
2559   if (stmt_after_increment (loop, cand, stmt))
2560     return cand->var_after;
2561   else
2562     return cand->var_before;
2563 }
2564
2565 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2566    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2567
2568 int
2569 tree_int_cst_sign_bit (tree t)
2570 {
2571   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2572   unsigned HOST_WIDE_INT w;
2573
2574   if (bitno < HOST_BITS_PER_WIDE_INT)
2575     w = TREE_INT_CST_LOW (t);
2576   else
2577     {
2578       w = TREE_INT_CST_HIGH (t);
2579       bitno -= HOST_BITS_PER_WIDE_INT;
2580     }
2581
2582   return (w >> bitno) & 1;
2583 }
2584
2585 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2586    return cst.  Otherwise return NULL_TREE.  */
2587
2588 static tree
2589 constant_multiple_of (tree type, tree top, tree bot)
2590 {
2591   tree res, mby, p0, p1;
2592   enum tree_code code;
2593   bool negate;
2594
2595   STRIP_NOPS (top);
2596   STRIP_NOPS (bot);
2597
2598   if (operand_equal_p (top, bot, 0))
2599     return build_int_cst (type, 1);
2600
2601   code = TREE_CODE (top);
2602   switch (code)
2603     {
2604     case MULT_EXPR:
2605       mby = TREE_OPERAND (top, 1);
2606       if (TREE_CODE (mby) != INTEGER_CST)
2607         return NULL_TREE;
2608
2609       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2610       if (!res)
2611         return NULL_TREE;
2612
2613       return fold_binary_to_constant (MULT_EXPR, type, res,
2614                                       fold_convert (type, mby));
2615
2616     case PLUS_EXPR:
2617     case MINUS_EXPR:
2618       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2619       if (!p0)
2620         return NULL_TREE;
2621       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2622       if (!p1)
2623         return NULL_TREE;
2624
2625       return fold_binary_to_constant (code, type, p0, p1);
2626
2627     case INTEGER_CST:
2628       if (TREE_CODE (bot) != INTEGER_CST)
2629         return NULL_TREE;
2630
2631       bot = fold_convert (type, bot);
2632       top = fold_convert (type, top);
2633
2634       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2635          the result afterwards.  */
2636       if (tree_int_cst_sign_bit (bot))
2637         {
2638           negate = true;
2639           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2640         }
2641       else
2642         negate = false;
2643
2644       /* Ditto for TOP.  */
2645       if (tree_int_cst_sign_bit (top))
2646         {
2647           negate = !negate;
2648           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2649         }
2650
2651       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2652         return NULL_TREE;
2653
2654       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2655       if (negate)
2656         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2657       return res;
2658
2659     default:
2660       return NULL_TREE;
2661     }
2662 }
2663
2664 /* Sets COMB to CST.  */
2665
2666 static void
2667 aff_combination_const (struct affine_tree_combination *comb, tree type,
2668                        unsigned HOST_WIDE_INT cst)
2669 {
2670   unsigned prec = TYPE_PRECISION (type);
2671
2672   comb->type = type;
2673   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2674
2675   comb->n = 0;
2676   comb->rest = NULL_TREE;
2677   comb->offset = cst & comb->mask;
2678 }
2679
2680 /* Sets COMB to single element ELT.  */
2681
2682 static void
2683 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2684 {
2685   unsigned prec = TYPE_PRECISION (type);
2686
2687   comb->type = type;
2688   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2689
2690   comb->n = 1;
2691   comb->elts[0] = elt;
2692   comb->coefs[0] = 1;
2693   comb->rest = NULL_TREE;
2694   comb->offset = 0;
2695 }
2696
2697 /* Scales COMB by SCALE.  */
2698
2699 static void
2700 aff_combination_scale (struct affine_tree_combination *comb,
2701                        unsigned HOST_WIDE_INT scale)
2702 {
2703   unsigned i, j;
2704
2705   if (scale == 1)
2706     return;
2707
2708   if (scale == 0)
2709     {
2710       aff_combination_const (comb, comb->type, 0);
2711       return;
2712     }
2713
2714   comb->offset = (scale * comb->offset) & comb->mask;
2715   for (i = 0, j = 0; i < comb->n; i++)
2716     {
2717       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2718       comb->elts[j] = comb->elts[i];
2719       if (comb->coefs[j] != 0)
2720         j++;
2721     }
2722   comb->n = j;
2723
2724   if (comb->rest)
2725     {
2726       if (comb->n < MAX_AFF_ELTS)
2727         {
2728           comb->coefs[comb->n] = scale;
2729           comb->elts[comb->n] = comb->rest;
2730           comb->rest = NULL_TREE;
2731           comb->n++;
2732         }
2733       else
2734         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2735                                   build_int_cst_type (comb->type, scale));
2736     }
2737 }
2738
2739 /* Adds ELT * SCALE to COMB.  */
2740
2741 static void
2742 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2743                          unsigned HOST_WIDE_INT scale)
2744 {
2745   unsigned i;
2746
2747   if (scale == 0)
2748     return;
2749
2750   for (i = 0; i < comb->n; i++)
2751     if (operand_equal_p (comb->elts[i], elt, 0))
2752       {
2753         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2754         if (comb->coefs[i])
2755           return;
2756
2757         comb->n--;
2758         comb->coefs[i] = comb->coefs[comb->n];
2759         comb->elts[i] = comb->elts[comb->n];
2760
2761         if (comb->rest)
2762           {
2763             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2764             comb->coefs[comb->n] = 1;
2765             comb->elts[comb->n] = comb->rest;
2766             comb->rest = NULL_TREE;
2767             comb->n++;
2768           }
2769         return;
2770       }
2771   if (comb->n < MAX_AFF_ELTS)
2772     {
2773       comb->coefs[comb->n] = scale;
2774       comb->elts[comb->n] = elt;
2775       comb->n++;
2776       return;
2777     }
2778
2779   if (scale == 1)
2780     elt = fold_convert (comb->type, elt);
2781   else
2782     elt = fold_build2 (MULT_EXPR, comb->type,
2783                        fold_convert (comb->type, elt),
2784                        build_int_cst_type (comb->type, scale));
2785
2786   if (comb->rest)
2787     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2788   else
2789     comb->rest = elt;
2790 }
2791
2792 /* Adds COMB2 to COMB1.  */
2793
2794 static void
2795 aff_combination_add (struct affine_tree_combination *comb1,
2796                      struct affine_tree_combination *comb2)
2797 {
2798   unsigned i;
2799
2800   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2801   for (i = 0; i < comb2->n; i++)
2802     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2803   if (comb2->rest)
2804     aff_combination_add_elt (comb1, comb2->rest, 1);
2805 }
2806
2807 /* Splits EXPR into an affine combination of parts.  */
2808
2809 static void
2810 tree_to_aff_combination (tree expr, tree type,
2811                          struct affine_tree_combination *comb)
2812 {
2813   struct affine_tree_combination tmp;
2814   enum tree_code code;
2815   tree cst, core, toffset;
2816   HOST_WIDE_INT bitpos, bitsize;
2817   enum machine_mode mode;
2818   int unsignedp, volatilep;
2819
2820   STRIP_NOPS (expr);
2821
2822   code = TREE_CODE (expr);
2823   switch (code)
2824     {
2825     case INTEGER_CST:
2826       aff_combination_const (comb, type, int_cst_value (expr));
2827       return;
2828
2829     case PLUS_EXPR:
2830     case MINUS_EXPR:
2831       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2832       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2833       if (code == MINUS_EXPR)
2834         aff_combination_scale (&tmp, -1);
2835       aff_combination_add (comb, &tmp);
2836       return;
2837
2838     case MULT_EXPR:
2839       cst = TREE_OPERAND (expr, 1);
2840       if (TREE_CODE (cst) != INTEGER_CST)
2841         break;
2842       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2843       aff_combination_scale (comb, int_cst_value (cst));
2844       return;
2845
2846     case NEGATE_EXPR:
2847       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2848       aff_combination_scale (comb, -1);
2849       return;
2850
2851     case ADDR_EXPR:
2852       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2853                                   &toffset, &mode, &unsignedp, &volatilep,
2854                                   false);
2855       if (bitpos % BITS_PER_UNIT != 0)
2856         break;
2857       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2858       core = build_fold_addr_expr (core);
2859       if (TREE_CODE (core) == ADDR_EXPR)
2860         aff_combination_add_elt (comb, core, 1);
2861       else
2862         {
2863           tree_to_aff_combination (core, type, &tmp);
2864           aff_combination_add (comb, &tmp);
2865         }
2866       if (toffset)
2867         {
2868           tree_to_aff_combination (toffset, type, &tmp);
2869           aff_combination_add (comb, &tmp);
2870         }
2871       return;
2872
2873     default:
2874       break;
2875     }
2876
2877   aff_combination_elt (comb, type, expr);
2878 }
2879
2880 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2881
2882 static tree
2883 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2884                  unsigned HOST_WIDE_INT mask)
2885 {
2886   enum tree_code code;
2887
2888   scale &= mask;
2889   elt = fold_convert (type, elt);
2890
2891   if (scale == 1)
2892     {
2893       if (!expr)
2894         return elt;
2895
2896       return fold_build2 (PLUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (scale == mask)
2900     {
2901       if (!expr)
2902         return fold_build1 (NEGATE_EXPR, type, elt);
2903
2904       return fold_build2 (MINUS_EXPR, type, expr, elt);
2905     }
2906
2907   if (!expr)
2908     return fold_build2 (MULT_EXPR, type, elt,
2909                         build_int_cst_type (type, scale));
2910
2911   if ((scale | (mask >> 1)) == mask)
2912     {
2913       /* Scale is negative.  */
2914       code = MINUS_EXPR;
2915       scale = (-scale) & mask;
2916     }
2917   else
2918     code = PLUS_EXPR;
2919
2920   elt = fold_build2 (MULT_EXPR, type, elt,
2921                      build_int_cst_type (type, scale));
2922   return fold_build2 (code, type, expr, elt);
2923 }
2924
2925 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2926
2927 static void
2928 unshare_aff_combination (struct affine_tree_combination *comb)
2929 {
2930   unsigned i;
2931
2932   for (i = 0; i < comb->n; i++)
2933     comb->elts[i] = unshare_expr (comb->elts[i]);
2934   if (comb->rest)
2935     comb->rest = unshare_expr (comb->rest);
2936 }
2937
2938 /* Makes tree from the affine combination COMB.  */
2939
2940 static tree
2941 aff_combination_to_tree (struct affine_tree_combination *comb)
2942 {
2943   tree type = comb->type;
2944   tree expr = comb->rest;
2945   unsigned i;
2946   unsigned HOST_WIDE_INT off, sgn;
2947
2948   /* Handle the special case produced by get_computation_aff when
2949      the type does not fit in HOST_WIDE_INT.  */
2950   if (comb->n == 0 && comb->offset == 0)
2951     return fold_convert (type, expr);
2952
2953   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2954
2955   for (i = 0; i < comb->n; i++)
2956     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2957                             comb->mask);
2958
2959   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2960     {
2961       /* Offset is negative.  */
2962       off = (-comb->offset) & comb->mask;
2963       sgn = comb->mask;
2964     }
2965   else
2966     {
2967       off = comb->offset;
2968       sgn = 1;
2969     }
2970   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2971                           comb->mask);
2972 }
2973
2974 /* Determines the expression by that USE is expressed from induction variable
2975    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2976    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2977
2978 static bool
2979 get_computation_aff (struct loop *loop,
2980                      struct iv_use *use, struct iv_cand *cand, tree at,
2981                      struct affine_tree_combination *aff)
2982 {
2983   tree ubase = use->iv->base;
2984   tree ustep = use->iv->step;
2985   tree cbase = cand->iv->base;
2986   tree cstep = cand->iv->step;
2987   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2988   tree uutype;
2989   tree expr, delta;
2990   tree ratio;
2991   unsigned HOST_WIDE_INT ustepi, cstepi;
2992   HOST_WIDE_INT ratioi;
2993   struct affine_tree_combination cbase_aff, expr_aff;
2994   tree cstep_orig = cstep, ustep_orig = ustep;
2995
2996   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2997     {
2998       /* We do not have a precision to express the values of use.  */
2999       return false;
3000     }
3001
3002   expr = var_at_stmt (loop, cand, at);
3003
3004   if (TREE_TYPE (expr) != ctype)
3005     {
3006       /* This may happen with the original ivs.  */
3007       expr = fold_convert (ctype, expr);
3008     }
3009
3010   if (TYPE_UNSIGNED (utype))
3011     uutype = utype;
3012   else
3013     {
3014       uutype = unsigned_type_for (utype);
3015       ubase = fold_convert (uutype, ubase);
3016       ustep = fold_convert (uutype, ustep);
3017     }
3018
3019   if (uutype != ctype)
3020     {
3021       expr = fold_convert (uutype, expr);
3022       cbase = fold_convert (uutype, cbase);
3023       cstep = fold_convert (uutype, cstep);
3024
3025       /* If the conversion is not noop, we must take it into account when
3026          considering the value of the step.  */
3027       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3028         cstep_orig = cstep;
3029     }
3030
3031   if (cst_and_fits_in_hwi (cstep_orig)
3032       && cst_and_fits_in_hwi (ustep_orig))
3033     {
3034       ustepi = int_cst_value (ustep_orig);
3035       cstepi = int_cst_value (cstep_orig);
3036
3037       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3038         {
3039           /* TODO maybe consider case when ustep divides cstep and the ratio is
3040              a power of 2 (so that the division is fast to execute)?  We would
3041              need to be much more careful with overflows etc. then.  */
3042           return false;
3043         }
3044
3045       ratio = build_int_cst_type (uutype, ratioi);
3046     }
3047   else
3048     {
3049       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3050       if (!ratio)
3051         return false;
3052
3053       /* Ratioi is only used to detect special cases when the multiplicative
3054          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3055          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3056          to integer_onep/integer_all_onesp, since the former ignores
3057          TREE_OVERFLOW.  */
3058       if (cst_and_fits_in_hwi (ratio))
3059         ratioi = int_cst_value (ratio);
3060       else if (integer_onep (ratio))
3061         ratioi = 1;
3062       else if (integer_all_onesp (ratio))
3063         ratioi = -1;
3064       else
3065         ratioi = 0;
3066     }
3067
3068   /* We may need to shift the value if we are after the increment.  */
3069   if (stmt_after_increment (loop, cand, at))
3070     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3071
3072   /* use = ubase - ratio * cbase + ratio * var.
3073
3074      In general case ubase + ratio * (var - cbase) could be better (one less
3075      multiplication), but often it is possible to eliminate redundant parts
3076      of computations from (ubase - ratio * cbase) term, and if it does not
3077      happen, fold is able to apply the distributive law to obtain this form
3078      anyway.  */
3079
3080   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3081     {
3082       /* Let's compute in trees and just return the result in AFF.  This case
3083          should not be very common, and fold itself is not that bad either,
3084          so making the aff. functions more complicated to handle this case
3085          is not that urgent.  */
3086       if (ratioi == 1)
3087         {
3088           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3089           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3090         }
3091       else if (ratioi == -1)
3092         {
3093           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3094           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3095         }
3096       else
3097         {
3098           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3099           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3100           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3101           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3102         }
3103
3104       aff->type = uutype;
3105       aff->n = 0;
3106       aff->offset = 0;
3107       aff->mask = 0;
3108       aff->rest = expr;
3109       return true;
3110     }
3111
3112   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3113      possible to compute ratioi.  */
3114   gcc_assert (ratioi);
3115
3116   tree_to_aff_combination (ubase, uutype, aff);
3117   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3118   tree_to_aff_combination (expr, uutype, &expr_aff);
3119   aff_combination_scale (&cbase_aff, -ratioi);
3120   aff_combination_scale (&expr_aff, ratioi);
3121   aff_combination_add (aff, &cbase_aff);
3122   aff_combination_add (aff, &expr_aff);
3123
3124   return true;
3125 }
3126
3127 /* Determines the expression by that USE is expressed from induction variable
3128    CAND at statement AT in LOOP.  The computation is unshared.  */
3129
3130 static tree
3131 get_computation_at (struct loop *loop,
3132                     struct iv_use *use, struct iv_cand *cand, tree at)
3133 {
3134   struct affine_tree_combination aff;
3135   tree type = TREE_TYPE (use->iv->base);
3136
3137   if (!get_computation_aff (loop, use, cand, at, &aff))
3138     return NULL_TREE;
3139   unshare_aff_combination (&aff);
3140   return fold_convert (type, aff_combination_to_tree (&aff));
3141 }
3142
3143 /* Determines the expression by that USE is expressed from induction variable
3144    CAND in LOOP.  The computation is unshared.  */
3145
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3148 {
3149   return get_computation_at (loop, use, cand, use->stmt);
3150 }
3151
3152 /* Returns cost of addition in MODE.  */
3153
3154 static unsigned
3155 add_cost (enum machine_mode mode)
3156 {
3157   static unsigned costs[NUM_MACHINE_MODES];
3158   rtx seq;
3159   unsigned cost;
3160
3161   if (costs[mode])
3162     return costs[mode];
3163
3164   start_sequence ();
3165   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3166                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3167                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3168                  NULL_RTX);
3169   seq = get_insns ();
3170   end_sequence ();
3171
3172   cost = seq_cost (seq);
3173   if (!cost)
3174     cost = 1;
3175
3176   costs[mode] = cost;
3177
3178   if (dump_file && (dump_flags & TDF_DETAILS))
3179     fprintf (dump_file, "Addition in %s costs %d\n",
3180              GET_MODE_NAME (mode), cost);
3181   return cost;
3182 }
3183
3184 /* Entry in a hashtable of already known costs for multiplication.  */
3185 struct mbc_entry
3186 {
3187   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3188   enum machine_mode mode;       /* In mode.  */
3189   unsigned cost;                /* The cost.  */
3190 };
3191
3192 /* Counts hash value for the ENTRY.  */
3193
3194 static hashval_t
3195 mbc_entry_hash (const void *entry)
3196 {
3197   const struct mbc_entry *e = entry;
3198
3199   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3200 }
3201
3202 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3203
3204 static int
3205 mbc_entry_eq (const void *entry1, const void *entry2)
3206 {
3207   const struct mbc_entry *e1 = entry1;
3208   const struct mbc_entry *e2 = entry2;
3209
3210   return (e1->mode == e2->mode
3211           && e1->cst == e2->cst);
3212 }
3213
3214 /* Returns cost of multiplication by constant CST in MODE.  */
3215
3216 unsigned
3217 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3218 {
3219   static htab_t costs;
3220   struct mbc_entry **cached, act;
3221   rtx seq;
3222   unsigned cost;
3223
3224   if (!costs)
3225     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3226
3227   act.mode = mode;
3228   act.cst = cst;
3229   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3230   if (*cached)
3231     return (*cached)->cost;
3232
3233   *cached = XNEW (struct mbc_entry);
3234   (*cached)->mode = mode;
3235   (*cached)->cst = cst;
3236
3237   start_sequence ();
3238   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3239                gen_int_mode (cst, mode), NULL_RTX, 0);
3240   seq = get_insns ();
3241   end_sequence ();
3242
3243   cost = seq_cost (seq);
3244
3245   if (dump_file && (dump_flags & TDF_DETAILS))
3246     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3247              (int) cst, GET_MODE_NAME (mode), cost);
3248
3249   (*cached)->cost = cost;
3250
3251   return cost;
3252 }
3253
3254 /* Returns true if multiplying by RATIO is allowed in address.  */
3255
3256 bool
3257 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3258 {
3259 #define MAX_RATIO 128
3260   static sbitmap valid_mult;
3261
3262   if (!valid_mult)
3263     {
3264       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3265       rtx addr;
3266       HOST_WIDE_INT i;
3267
3268       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3269       sbitmap_zero (valid_mult);
3270       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3271       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3272         {
3273           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3274           if (memory_address_p (Pmode, addr))
3275             SET_BIT (valid_mult, i + MAX_RATIO);
3276         }
3277
3278       if (dump_file && (dump_flags & TDF_DETAILS))
3279         {
3280           fprintf (dump_file, "  allowed multipliers:");
3281           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3282             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3283               fprintf (dump_file, " %d", (int) i);
3284           fprintf (dump_file, "\n");
3285           fprintf (dump_file, "\n");
3286         }
3287     }
3288
3289   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3290     return false;
3291
3292   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3293 }
3294
3295 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3296    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3297    variable is omitted.  The created memory accesses MODE.
3298
3299    TODO -- there must be some better way.  This all is quite crude.  */
3300
3301 static unsigned
3302 get_address_cost (bool symbol_present, bool var_present,
3303                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3304 {
3305   static bool initialized = false;
3306   static HOST_WIDE_INT rat, off;
3307   static HOST_WIDE_INT min_offset, max_offset;
3308   static unsigned costs[2][2][2][2];
3309   unsigned cost, acost;
3310   rtx seq, addr, base;
3311   bool offset_p, ratio_p;
3312   rtx reg1;
3313   HOST_WIDE_INT s_offset;
3314   unsigned HOST_WIDE_INT mask;
3315   unsigned bits;
3316
3317   if (!initialized)
3318     {
3319       HOST_WIDE_INT i;
3320       initialized = true;
3321
3322       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3323
3324       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3325       for (i = 1; i <= 1 << 20; i <<= 1)
3326         {
3327           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3328           if (!memory_address_p (Pmode, addr))
3329             break;
3330         }
3331       max_offset = i >> 1;
3332       off = max_offset;
3333
3334       for (i = 1; i <= 1 << 20; i <<= 1)
3335         {
3336           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3337           if (!memory_address_p (Pmode, addr))
3338             break;
3339         }
3340       min_offset = -(i >> 1);
3341
3342       if (dump_file && (dump_flags & TDF_DETAILS))
3343         {
3344           fprintf (dump_file, "get_address_cost:\n");
3345           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3346           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3347         }
3348
3349       rat = 1;
3350       for (i = 2; i <= MAX_RATIO; i++)
3351         if (multiplier_allowed_in_address_p (i))
3352           {
3353             rat = i;
3354             break;
3355           }
3356     }
3357
3358   bits = GET_MODE_BITSIZE (Pmode);
3359   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3360   offset &= mask;
3361   if ((offset >> (bits - 1) & 1))
3362     offset |= ~mask;
3363   s_offset = offset;
3364
3365   cost = 0;
3366   offset_p = (s_offset != 0
3367               && min_offset <= s_offset && s_offset <= max_offset);
3368   ratio_p = (ratio != 1
3369              && multiplier_allowed_in_address_p (ratio));
3370
3371   if (ratio != 1 && !ratio_p)
3372     cost += multiply_by_cost (ratio, Pmode);
3373
3374   if (s_offset && !offset_p && !symbol_present)
3375     {
3376       cost += add_cost (Pmode);
3377       var_present = true;
3378     }
3379
3380   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3381   if (!acost)
3382     {
3383       int old_cse_not_expected;
3384       acost = 0;
3385
3386       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3387       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3388       if (ratio_p)
3389         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3390
3391       if (var_present)
3392         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3393
3394       if (symbol_present)
3395         {
3396           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3397           if (offset_p)
3398             base = gen_rtx_fmt_e (CONST, Pmode,
3399                                   gen_rtx_fmt_ee (PLUS, Pmode,
3400                                                   base,
3401                                                   gen_int_mode (off, Pmode)));
3402         }
3403       else if (offset_p)
3404         base = gen_int_mode (off, Pmode);
3405       else
3406         base = NULL_RTX;
3407
3408       if (base)
3409         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3410
3411       start_sequence ();
3412       /* To avoid splitting addressing modes, pretend that no cse will
3413          follow.  */
3414       old_cse_not_expected = cse_not_expected;
3415       cse_not_expected = true;
3416       addr = memory_address (Pmode, addr);
3417       cse_not_expected = old_cse_not_expected;
3418       seq = get_insns ();
3419       end_sequence ();
3420
3421       acost = seq_cost (seq);
3422       acost += address_cost (addr, Pmode);
3423
3424       if (!acost)
3425         acost = 1;
3426       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3427     }
3428
3429   return cost + acost;
3430 }
3431
3432 /* Estimates cost of forcing expression EXPR into a variable.  */
3433
3434 unsigned
3435 force_expr_to_var_cost (tree expr)
3436 {
3437   static bool costs_initialized = false;
3438   static unsigned integer_cost;
3439   static unsigned symbol_cost;
3440   static unsigned address_cost;
3441   tree op0, op1;
3442   unsigned cost0, cost1, cost;
3443   enum machine_mode mode;
3444
3445   if (!costs_initialized)
3446     {
3447       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3448       rtx x = gen_rtx_MEM (DECL_MODE (var),
3449                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3450       tree addr;
3451       tree type = build_pointer_type (integer_type_node);
3452
3453       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3454                                                            2000));
3455
3456       SET_DECL_RTL (var, x);
3457       TREE_STATIC (var) = 1;
3458       addr = build1 (ADDR_EXPR, type, var);
3459       symbol_cost = computation_cost (addr) + 1;
3460
3461       address_cost
3462         = computation_cost (build2 (PLUS_EXPR, type,
3463                                     addr,
3464                                     build_int_cst_type (type, 2000))) + 1;
3465       if (dump_file && (dump_flags & TDF_DETAILS))
3466         {
3467           fprintf (dump_file, "force_expr_to_var_cost:\n");
3468           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3469           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3470           fprintf (dump_file, "  address %d\n", (int) address_cost);
3471           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3472           fprintf (dump_file, "\n");
3473         }
3474
3475       costs_initialized = true;
3476     }
3477
3478   STRIP_NOPS (expr);
3479
3480   if (SSA_VAR_P (expr))
3481     return 0;
3482
3483   if (TREE_INVARIANT (expr))
3484     {
3485       if (TREE_CODE (expr) == INTEGER_CST)
3486         return integer_cost;
3487
3488       if (TREE_CODE (expr) == ADDR_EXPR)
3489         {
3490           tree obj = TREE_OPERAND (expr, 0);
3491
3492           if (TREE_CODE (obj) == VAR_DECL
3493               || TREE_CODE (obj) == PARM_DECL
3494               || TREE_CODE (obj) == RESULT_DECL)
3495             return symbol_cost;
3496         }
3497
3498       return address_cost;
3499     }
3500
3501   switch (TREE_CODE (expr))
3502     {
3503     case PLUS_EXPR:
3504     case MINUS_EXPR:
3505     case MULT_EXPR:
3506       op0 = TREE_OPERAND (expr, 0);
3507       op1 = TREE_OPERAND (expr, 1);
3508       STRIP_NOPS (op0);
3509       STRIP_NOPS (op1);
3510
3511       if (is_gimple_val (op0))
3512         cost0 = 0;
3513       else
3514         cost0 = force_expr_to_var_cost (op0);
3515
3516       if (is_gimple_val (op1))
3517         cost1 = 0;
3518       else
3519         cost1 = force_expr_to_var_cost (op1);
3520
3521       break;
3522
3523     default:
3524       /* Just an arbitrary value, FIXME.  */
3525       return target_spill_cost;
3526     }
3527
3528   mode = TYPE_MODE (TREE_TYPE (expr));
3529   switch (TREE_CODE (expr))
3530     {
3531     case PLUS_EXPR:
3532     case MINUS_EXPR:
3533       cost = add_cost (mode);
3534       break;
3535
3536     case MULT_EXPR:
3537       if (cst_and_fits_in_hwi (op0))
3538         cost = multiply_by_cost (int_cst_value (op0), mode);
3539       else if (cst_and_fits_in_hwi (op1))
3540         cost = multiply_by_cost (int_cst_value (op1), mode);
3541       else
3542         return target_spill_cost;
3543       break;
3544
3545     default:
3546       gcc_unreachable ();
3547     }
3548
3549   cost += cost0;
3550   cost += cost1;
3551
3552   /* Bound the cost by target_spill_cost.  The parts of complicated
3553      computations often are either loop invariant or at least can
3554      be shared between several iv uses, so letting this grow without
3555      limits would not give reasonable results.  */
3556   return cost < target_spill_cost ? cost : target_spill_cost;
3557 }
3558
3559 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3560    invariants the computation depends on.  */
3561
3562 static unsigned
3563 force_var_cost (struct ivopts_data *data,
3564                 tree expr, bitmap *depends_on)
3565 {
3566   if (depends_on)
3567     {
3568       fd_ivopts_data = data;
3569       walk_tree (&expr, find_depends, depends_on, NULL);
3570     }
3571
3572   return force_expr_to_var_cost (expr);
3573 }
3574
3575 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3576    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3577    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3578    invariants the computation depends on.  */
3579
3580 static unsigned
3581 split_address_cost (struct ivopts_data *data,
3582                     tree addr, bool *symbol_present, bool *var_present,
3583                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3584 {
3585   tree core;
3586   HOST_WIDE_INT bitsize;
3587   HOST_WIDE_INT bitpos;
3588   tree toffset;
3589   enum machine_mode mode;
3590   int unsignedp, volatilep;
3591
3592   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3593                               &unsignedp, &volatilep, false);
3594
3595   if (toffset != 0
3596       || bitpos % BITS_PER_UNIT != 0
3597       || TREE_CODE (core) != VAR_DECL)
3598     {
3599       *symbol_present = false;
3600       *var_present = true;
3601       fd_ivopts_data = data;
3602       walk_tree (&addr, find_depends, depends_on, NULL);
3603       return target_spill_cost;
3604     }
3605
3606   *offset += bitpos / BITS_PER_UNIT;
3607   if (TREE_STATIC (core)
3608       || DECL_EXTERNAL (core))
3609     {
3610       *symbol_present = true;
3611       *var_present = false;
3612       return 0;
3613     }
3614
3615   *symbol_present = false;
3616   *var_present = true;
3617   return 0;
3618 }
3619
3620 /* Estimates cost of expressing difference of addresses E1 - E2 as
3621    var + symbol + offset.  The value of offset is added to OFFSET,
3622    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3623    part is missing.  DEPENDS_ON is a set of the invariants the computation
3624    depends on.  */
3625
3626 static unsigned
3627 ptr_difference_cost (struct ivopts_data *data,
3628                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3629                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3630 {
3631   HOST_WIDE_INT diff = 0;
3632   unsigned cost;
3633
3634   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3635
3636   if (ptr_difference_const (e1, e2, &diff))
3637     {
3638       *offset += diff;
3639       *symbol_present = false;
3640       *var_present = false;
3641       return 0;
3642     }
3643
3644   if (e2 == integer_zero_node)
3645     return split_address_cost (data, TREE_OPERAND (e1, 0),
3646                                symbol_present, var_present, offset, depends_on);
3647
3648   *symbol_present = false;
3649   *var_present = true;
3650
3651   cost = force_var_cost (data, e1, depends_on);
3652   cost += force_var_cost (data, e2, depends_on);
3653   cost += add_cost (Pmode);
3654
3655   return cost;
3656 }
3657
3658 /* Estimates cost of expressing difference E1 - E2 as
3659    var + symbol + offset.  The value of offset is added to OFFSET,
3660    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3661    part is missing.  DEPENDS_ON is a set of the invariants the computation
3662    depends on.  */
3663
3664 static unsigned
3665 difference_cost (struct ivopts_data *data,
3666                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3667                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3668 {
3669   unsigned cost;
3670   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3671   unsigned HOST_WIDE_INT off1, off2;
3672
3673   e1 = strip_offset (e1, &off1);
3674   e2 = strip_offset (e2, &off2);
3675   *offset += off1 - off2;
3676
3677   STRIP_NOPS (e1);
3678   STRIP_NOPS (e2);
3679
3680   if (TREE_CODE (e1) == ADDR_EXPR)
3681     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3682                                 depends_on);
3683   *symbol_present = false;
3684
3685   if (operand_equal_p (e1, e2, 0))
3686     {
3687       *var_present = false;
3688       return 0;
3689     }
3690   *var_present = true;
3691   if (zero_p (e2))
3692     return force_var_cost (data, e1, depends_on);
3693
3694   if (zero_p (e1))
3695     {
3696       cost = force_var_cost (data, e2, depends_on);
3697       cost += multiply_by_cost (-1, mode);
3698
3699       return cost;
3700     }
3701
3702   cost = force_var_cost (data, e1, depends_on);
3703   cost += force_var_cost (data, e2, depends_on);
3704   cost += add_cost (mode);
3705
3706   return cost;
3707 }
3708
3709 /* Determines the cost of the computation by that USE is expressed
3710    from induction variable CAND.  If ADDRESS_P is true, we just need
3711    to create an address from it, otherwise we want to get it into
3712    register.  A set of invariants we depend on is stored in
3713    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3714
3715 static unsigned
3716 get_computation_cost_at (struct ivopts_data *data,
3717                          struct iv_use *use, struct iv_cand *cand,
3718                          bool address_p, bitmap *depends_on, tree at)
3719 {
3720   tree ubase = use->iv->base, ustep = use->iv->step;
3721   tree cbase, cstep;
3722   tree utype = TREE_TYPE (ubase), ctype;
3723   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3724   HOST_WIDE_INT ratio, aratio;
3725   bool var_present, symbol_present;
3726   unsigned cost = 0, n_sums;
3727
3728   *depends_on = NULL;
3729
3730   /* Only consider real candidates.  */
3731   if (!cand->iv)
3732     return INFTY;
3733
3734   cbase = cand->iv->base;
3735   cstep = cand->iv->step;
3736   ctype = TREE_TYPE (cbase);
3737
3738   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3739     {
3740       /* We do not have a precision to express the values of use.  */
3741       return INFTY;
3742     }
3743
3744   if (address_p)
3745     {
3746       /* Do not try to express address of an object with computation based
3747          on address of a different object.  This may cause problems in rtl
3748          level alias analysis (that does not expect this to be happening,
3749          as this is illegal in C), and would be unlikely to be useful
3750          anyway.  */
3751       if (use->iv->base_object
3752           && cand->iv->base_object
3753           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3754         return INFTY;
3755     }
3756
3757   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3758     {
3759       /* TODO -- add direct handling of this case.  */
3760       goto fallback;
3761     }
3762
3763   /* CSTEPI is removed from the offset in case statement is after the
3764      increment.  If the step is not constant, we use zero instead.
3765      This is a bit imprecise (there is the extra addition), but
3766      redundancy elimination is likely to transform the code so that
3767      it uses value of the variable before increment anyway,
3768      so it is not that much unrealistic.  */
3769   if (cst_and_fits_in_hwi (cstep))
3770     cstepi = int_cst_value (cstep);
3771   else
3772     cstepi = 0;
3773
3774   if (cst_and_fits_in_hwi (ustep)
3775       && cst_and_fits_in_hwi (cstep))
3776     {
3777       ustepi = int_cst_value (ustep);
3778
3779       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3780         return INFTY;
3781     }
3782   else
3783     {
3784       tree rat;
3785
3786       rat = constant_multiple_of (utype, ustep, cstep);
3787
3788       if (!rat)
3789         return INFTY;
3790
3791       if (cst_and_fits_in_hwi (rat))
3792         ratio = int_cst_value (rat);
3793       else if (integer_onep (rat))
3794         ratio = 1;
3795       else if (integer_all_onesp (rat))
3796         ratio = -1;
3797       else
3798         return INFTY;
3799     }
3800
3801   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3802      or ratio == 1, it is better to handle this like
3803
3804      ubase - ratio * cbase + ratio * var
3805
3806      (also holds in the case ratio == -1, TODO.  */
3807
3808   if (cst_and_fits_in_hwi (cbase))
3809     {
3810       offset = - ratio * int_cst_value (cbase);
3811       cost += difference_cost (data,
3812                                ubase, integer_zero_node,
3813                                &symbol_present, &var_present, &offset,
3814                                depends_on);
3815     }
3816   else if (ratio == 1)
3817     {
3818       cost += difference_cost (data,
3819                                ubase, cbase,
3820                                &symbol_present, &var_present, &offset,
3821                                depends_on);
3822     }
3823   else
3824     {
3825       cost += force_var_cost (data, cbase, depends_on);
3826       cost += add_cost (TYPE_MODE (ctype));
3827       cost += difference_cost (data,
3828                                ubase, integer_zero_node,
3829                                &symbol_present, &var_present, &offset,
3830                                depends_on);
3831     }
3832
3833   /* If we are after the increment, the value of the candidate is higher by
3834      one iteration.  */
3835   if (stmt_after_increment (data->current_loop, cand, at))
3836     offset -= ratio * cstepi;
3837
3838   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3839      (symbol/var/const parts may be omitted).  If we are looking for an address,
3840      find the cost of addressing this.  */
3841   if (address_p)
3842     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3843
3844   /* Otherwise estimate the costs for computing the expression.  */
3845   aratio = ratio > 0 ? ratio : -ratio;
3846   if (!symbol_present && !var_present && !offset)
3847     {
3848       if (ratio != 1)
3849         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3850
3851       return cost;
3852     }
3853
3854   if (aratio != 1)
3855     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3856
3857   n_sums = 1;
3858   if (var_present
3859       /* Symbol + offset should be compile-time computable.  */
3860       && (symbol_present || offset))
3861     n_sums++;
3862
3863   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3864
3865 fallback:
3866   {
3867     /* Just get the expression, expand it and measure the cost.  */
3868     tree comp = get_computation_at (data->current_loop, use, cand, at);
3869
3870     if (!comp)
3871       return INFTY;
3872
3873     if (address_p)
3874       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3875
3876     return computation_cost (comp);
3877   }
3878 }
3879
3880 /* Determines the cost of the computation by that USE is expressed
3881    from induction variable CAND.  If ADDRESS_P is true, we just need
3882    to create an address from it, otherwise we want to get it into
3883    register.  A set of invariants we depend on is stored in
3884    DEPENDS_ON.  */
3885
3886 static unsigned
3887 get_computation_cost (struct ivopts_data *data,
3888                       struct iv_use *use, struct iv_cand *cand,
3889                       bool address_p, bitmap *depends_on)
3890 {
3891   return get_computation_cost_at (data,
3892                                   use, cand, address_p, depends_on, use->stmt);
3893 }
3894
3895 /* Determines cost of basing replacement of USE on CAND in a generic
3896    expression.  */
3897
3898 static bool
3899 determine_use_iv_cost_generic (struct ivopts_data *data,
3900                                struct iv_use *use, struct iv_cand *cand)
3901 {
3902   bitmap depends_on;
3903   unsigned cost;
3904
3905   /* The simple case first -- if we need to express value of the preserved
3906      original biv, the cost is 0.  This also prevents us from counting the
3907      cost of increment twice -- once at this use and once in the cost of
3908      the candidate.  */
3909   if (cand->pos == IP_ORIGINAL
3910       && cand->incremented_at == use->stmt)
3911     {
3912       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3913       return true;
3914     }
3915
3916   cost = get_computation_cost (data, use, cand, false, &depends_on);
3917   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3918
3919   return cost != INFTY;
3920 }
3921
3922 /* Determines cost of basing replacement of USE on CAND in an address.  */
3923
3924 static bool
3925 determine_use_iv_cost_address (struct ivopts_data *data,
3926                                struct iv_use *use, struct iv_cand *cand)
3927 {
3928   bitmap depends_on;
3929   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3930
3931   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3932
3933   return cost != INFTY;
3934 }
3935
3936 /* Computes value of induction variable IV in iteration NITER.  */
3937
3938 static tree
3939 iv_value (struct iv *iv, tree niter)
3940 {
3941   tree val;
3942   tree type = TREE_TYPE (iv->base);
3943
3944   niter = fold_convert (type, niter);
3945   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3946
3947   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3948 }
3949
3950 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3951
3952 static tree
3953 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3954 {
3955   tree val = iv_value (cand->iv, niter);
3956   tree type = TREE_TYPE (cand->iv->base);
3957
3958   if (stmt_after_increment (loop, cand, at))
3959     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3960
3961   return val;
3962 }
3963
3964 /* Returns period of induction variable iv.  */
3965
3966 static tree
3967 iv_period (struct iv *iv)
3968 {
3969   tree step = iv->step, period, type;
3970   tree pow2div;
3971
3972   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3973
3974   /* Period of the iv is gcd (step, type range).  Since type range is power
3975      of two, it suffices to determine the maximum power of two that divides
3976      step.  */
3977   pow2div = num_ending_zeros (step);
3978   type = unsigned_type_for (TREE_TYPE (step));
3979
3980   period = build_low_bits_mask (type,
3981                                 (TYPE_PRECISION (type)
3982                                  - tree_low_cst (pow2div, 1)));
3983
3984   return period;
3985 }
3986
3987 /* Returns the comparison operator used when eliminating the iv USE.  */
3988
3989 static enum tree_code
3990 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3991 {
3992   struct loop *loop = data->current_loop;
3993   basic_block ex_bb;
3994   edge exit;
3995
3996   ex_bb = bb_for_stmt (use->stmt);
3997   exit = EDGE_SUCC (ex_bb, 0);
3998   if (flow_bb_inside_loop_p (loop, exit->dest))
3999     exit = EDGE_SUCC (ex_bb, 1);
4000
4001   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4002 }
4003
4004 /* Check whether it is possible to express the condition in USE by comparison
4005    of candidate CAND.  If so, store the value compared with to BOUND.  */
4006
4007 static bool
4008 may_eliminate_iv (struct ivopts_data *data,
4009                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4010 {
4011   basic_block ex_bb;
4012   edge exit;
4013   struct tree_niter_desc *niter;
4014   tree nit, nit_type;
4015   tree wider_type, period, per_type;
4016   struct loop *loop = data->current_loop;
4017
4018   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4019     return false;
4020
4021   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4022      for other conditions inside loop body.  */
4023   ex_bb = bb_for_stmt (use->stmt);
4024   if (use->stmt != last_stmt (ex_bb)
4025       || TREE_CODE (use->stmt) != COND_EXPR)
4026     return false;
4027   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4028     return false;
4029
4030   exit = EDGE_SUCC (ex_bb, 0);
4031   if (flow_bb_inside_loop_p (loop, exit->dest))
4032     exit = EDGE_SUCC (ex_bb, 1);
4033   if (flow_bb_inside_loop_p (loop, exit->dest))
4034     return false;
4035
4036   niter = niter_for_exit (data, exit);
4037   if (!niter
4038       || !zero_p (niter->may_be_zero))
4039     return false;
4040
4041   nit = niter->niter;
4042   nit_type = TREE_TYPE (nit);
4043
4044   /* Determine whether we may use the variable to test whether niter iterations
4045      elapsed.  This is the case iff the period of the induction variable is
4046      greater than the number of iterations.  */
4047   period = iv_period (cand->iv);
4048   if (!period)
4049     return false;
4050   per_type = TREE_TYPE (period);
4051
4052   wider_type = TREE_TYPE (period);
4053   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4054     wider_type = per_type;
4055   else
4056     wider_type = nit_type;
4057
4058   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4059                                       fold_convert (wider_type, period),
4060                                       fold_convert (wider_type, nit))))
4061     return false;
4062
4063   *bound = cand_value_at (loop, cand, use->stmt, nit);
4064   return true;
4065 }
4066
4067 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4068
4069 static bool
4070 determine_use_iv_cost_condition (struct ivopts_data *data,
4071                                  struct iv_use *use, struct iv_cand *cand)
4072 {
4073   tree bound = NULL_TREE, op, cond;
4074   bitmap depends_on = NULL;
4075   unsigned cost;
4076
4077   /* Only consider real candidates.  */
4078   if (!cand->iv)
4079     {
4080       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4081       return false;
4082     }
4083
4084   if (may_eliminate_iv (data, use, cand, &bound))
4085     {
4086       cost = force_var_cost (data, bound, &depends_on);
4087
4088       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4089       return cost != INFTY;
4090     }
4091
4092   /* The induction variable elimination failed; just express the original
4093      giv.  If it is compared with an invariant, note that we cannot get
4094      rid of it.  */
4095   cost = get_computation_cost (data, use, cand, false, &depends_on);
4096
4097   cond = *use->op_p;
4098   if (TREE_CODE (cond) != SSA_NAME)
4099     {
4100       op = TREE_OPERAND (cond, 0);
4101       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4102         op = TREE_OPERAND (cond, 1);
4103       if (TREE_CODE (op) == SSA_NAME)
4104         {
4105           op = get_iv (data, op)->base;
4106           fd_ivopts_data = data;
4107           walk_tree (&op, find_depends, &depends_on, NULL);
4108         }
4109     }
4110
4111   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4112   return cost != INFTY;
4113 }
4114
4115 /* Checks whether it is possible to replace the final value of USE by
4116    a direct computation.  If so, the formula is stored to *VALUE.  */
4117
4118 static bool
4119 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4120                          tree *value)
4121 {
4122   struct loop *loop = data->current_loop;
4123   edge exit;
4124   struct tree_niter_desc *niter;
4125
4126   exit = single_dom_exit (loop);
4127   if (!exit)
4128     return false;
4129
4130   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4131                               bb_for_stmt (use->stmt)));
4132
4133   niter = niter_for_single_dom_exit (data);
4134   if (!niter
4135       || !zero_p (niter->may_be_zero))
4136     return false;
4137
4138   *value = iv_value (use->iv, niter->niter);
4139
4140   return true;
4141 }
4142
4143 /* Determines cost of replacing final value of USE using CAND.  */
4144
4145 static bool
4146 determine_use_iv_cost_outer (struct ivopts_data *data,
4147                              struct iv_use *use, struct iv_cand *cand)
4148 {
4149   bitmap depends_on;
4150   unsigned cost;
4151   edge exit;
4152   tree value = NULL_TREE;
4153   struct loop *loop = data->current_loop;
4154
4155   /* The simple case first -- if we need to express value of the preserved
4156      original biv, the cost is 0.  This also prevents us from counting the
4157      cost of increment twice -- once at this use and once in the cost of
4158      the candidate.  */
4159   if (cand->pos == IP_ORIGINAL
4160       && cand->incremented_at == use->stmt)
4161     {
4162       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4163       return true;
4164     }
4165
4166   if (!cand->iv)
4167     {
4168       if (!may_replace_final_value (data, use, &value))
4169         {
4170           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4171           return false;
4172         }
4173
4174       depends_on = NULL;
4175       cost = force_var_cost (data, value, &depends_on);
4176
4177       cost /= AVG_LOOP_NITER (loop);
4178
4179       set_use_iv_cost (data, use, cand, cost, depends_on, value);
4180       return cost != INFTY;
4181     }
4182
4183   exit = single_dom_exit (loop);
4184   if (exit)
4185     {
4186       /* If there is just a single exit, we may use value of the candidate
4187          after we take it to determine the value of use.  */
4188       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4189                                       last_stmt (exit->src));
4190       if (cost != INFTY)
4191         cost /= AVG_LOOP_NITER (loop);
4192     }
4193   else
4194     {
4195       /* Otherwise we just need to compute the iv.  */
4196       cost = get_computation_cost (data, use, cand, false, &depends_on);
4197     }
4198
4199   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4200
4201   return cost != INFTY;
4202 }
4203
4204 /* Determines cost of basing replacement of USE on CAND.  Returns false
4205    if USE cannot be based on CAND.  */
4206
4207 static bool
4208 determine_use_iv_cost (struct ivopts_data *data,
4209                        struct iv_use *use, struct iv_cand *cand)
4210 {
4211   switch (use->type)
4212     {
4213     case USE_NONLINEAR_EXPR:
4214       return determine_use_iv_cost_generic (data, use, cand);
4215
4216     case USE_OUTER:
4217       return determine_use_iv_cost_outer (data, use, cand);
4218
4219     case USE_ADDRESS:
4220       return determine_use_iv_cost_address (data, use, cand);
4221
4222     case USE_COMPARE:
4223       return determine_use_iv_cost_condition (data, use, cand);
4224
4225     default:
4226       gcc_unreachable ();
4227     }
4228 }
4229
4230 /* Determines costs of basing the use of the iv on an iv candidate.  */
4231
4232 static void
4233 determine_use_iv_costs (struct ivopts_data *data)
4234 {
4235   unsigned i, j;
4236   struct iv_use *use;
4237   struct iv_cand *cand;
4238   bitmap to_clear = BITMAP_ALLOC (NULL);
4239
4240   alloc_use_cost_map (data);
4241
4242   for (i = 0; i < n_iv_uses (data); i++)
4243     {
4244       use = iv_use (data, i);
4245
4246       if (data->consider_all_candidates)
4247         {
4248           for (j = 0; j < n_iv_cands (data); j++)
4249             {
4250               cand = iv_cand (data, j);
4251               determine_use_iv_cost (data, use, cand);
4252             }
4253         }
4254       else
4255         {
4256           bitmap_iterator bi;
4257
4258           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4259             {
4260               cand = iv_cand (data, j);
4261               if (!determine_use_iv_cost (data, use, cand))
4262                 bitmap_set_bit (to_clear, j);
4263             }
4264
4265           /* Remove the candidates for that the cost is infinite from
4266              the list of related candidates.  */
4267           bitmap_and_compl_into (use->related_cands, to_clear);
4268           bitmap_clear (to_clear);
4269         }
4270     }
4271
4272   BITMAP_FREE (to_clear);
4273
4274   if (dump_file && (dump_flags & TDF_DETAILS))
4275     {
4276       fprintf (dump_file, "Use-candidate costs:\n");
4277
4278       for (i = 0; i < n_iv_uses (data); i++)
4279         {
4280           use = iv_use (data, i);
4281
4282           fprintf (dump_file, "Use %d:\n", i);
4283           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4284           for (j = 0; j < use->n_map_members; j++)
4285             {
4286               if (!use->cost_map[j].cand
4287                   || use->cost_map[j].cost == INFTY)
4288                 continue;
4289
4290               fprintf (dump_file, "  %d\t%d\t",
4291                        use->cost_map[j].cand->id,
4292                        use->cost_map[j].cost);
4293               if (use->cost_map[j].depends_on)
4294                 bitmap_print (dump_file,
4295                               use->cost_map[j].depends_on, "","");
4296               fprintf (dump_file, "\n");
4297             }
4298
4299           fprintf (dump_file, "\n");
4300         }
4301       fprintf (dump_file, "\n");
4302     }
4303 }
4304
4305 /* Determines cost of the candidate CAND.  */
4306
4307 static void
4308 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4309 {
4310   unsigned cost_base, cost_step;
4311   tree base;
4312
4313   if (!cand->iv)
4314     {
4315       cand->cost = 0;
4316       return;
4317     }
4318
4319   /* There are two costs associated with the candidate -- its increment
4320      and its initialization.  The second is almost negligible for any loop
4321      that rolls enough, so we take it just very little into account.  */
4322
4323   base = cand->iv->base;
4324   cost_base = force_var_cost (data, base, NULL);
4325   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4326
4327   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4328
4329   /* Prefer the original iv unless we may gain something by replacing it;
4330      this is not really relevant for artificial ivs created by other
4331      passes.  */
4332   if (cand->pos == IP_ORIGINAL
4333       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4334     cand->cost--;
4335
4336   /* Prefer not to insert statements into latch unless there are some
4337      already (so that we do not create unnecessary jumps).  */
4338   if (cand->pos == IP_END
4339       && empty_block_p (ip_end_pos (data->current_loop)))
4340     cand->cost++;
4341 }
4342
4343 /* Determines costs of computation of the candidates.  */
4344
4345 static void
4346 determine_iv_costs (struct ivopts_data *data)
4347 {
4348   unsigned i;
4349
4350   if (dump_file && (dump_flags & TDF_DETAILS))
4351     {
4352       fprintf (dump_file, "Candidate costs:\n");
4353       fprintf (dump_file, "  cand\tcost\n");
4354     }
4355
4356   for (i = 0; i < n_iv_cands (data); i++)
4357     {
4358       struct iv_cand *cand = iv_cand (data, i);
4359
4360       determine_iv_cost (data, cand);
4361
4362       if (dump_file && (dump_flags & TDF_DETAILS))
4363         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4364     }
4365
4366 if (dump_file && (dump_flags & TDF_DETAILS))
4367       fprintf (dump_file, "\n");
4368 }
4369
4370 /* Calculates cost for having SIZE induction variables.  */
4371
4372 static unsigned
4373 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4374 {
4375   return global_cost_for_size (size,
4376                                loop_data (data->current_loop)->regs_used,
4377                                n_iv_uses (data));
4378 }
4379
4380 /* For each size of the induction variable set determine the penalty.  */
4381
4382 static void
4383 determine_set_costs (struct ivopts_data *data)
4384 {
4385   unsigned j, n;
4386   tree phi, op;
4387   struct loop *loop = data->current_loop;
4388   bitmap_iterator bi;
4389
4390   /* We use the following model (definitely improvable, especially the
4391      cost function -- TODO):
4392
4393      We estimate the number of registers available (using MD data), name it A.
4394
4395      We estimate the number of registers used by the loop, name it U.  This
4396      number is obtained as the number of loop phi nodes (not counting virtual
4397      registers and bivs) + the number of variables from outside of the loop.
4398
4399      We set a reserve R (free regs that are used for temporary computations,
4400      etc.).  For now the reserve is a constant 3.
4401
4402      Let I be the number of induction variables.
4403
4404      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4405         make a lot of ivs without a reason).
4406      -- if A - R < U + I <= A, the cost is I * PRES_COST
4407      -- if U + I > A, the cost is I * PRES_COST and
4408         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4409
4410   if (dump_file && (dump_flags & TDF_DETAILS))
4411     {
4412       fprintf (dump_file, "Global costs:\n");
4413       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4414       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4415       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4416       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4417     }
4418
4419   n = 0;
4420   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4421     {
4422       op = PHI_RESULT (phi);
4423
4424       if (!is_gimple_reg (op))
4425         continue;
4426
4427       if (get_iv (data, op))
4428         continue;
4429
4430       n++;
4431     }
4432
4433   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4434     {
4435       struct version_info *info = ver_info (data, j);
4436
4437       if (info->inv_id && info->has_nonlin_use)
4438         n++;
4439     }
4440
4441   loop_data (loop)->regs_used = n;
4442   if (dump_file && (dump_flags & TDF_DETAILS))
4443     fprintf (dump_file, "  regs_used %d\n", n);
4444
4445   if (dump_file && (dump_flags & TDF_DETAILS))
4446     {
4447       fprintf (dump_file, "  cost for size:\n");
4448       fprintf (dump_file, "  ivs\tcost\n");
4449       for (j = 0; j <= 2 * target_avail_regs; j++)
4450         fprintf (dump_file, "  %d\t%d\n", j,
4451                  ivopts_global_cost_for_size (data, j));
4452       fprintf (dump_file, "\n");
4453     }
4454 }
4455
4456 /* Returns true if A is a cheaper cost pair than B.  */
4457
4458 static bool
4459 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4460 {
4461   if (!a)
4462     return false;
4463
4464   if (!b)
4465     return true;
4466
4467   if (a->cost < b->cost)
4468     return true;
4469
4470   if (a->cost > b->cost)
4471     return false;
4472
4473   /* In case the costs are the same, prefer the cheaper candidate.  */
4474   if (a->cand->cost < b->cand->cost)
4475     return true;
4476
4477   return false;
4478 }
4479
4480 /* Computes the cost field of IVS structure.  */
4481
4482 static void
4483 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4484 {
4485   unsigned cost = 0;
4486
4487   cost += ivs->cand_use_cost;
4488   cost += ivs->cand_cost;
4489   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4490
4491   ivs->cost = cost;
4492 }
4493
4494 /* Remove invariants in set INVS to set IVS.  */
4495
4496 static void
4497 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4498 {
4499   bitmap_iterator bi;
4500   unsigned iid;
4501
4502   if (!invs)
4503     return;
4504
4505   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4506     {
4507       ivs->n_invariant_uses[iid]--;
4508       if (ivs->n_invariant_uses[iid] == 0)
4509         ivs->n_regs--;
4510     }
4511 }
4512
4513 /* Set USE not to be expressed by any candidate in IVS.  */
4514
4515 static void
4516 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4517                  struct iv_use *use)
4518 {
4519   unsigned uid = use->id, cid;
4520   struct cost_pair *cp;
4521
4522   cp = ivs->cand_for_use[uid];
4523   if (!cp)
4524     return;
4525   cid = cp->cand->id;
4526
4527   ivs->bad_uses++;
4528   ivs->cand_for_use[uid] = NULL;
4529   ivs->n_cand_uses[cid]--;
4530
4531   if (ivs->n_cand_uses[cid] == 0)
4532     {
4533       bitmap_clear_bit (ivs->cands, cid);
4534       /* Do not count the pseudocandidates.  */
4535       if (cp->cand->iv)
4536         ivs->n_regs--;
4537       ivs->n_cands--;
4538       ivs->cand_cost -= cp->cand->cost;
4539
4540       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4541     }
4542
4543   ivs->cand_use_cost -= cp->cost;
4544
4545   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4546   iv_ca_recount_cost (data, ivs);
4547 }
4548
4549 /* Add invariants in set INVS to set IVS.  */
4550
4551 static void
4552 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4553 {
4554   bitmap_iterator bi;
4555   unsigned iid;
4556
4557   if (!invs)
4558     return;
4559
4560   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4561     {
4562       ivs->n_invariant_uses[iid]++;
4563       if (ivs->n_invariant_uses[iid] == 1)
4564         ivs->n_regs++;
4565     }
4566 }
4567
4568 /* Set cost pair for USE in set IVS to CP.  */
4569
4570 static void
4571 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4572               struct iv_use *use, struct cost_pair *cp)
4573 {
4574   unsigned uid = use->id, cid;
4575
4576   if (ivs->cand_for_use[uid] == cp)
4577     return;
4578
4579   if (ivs->cand_for_use[uid])
4580     iv_ca_set_no_cp (data, ivs, use);
4581
4582   if (cp)
4583     {
4584       cid = cp->cand->id;
4585
4586       ivs->bad_uses--;
4587       ivs->cand_for_use[uid] = cp;
4588       ivs->n_cand_uses[cid]++;
4589       if (ivs->n_cand_uses[cid] == 1)
4590         {
4591           bitmap_set_bit (ivs->cands, cid);
4592           /* Do not count the pseudocandidates.  */
4593           if (cp->cand->iv)
4594             ivs->n_regs++;
4595           ivs->n_cands++;
4596           ivs->cand_cost += cp->cand->cost;
4597
4598           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4599         }
4600
4601       ivs->cand_use_cost += cp->cost;
4602       iv_ca_set_add_invariants (ivs, cp->depends_on);
4603       iv_ca_recount_cost (data, ivs);
4604     }
4605 }
4606
4607 /* Extend set IVS by expressing USE by some of the candidates in it
4608    if possible.  */
4609
4610 static void
4611 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4612                struct iv_use *use)
4613 {
4614   struct cost_pair *best_cp = NULL, *cp;
4615   bitmap_iterator bi;
4616   unsigned i;
4617
4618   gcc_assert (ivs->upto >= use->id);
4619
4620   if (ivs->upto == use->id)
4621     {
4622       ivs->upto++;
4623       ivs->bad_uses++;
4624     }
4625
4626   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4627     {
4628       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4629
4630       if (cheaper_cost_pair (cp, best_cp))
4631         best_cp = cp;
4632     }
4633
4634   iv_ca_set_cp (data, ivs, use, best_cp);
4635 }
4636
4637 /* Get cost for assignment IVS.  */
4638
4639 static unsigned
4640 iv_ca_cost (struct iv_ca *ivs)
4641 {
4642   return (ivs->bad_uses ? INFTY : ivs->cost);
4643 }
4644
4645 /* Returns true if all dependences of CP are among invariants in IVS.  */
4646
4647 static bool
4648 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4649 {
4650   unsigned i;
4651   bitmap_iterator bi;
4652
4653   if (!cp->depends_on)
4654     return true;
4655
4656   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4657     {
4658       if (ivs->n_invariant_uses[i] == 0)
4659         return false;
4660     }
4661
4662   return true;
4663 }
4664
4665 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4666    it before NEXT_CHANGE.  */
4667
4668 static struct iv_ca_delta *
4669 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4670                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4671 {
4672   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4673
4674   change->use = use;
4675   change->old_cp = old_cp;
4676   change->new_cp = new_cp;
4677   change->next_change = next_change;
4678
4679   return change;
4680 }
4681
4682 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4683    are rewritten.  */
4684
4685 static struct iv_ca_delta *
4686 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4687 {
4688   struct iv_ca_delta *last;
4689
4690   if (!l2)
4691     return l1;
4692
4693   if (!l1)
4694     return l2;
4695
4696   for (last = l1; last->next_change; last = last->next_change)
4697     continue;
4698   last->next_change = l2;
4699
4700   return l1;
4701 }
4702
4703 /* Returns candidate by that USE is expressed in IVS.  */
4704
4705 static struct cost_pair *
4706 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4707 {
4708   return ivs->cand_for_use[use->id];
4709 }
4710
4711 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4712
4713 static struct iv_ca_delta *
4714 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4715 {
4716   struct iv_ca_delta *act, *next, *prev = NULL;
4717   struct cost_pair *tmp;
4718
4719   for (act = delta; act; act = next)
4720     {
4721       next = act->next_change;
4722       act->next_change = prev;
4723       prev = act;
4724
4725       tmp = act->old_cp;
4726       act->old_cp = act->new_cp;
4727       act->new_cp = tmp;
4728     }
4729
4730   return prev;
4731 }
4732
4733 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4734    reverted instead.  */
4735
4736 static void
4737 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4738                     struct iv_ca_delta *delta, bool forward)
4739 {
4740   struct cost_pair *from, *to;
4741   struct iv_ca_delta *act;
4742
4743   if (!forward)
4744     delta = iv_ca_delta_reverse (delta);
4745
4746   for (act = delta; act; act = act->next_change)
4747     {
4748       from = act->old_cp;
4749       to = act->new_cp;
4750       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4751       iv_ca_set_cp (data, ivs, act->use, to);
4752     }
4753
4754   if (!forward)
4755     iv_ca_delta_reverse (delta);
4756 }
4757
4758 /* Returns true if CAND is used in IVS.  */
4759
4760 static bool
4761 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4762 {
4763   return ivs->n_cand_uses[cand->id] > 0;
4764 }
4765
4766 /* Returns number of induction variable candidates in the set IVS.  */
4767
4768 static unsigned
4769 iv_ca_n_cands (struct iv_ca *ivs)
4770 {
4771   return ivs->n_cands;
4772 }
4773
4774 /* Free the list of changes DELTA.  */
4775
4776 static void
4777 iv_ca_delta_free (struct iv_ca_delta **delta)
4778 {
4779   struct iv_ca_delta *act, *next;
4780
4781   for (act = *delta; act; act = next)
4782     {
4783       next = act->next_change;
4784       free (act);
4785     }
4786
4787   *delta = NULL;
4788 }
4789
4790 /* Allocates new iv candidates assignment.  */
4791
4792 static struct iv_ca *
4793 iv_ca_new (struct ivopts_data *data)
4794 {
4795   struct iv_ca *nw = XNEW (struct iv_ca);
4796
4797   nw->upto = 0;
4798   nw->bad_uses = 0;
4799   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4800   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4801   nw->cands = BITMAP_ALLOC (NULL);
4802   nw->n_cands = 0;
4803   nw->n_regs = 0;
4804   nw->cand_use_cost = 0;
4805   nw->cand_cost = 0;
4806   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4807   nw->cost = 0;
4808
4809   return nw;
4810 }
4811
4812 /* Free memory occupied by the set IVS.  */
4813
4814 static void
4815 iv_ca_free (struct iv_ca **ivs)
4816 {
4817   free ((*ivs)->cand_for_use);
4818   free ((*ivs)->n_cand_uses);
4819   BITMAP_FREE ((*ivs)->cands);
4820   free ((*ivs)->n_invariant_uses);
4821   free (*ivs);
4822   *ivs = NULL;
4823 }
4824
4825 /* Dumps IVS to FILE.  */
4826
4827 static void
4828 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4829 {
4830   const char *pref = "  invariants ";
4831   unsigned i;
4832
4833   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4834   bitmap_print (file, ivs->cands, "  candidates ","\n");
4835
4836   for (i = 1; i <= data->max_inv_id; i++)
4837     if (ivs->n_invariant_uses[i])
4838       {
4839         fprintf (file, "%s%d", pref, i);
4840         pref = ", ";
4841       }
4842   fprintf (file, "\n");
4843 }
4844
4845 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4846    new set, and store differences in DELTA.  Number of induction variables
4847    in the new set is stored to N_IVS.  */
4848
4849 static unsigned
4850 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4851               struct iv_cand *cand, struct iv_ca_delta **delta,
4852               unsigned *n_ivs)
4853 {
4854   unsigned i, cost;
4855   struct iv_use *use;
4856   struct cost_pair *old_cp, *new_cp;
4857
4858   *delta = NULL;
4859   for (i = 0; i < ivs->upto; i++)
4860     {
4861       use = iv_use (data, i);
4862       old_cp = iv_ca_cand_for_use (ivs, use);
4863
4864       if (old_cp
4865           && old_cp->cand == cand)
4866         continue;
4867
4868       new_cp = get_use_iv_cost (data, use, cand);
4869       if (!new_cp)
4870         continue;
4871
4872       if (!iv_ca_has_deps (ivs, new_cp))
4873         continue;
4874
4875       if (!cheaper_cost_pair (new_cp, old_cp))
4876         continue;
4877
4878       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4879     }
4880
4881   iv_ca_delta_commit (data, ivs, *delta, true);
4882   cost = iv_ca_cost (ivs);
4883   if (n_ivs)
4884     *n_ivs = iv_ca_n_cands (ivs);
4885   iv_ca_delta_commit (data, ivs, *delta, false);
4886
4887   return cost;
4888 }
4889
4890 /* Try narrowing set IVS by removing CAND.  Return the cost of
4891    the new set and store the differences in DELTA.  */
4892
4893 static unsigned
4894 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4895               struct iv_cand *cand, struct iv_ca_delta **delta)
4896 {
4897   unsigned i, ci;
4898   struct iv_use *use;
4899   struct cost_pair *old_cp, *new_cp, *cp;
4900   bitmap_iterator bi;
4901   struct iv_cand *cnd;
4902   unsigned cost;
4903
4904   *delta = NULL;
4905   for (i = 0; i < n_iv_uses (data); i++)
4906     {
4907       use = iv_use (data, i);
4908
4909       old_cp = iv_ca_cand_for_use (ivs, use);
4910       if (old_cp->cand != cand)
4911         continue;
4912
4913       new_cp = NULL;
4914
4915       if (data->consider_all_candidates)
4916         {
4917           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4918             {
4919               if (ci == cand->id)
4920                 continue;
4921
4922               cnd = iv_cand (data, ci);
4923
4924               cp = get_use_iv_cost (data, use, cnd);
4925               if (!cp)
4926                 continue;
4927               if (!iv_ca_has_deps (ivs, cp))
4928                 continue;
4929
4930               if (!cheaper_cost_pair (cp, new_cp))
4931                 continue;
4932
4933               new_cp = cp;
4934             }
4935         }
4936       else
4937         {
4938           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4939             {
4940               if (ci == cand->id)
4941                 continue;
4942
4943               cnd = iv_cand (data, ci);
4944
4945               cp = get_use_iv_cost (data, use, cnd);
4946               if (!cp)
4947                 continue;
4948               if (!iv_ca_has_deps (ivs, cp))
4949                 continue;
4950
4951               if (!cheaper_cost_pair (cp, new_cp))
4952                 continue;
4953
4954               new_cp = cp;
4955             }
4956         }
4957
4958       if (!new_cp)
4959         {
4960           iv_ca_delta_free (delta);
4961           return INFTY;
4962         }
4963
4964       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4965     }
4966
4967   iv_ca_delta_commit (data, ivs, *delta, true);
4968   cost = iv_ca_cost (ivs);
4969   iv_ca_delta_commit (data, ivs, *delta, false);
4970
4971   return cost;
4972 }
4973
4974 /* Try optimizing the set of candidates IVS by removing candidates different
4975    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4976    differences in DELTA.  */
4977
4978 static unsigned
4979 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4980              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4981 {
4982   bitmap_iterator bi;
4983   struct iv_ca_delta *act_delta, *best_delta;
4984   unsigned i, best_cost, acost;
4985   struct iv_cand *cand;
4986
4987   best_delta = NULL;
4988   best_cost = iv_ca_cost (ivs);
4989
4990   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4991     {
4992       cand = iv_cand (data, i);
4993
4994       if (cand == except_cand)
4995         continue;
4996
4997       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4998
4999       if (acost < best_cost)
5000         {
5001           best_cost = acost;
5002           iv_ca_delta_free (&best_delta);
5003           best_delta = act_delta;
5004         }
5005       else
5006         iv_ca_delta_free (&act_delta);
5007     }
5008
5009   if (!best_delta)
5010     {
5011       *delta = NULL;
5012       return best_cost;
5013     }
5014
5015   /* Recurse to possibly remove other unnecessary ivs.  */
5016   iv_ca_delta_commit (data, ivs, best_delta, true);
5017   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5018   iv_ca_delta_commit (data, ivs, best_delta, false);
5019   *delta = iv_ca_delta_join (best_delta, *delta);
5020   return best_cost;
5021 }
5022
5023 /* Tries to extend the sets IVS in the best possible way in order
5024    to express the USE.  */
5025
5026 static bool
5027 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5028                   struct iv_use *use)
5029 {
5030   unsigned best_cost, act_cost;
5031   unsigned i;
5032   bitmap_iterator bi;
5033   struct iv_cand *cand;
5034   struct iv_ca_delta *best_delta = NULL, *act_delta;
5035   struct cost_pair *cp;
5036
5037   iv_ca_add_use (data, ivs, use);
5038   best_cost = iv_ca_cost (ivs);
5039
5040   cp = iv_ca_cand_for_use (ivs, use);
5041   if (cp)
5042     {
5043       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5044       iv_ca_set_no_cp (data, ivs, use);
5045     }
5046
5047   /* First try important candidates.  Only if it fails, try the specific ones.
5048      Rationale -- in loops with many variables the best choice often is to use
5049      just one generic biv.  If we added here many ivs specific to the uses,
5050      the optimization algorithm later would be likely to get stuck in a local
5051      minimum, thus causing us to create too many ivs.  The approach from
5052      few ivs to more seems more likely to be successful -- starting from few
5053      ivs, replacing an expensive use by a specific iv should always be a
5054      win.  */
5055   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5056     {
5057       cand = iv_cand (data, i);
5058
5059       if (iv_ca_cand_used_p (ivs, cand))
5060         continue;
5061
5062       cp = get_use_iv_cost (data, use, cand);
5063       if (!cp)
5064         continue;
5065
5066       iv_ca_set_cp (data, ivs, use, cp);
5067       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5068       iv_ca_set_no_cp (data, ivs, use);
5069       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5070
5071       if (act_cost < best_cost)
5072         {
5073           best_cost = act_cost;
5074
5075           iv_ca_delta_free (&best_delta);
5076           best_delta = act_delta;
5077         }
5078       else
5079         iv_ca_delta_free (&act_delta);
5080     }
5081
5082   if (best_cost == INFTY)
5083     {
5084       for (i = 0; i < use->n_map_members; i++)
5085         {
5086           cp = use->cost_map + i;
5087           cand = cp->cand;
5088           if (!cand)
5089             continue;
5090
5091           /* Already tried this.  */
5092           if (cand->important)
5093             continue;
5094
5095           if (iv_ca_cand_used_p (ivs, cand))
5096             continue;
5097
5098           act_delta = NULL;
5099           iv_ca_set_cp (data, ivs, use, cp);
5100           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5101           iv_ca_set_no_cp (data, ivs, use);
5102           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5103                                        cp, act_delta);
5104
5105           if (act_cost < best_cost)
5106             {
5107               best_cost = act_cost;
5108
5109               if (best_delta)
5110                 iv_ca_delta_free (&best_delta);
5111               best_delta = act_delta;
5112             }
5113           else
5114             iv_ca_delta_free (&act_delta);
5115         }
5116     }
5117
5118   iv_ca_delta_commit (data, ivs, best_delta, true);
5119   iv_ca_delta_free (&best_delta);
5120
5121   return (best_cost != INFTY);
5122 }
5123
5124 /* Finds an initial assignment of candidates to uses.  */
5125
5126 static struct iv_ca *
5127 get_initial_solution (struct ivopts_data *data)
5128 {
5129   struct iv_ca *ivs = iv_ca_new (data);
5130   unsigned i;
5131
5132   for (i = 0; i < n_iv_uses (data); i++)
5133     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5134       {
5135         iv_ca_free (&ivs);
5136         return NULL;
5137       }
5138
5139   return ivs;
5140 }
5141
5142 /* Tries to improve set of induction variables IVS.  */
5143
5144 static bool
5145 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5146 {
5147   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5148   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5149   struct iv_cand *cand;
5150
5151   /* Try extending the set of induction variables by one.  */
5152   for (i = 0; i < n_iv_cands (data); i++)
5153     {
5154       cand = iv_cand (data, i);
5155
5156       if (iv_ca_cand_used_p (ivs, cand))
5157         continue;
5158
5159       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5160       if (!act_delta)
5161         continue;
5162
5163       /* If we successfully added the candidate and the set is small enough,
5164          try optimizing it by removing other candidates.  */
5165       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5166         {
5167           iv_ca_delta_commit (data, ivs, act_delta, true);
5168           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5169           iv_ca_delta_commit (data, ivs, act_delta, false);
5170           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5171         }
5172
5173       if (acost < best_cost)
5174         {
5175           best_cost = acost;
5176           iv_ca_delta_free (&best_delta);
5177           best_delta = act_delta;
5178         }
5179       else
5180         iv_ca_delta_free (&act_delta);
5181     }
5182
5183   if (!best_delta)
5184     {
5185       /* Try removing the candidates from the set instead.  */
5186       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5187
5188       /* Nothing more we can do.  */
5189       if (!best_delta)
5190         return false;
5191     }
5192
5193   iv_ca_delta_commit (data, ivs, best_delta, true);
5194   gcc_assert (best_cost == iv_ca_cost (ivs));
5195   iv_ca_delta_free (&best_delta);
5196   return true;
5197 }
5198
5199 /* Attempts to find the optimal set of induction variables.  We do simple
5200    greedy heuristic -- we try to replace at most one candidate in the selected
5201    solution and remove the unused ivs while this improves the cost.  */
5202
5203 static struct iv_ca *
5204 find_optimal_iv_set (struct ivopts_data *data)
5205 {
5206   unsigned i;
5207   struct iv_ca *set;
5208   struct iv_use *use;
5209
5210   /* Get the initial solution.  */
5211   set = get_initial_solution (data);
5212   if (!set)
5213     {
5214       if (dump_file && (dump_flags & TDF_DETAILS))
5215         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5216       return NULL;
5217     }
5218
5219   if (dump_file && (dump_flags & TDF_DETAILS))
5220     {
5221       fprintf (dump_file, "Initial set of candidates:\n");
5222       iv_ca_dump (data, dump_file, set);
5223     }
5224
5225   while (try_improve_iv_set (data, set))
5226     {
5227       if (dump_file && (dump_flags & TDF_DETAILS))
5228         {
5229           fprintf (dump_file, "Improved to:\n");
5230           iv_ca_dump (data, dump_file, set);
5231         }
5232     }
5233
5234   if (dump_file && (dump_flags & TDF_DETAILS))
5235     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5236
5237   for (i = 0; i < n_iv_uses (data); i++)
5238     {
5239       use = iv_use (data, i);
5240       use->selected = iv_ca_cand_for_use (set, use)->cand;
5241     }
5242
5243   return set;
5244 }
5245
5246 /* Creates a new induction variable corresponding to CAND.  */
5247
5248 static void
5249 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5250 {
5251   block_stmt_iterator incr_pos;
5252   tree base;
5253   bool after = false;
5254
5255   if (!cand->iv)
5256     return;
5257
5258   switch (cand->pos)
5259     {
5260     case IP_NORMAL:
5261       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5262       break;
5263
5264     case IP_END:
5265       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5266       after = true;
5267       break;
5268
5269     case IP_ORIGINAL:
5270       /* Mark that the iv is preserved.  */
5271       name_info (data, cand->var_before)->preserve_biv = true;
5272       name_info (data, cand->var_after)->preserve_biv = true;
5273
5274       /* Rewrite the increment so that it uses var_before directly.  */
5275       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5276
5277       return;
5278     }
5279
5280   gimple_add_tmp_var (cand->var_before);
5281   add_referenced_tmp_var (cand->var_before);
5282
5283   base = unshare_expr (cand->iv->base);
5284
5285   create_iv (base, unshare_expr (cand->iv->step),
5286              cand->var_before, data->current_loop,
5287              &incr_pos, after, &cand->var_before, &cand->var_after);
5288 }
5289
5290 /* Creates new induction variables described in SET.  */
5291
5292 static void
5293 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5294 {
5295   unsigned i;
5296   struct iv_cand *cand;
5297   bitmap_iterator bi;
5298
5299   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5300     {
5301       cand = iv_cand (data, i);
5302       create_new_iv (data, cand);
5303     }
5304 }
5305
5306 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5307    is true, remove also the ssa name defined by the statement.  */
5308
5309 static void
5310 remove_statement (tree stmt, bool including_defined_name)
5311 {
5312   if (TREE_CODE (stmt) == PHI_NODE)
5313     {
5314       if (!including_defined_name)
5315         {
5316           /* Prevent the ssa name defined by the statement from being removed.  */
5317           SET_PHI_RESULT (stmt, NULL);
5318         }
5319       remove_phi_node (stmt, NULL_TREE);
5320     }
5321   else
5322     {
5323       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5324
5325       bsi_remove (&bsi, true);
5326     }
5327 }
5328
5329 /* Rewrites USE (definition of iv used in a nonlinear expression)
5330    using candidate CAND.  */
5331
5332 static void
5333 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5334                             struct iv_use *use, struct iv_cand *cand)
5335 {
5336   tree comp;
5337   tree op, stmts, tgt, ass;
5338   block_stmt_iterator bsi, pbsi;
5339
5340   /* An important special case -- if we are asked to express value of
5341      the original iv by itself, just exit; there is no need to
5342      introduce a new computation (that might also need casting the
5343      variable to unsigned and back).  */
5344   if (cand->pos == IP_ORIGINAL
5345       && cand->incremented_at == use->stmt)
5346     {
5347       tree step, ctype, utype;
5348       enum tree_code incr_code = PLUS_EXPR;
5349
5350       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5351       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5352
5353       step = cand->iv->step;
5354       ctype = TREE_TYPE (step);
5355       utype = TREE_TYPE (cand->var_after);
5356       if (TREE_CODE (step) == NEGATE_EXPR)
5357         {
5358           incr_code = MINUS_EXPR;
5359           step = TREE_OPERAND (step, 0);
5360         }
5361
5362       /* Check whether we may leave the computation unchanged.
5363          This is the case only if it does not rely on other
5364          computations in the loop -- otherwise, the computation
5365          we rely upon may be removed in remove_unused_ivs,
5366          thus leading to ICE.  */
5367       op = TREE_OPERAND (use->stmt, 1);
5368       if (TREE_CODE (op) == PLUS_EXPR
5369           || TREE_CODE (op) == MINUS_EXPR)
5370         {
5371           if (TREE_OPERAND (op, 0) == cand->var_before)
5372             op = TREE_OPERAND (op, 1);
5373           else if (TREE_CODE (op) == PLUS_EXPR
5374                    && TREE_OPERAND (op, 1) == cand->var_before)
5375             op = TREE_OPERAND (op, 0);
5376           else
5377             op = NULL_TREE;
5378         }
5379       else
5380         op = NULL_TREE;
5381
5382       if (op
5383           && (TREE_CODE (op) == INTEGER_CST
5384               || operand_equal_p (op, step, 0)))
5385         return;
5386
5387       /* Otherwise, add the necessary computations to express
5388          the iv.  */
5389       op = fold_convert (ctype, cand->var_before);
5390       comp = fold_convert (utype,
5391                            build2 (incr_code, ctype, op,
5392                                    unshare_expr (step)));
5393     }
5394   else
5395     comp = get_computation (data->current_loop, use, cand);
5396
5397   switch (TREE_CODE (use->stmt))
5398     {
5399     case PHI_NODE:
5400       tgt = PHI_RESULT (use->stmt);
5401
5402       /* If we should keep the biv, do not replace it.  */
5403       if (name_info (data, tgt)->preserve_biv)
5404         return;
5405
5406       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5407       while (!bsi_end_p (pbsi)
5408              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5409         {
5410           bsi = pbsi;
5411           bsi_next (&pbsi);
5412         }
5413       break;
5414
5415     case MODIFY_EXPR:
5416       tgt = TREE_OPERAND (use->stmt, 0);
5417       bsi = bsi_for_stmt (use->stmt);
5418       break;
5419
5420     default:
5421       gcc_unreachable ();
5422     }
5423
5424   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5425
5426   if (TREE_CODE (use->stmt) == PHI_NODE)
5427     {
5428       if (stmts)
5429         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5430       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5431       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5432       remove_statement (use->stmt, false);
5433       SSA_NAME_DEF_STMT (tgt) = ass;
5434     }
5435   else
5436     {
5437       if (stmts)
5438         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5439       TREE_OPERAND (use->stmt, 1) = op;
5440     }
5441 }
5442
5443 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5444    for_each_index.  */
5445
5446 static bool
5447 idx_remove_ssa_names (tree base, tree *idx,
5448                       void *data ATTRIBUTE_UNUSED)
5449 {
5450   tree *op;
5451
5452   if (TREE_CODE (*idx) == SSA_NAME)
5453     *idx = SSA_NAME_VAR (*idx);
5454
5455   if (TREE_CODE (base) == ARRAY_REF)
5456     {
5457       op = &TREE_OPERAND (base, 2);
5458       if (*op
5459           && TREE_CODE (*op) == SSA_NAME)
5460         *op = SSA_NAME_VAR (*op);
5461       op = &TREE_OPERAND (base, 3);
5462       if (*op
5463           && TREE_CODE (*op) == SSA_NAME)
5464         *op = SSA_NAME_VAR (*op);
5465     }
5466
5467   return true;
5468 }
5469
5470 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5471
5472 static tree
5473 unshare_and_remove_ssa_names (tree ref)
5474 {
5475   ref = unshare_expr (ref);
5476   for_each_index (&ref, idx_remove_ssa_names, NULL);
5477
5478   return ref;
5479 }
5480
5481 /* Extract the alias analysis info for the memory reference REF.  There are
5482    several ways how this information may be stored and what precisely is
5483    its semantics depending on the type of the reference, but there always is
5484    somewhere hidden one _DECL node that is used to determine the set of
5485    virtual operands for the reference.  The code below deciphers this jungle
5486    and extracts this single useful piece of information.  */
5487
5488 static tree
5489 get_ref_tag (tree ref, tree orig)
5490 {
5491   tree var = get_base_address (ref);
5492   tree aref = NULL_TREE, tag, sv;
5493   HOST_WIDE_INT offset, size, maxsize;
5494
5495   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5496     {
5497       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5498       if (ref)
5499         break;
5500     }
5501
5502   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5503     return unshare_expr (sv);
5504
5505   if (!var)
5506     return NULL_TREE;
5507
5508   if (TREE_CODE (var) == INDIRECT_REF)
5509     {
5510       /* In case the base is a dereference of a pointer, first check its name
5511          mem tag, and if it does not have one, use type mem tag.  */
5512       var = TREE_OPERAND (var, 0);
5513       if (TREE_CODE (var) != SSA_NAME)
5514         return NULL_TREE;
5515
5516       if (SSA_NAME_PTR_INFO (var))
5517         {
5518           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5519           if (tag)
5520             return tag;
5521         }
5522
5523       var = SSA_NAME_VAR (var);
5524       tag = var_ann (var)->type_mem_tag;
5525       gcc_assert (tag != NULL_TREE);
5526       return tag;
5527     }
5528   else
5529     {
5530       if (!DECL_P (var))
5531         return NULL_TREE;
5532
5533       tag = var_ann (var)->type_mem_tag;
5534       if (tag)
5535         return tag;
5536
5537       return var;
5538     }
5539 }
5540
5541 /* Copies the reference information from OLD_REF to NEW_REF.  */
5542
5543 static void
5544 copy_ref_info (tree new_ref, tree old_ref)
5545 {
5546   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5547     copy_mem_ref_info (new_ref, old_ref);
5548   else
5549     {
5550       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5551       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5552     }
5553 }
5554
5555 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5556
5557 static void
5558 rewrite_use_address (struct ivopts_data *data,
5559                      struct iv_use *use, struct iv_cand *cand)
5560 {
5561   struct affine_tree_combination aff;
5562   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5563   tree ref;
5564
5565   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5566   unshare_aff_combination (&aff);
5567
5568   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5569   copy_ref_info (ref, *use->op_p);
5570   *use->op_p = ref;
5571 }
5572
5573 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5574    candidate CAND.  */
5575
5576 static void
5577 rewrite_use_compare (struct ivopts_data *data,
5578                      struct iv_use *use, struct iv_cand *cand)
5579 {
5580   tree comp;
5581   tree *op_p, cond, op, stmts, bound;
5582   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5583   enum tree_code compare;
5584   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5585
5586   bound = cp->value;
5587   if (bound)
5588     {
5589       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5590       tree var_type = TREE_TYPE (var);
5591
5592       compare = iv_elimination_compare (data, use);
5593       bound = fold_convert (var_type, bound);
5594       op = force_gimple_operand (unshare_expr (bound), &stmts,
5595                                  true, NULL_TREE);
5596
5597       if (stmts)
5598         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5599
5600       *use->op_p = build2 (compare, boolean_type_node, var, op);
5601       update_stmt (use->stmt);
5602       return;
5603     }
5604
5605   /* The induction variable elimination failed; just express the original
5606      giv.  */
5607   comp = get_computation (data->current_loop, use, cand);
5608
5609   cond = *use->op_p;
5610   op_p = &TREE_OPERAND (cond, 0);
5611   if (TREE_CODE (*op_p) != SSA_NAME
5612       || zero_p (get_iv (data, *op_p)->step))
5613     op_p = &TREE_OPERAND (cond, 1);
5614
5615   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5616   if (stmts)
5617     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5618
5619   *op_p = op;
5620 }
5621
5622 /* Ensure that operand *OP_P may be used at the end of EXIT without
5623    violating loop closed ssa form.  */
5624
5625 static void
5626 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5627 {
5628   basic_block def_bb;
5629   struct loop *def_loop;
5630   tree phi, use;
5631
5632   use = USE_FROM_PTR (op_p);
5633   if (TREE_CODE (use) != SSA_NAME)
5634     return;
5635
5636   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5637   if (!def_bb)
5638     return;
5639
5640   def_loop = def_bb->loop_father;
5641   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5642     return;
5643
5644   /* Try finding a phi node that copies the value out of the loop.  */
5645   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5646     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5647       break;
5648
5649   if (!phi)
5650     {
5651       /* Create such a phi node.  */
5652       tree new_name = duplicate_ssa_name (use, NULL);
5653
5654       phi = create_phi_node (new_name, exit->dest);
5655       SSA_NAME_DEF_STMT (new_name) = phi;
5656       add_phi_arg (phi, use, exit);
5657     }
5658
5659   SET_USE (op_p, PHI_RESULT (phi));
5660 }
5661
5662 /* Ensure that operands of STMT may be used at the end of EXIT without
5663    violating loop closed ssa form.  */
5664
5665 static void
5666 protect_loop_closed_ssa_form (edge exit, tree stmt)
5667 {
5668   ssa_op_iter iter;
5669   use_operand_p use_p;
5670
5671   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5672     protect_loop_closed_ssa_form_use (exit, use_p);
5673 }
5674
5675 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5676    so that they are emitted on the correct place, and so that the loop closed
5677    ssa form is preserved.  */
5678
5679 void
5680 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5681 {
5682   tree_stmt_iterator tsi;
5683   block_stmt_iterator bsi;
5684   tree phi, stmt, def, next;
5685
5686   if (!single_pred_p (exit->dest))
5687     split_loop_exit_edge (exit);
5688
5689   /* Ensure there is label in exit->dest, so that we can
5690      insert after it.  */
5691   tree_block_label (exit->dest);
5692   bsi = bsi_after_labels (exit->dest);
5693
5694   if (TREE_CODE (stmts) == STATEMENT_LIST)
5695     {
5696       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5697         {
5698           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5699           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5700         }
5701     }
5702   else
5703     {
5704       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5705       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5706     }
5707
5708   if (!op)
5709     return;
5710
5711   for (phi = phi_nodes (exit->dest); phi; phi = next)
5712     {
5713       next = PHI_CHAIN (phi);
5714
5715       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5716         {
5717           def = PHI_RESULT (phi);
5718           remove_statement (phi, false);
5719           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5720                         def, op);
5721           SSA_NAME_DEF_STMT (def) = stmt;
5722           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5723         }
5724     }
5725 }
5726
5727 /* Rewrites the final value of USE (that is only needed outside of the loop)
5728    using candidate CAND.  */
5729
5730 static void
5731 rewrite_use_outer (struct ivopts_data *data,
5732                    struct iv_use *use, struct iv_cand *cand)
5733 {
5734   edge exit;
5735   tree value, op, stmts, tgt;
5736   tree phi;
5737
5738   switch (TREE_CODE (use->stmt))
5739     {
5740     case PHI_NODE:
5741       tgt = PHI_RESULT (use->stmt);
5742       break;
5743     case MODIFY_EXPR:
5744       tgt = TREE_OPERAND (use->stmt, 0);
5745       break;
5746     default:
5747       gcc_unreachable ();
5748     }
5749
5750   exit = single_dom_exit (data->current_loop);
5751
5752   if (exit)
5753     {
5754       if (!cand->iv)
5755         {
5756           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5757           value = unshare_expr (cp->value);
5758         }
5759       else
5760         value = get_computation_at (data->current_loop,
5761                                     use, cand, last_stmt (exit->src));
5762
5763       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5764
5765       /* If we will preserve the iv anyway and we would need to perform
5766          some computation to replace the final value, do nothing.  */
5767       if (stmts && name_info (data, tgt)->preserve_biv)
5768         return;
5769
5770       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5771         {
5772           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5773
5774           if (USE_FROM_PTR (use_p) == tgt)
5775             SET_USE (use_p, op);
5776         }
5777
5778       if (stmts)
5779         compute_phi_arg_on_exit (exit, stmts, op);
5780
5781       /* Enable removal of the statement.  We cannot remove it directly,
5782          since we may still need the aliasing information attached to the
5783          ssa name defined by it.  */
5784       name_info (data, tgt)->iv->have_use_for = false;
5785       return;
5786     }
5787
5788   /* If the variable is going to be preserved anyway, there is nothing to
5789      do.  */
5790   if (name_info (data, tgt)->preserve_biv)
5791     return;
5792
5793   /* Otherwise we just need to compute the iv.  */
5794   rewrite_use_nonlinear_expr (data, use, cand);
5795 }
5796
5797 /* Rewrites USE using candidate CAND.  */
5798
5799 static void
5800 rewrite_use (struct ivopts_data *data,
5801              struct iv_use *use, struct iv_cand *cand)
5802 {
5803   switch (use->type)
5804     {
5805       case USE_NONLINEAR_EXPR:
5806         rewrite_use_nonlinear_expr (data, use, cand);
5807         break;
5808
5809       case USE_OUTER:
5810         rewrite_use_outer (data, use, cand);
5811         break;
5812
5813       case USE_ADDRESS:
5814         rewrite_use_address (data, use, cand);
5815         break;
5816
5817       case USE_COMPARE:
5818         rewrite_use_compare (data, use, cand);
5819         break;
5820
5821       default:
5822         gcc_unreachable ();
5823     }
5824   mark_new_vars_to_rename (use->stmt);
5825 }
5826
5827 /* Rewrite the uses using the selected induction variables.  */
5828
5829 static void
5830 rewrite_uses (struct ivopts_data *data)
5831 {
5832   unsigned i;
5833   struct iv_cand *cand;
5834   struct iv_use *use;
5835
5836   for (i = 0; i < n_iv_uses (data); i++)
5837     {
5838       use = iv_use (data, i);
5839       cand = use->selected;
5840       gcc_assert (cand);
5841
5842       rewrite_use (data, use, cand);
5843     }
5844 }
5845
5846 /* Removes the ivs that are not used after rewriting.  */
5847
5848 static void
5849 remove_unused_ivs (struct ivopts_data *data)
5850 {
5851   unsigned j;
5852   bitmap_iterator bi;
5853
5854   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5855     {
5856       struct version_info *info;
5857
5858       info = ver_info (data, j);
5859       if (info->iv
5860           && !zero_p (info->iv->step)
5861           && !info->inv_id
5862           && !info->iv->have_use_for
5863           && !info->preserve_biv)
5864         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5865     }
5866 }
5867
5868 /* Frees data allocated by the optimization of a single loop.  */
5869
5870 static void
5871 free_loop_data (struct ivopts_data *data)
5872 {
5873   unsigned i, j;
5874   bitmap_iterator bi;
5875   tree obj;
5876
5877   htab_empty (data->niters);
5878
5879   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5880     {
5881       struct version_info *info;
5882
5883       info = ver_info (data, i);
5884       if (info->iv)
5885         free (info->iv);
5886       info->iv = NULL;
5887       info->has_nonlin_use = false;
5888       info->preserve_biv = false;
5889       info->inv_id = 0;
5890     }
5891   bitmap_clear (data->relevant);
5892   bitmap_clear (data->important_candidates);
5893
5894   for (i = 0; i < n_iv_uses (data); i++)
5895     {
5896       struct iv_use *use = iv_use (data, i);
5897
5898       free (use->iv);
5899       BITMAP_FREE (use->related_cands);
5900       for (j = 0; j < use->n_map_members; j++)
5901         if (use->cost_map[j].depends_on)
5902           BITMAP_FREE (use->cost_map[j].depends_on);
5903       free (use->cost_map);
5904       free (use);
5905     }
5906   VEC_truncate (iv_use_p, data->iv_uses, 0);
5907
5908   for (i = 0; i < n_iv_cands (data); i++)
5909     {
5910       struct iv_cand *cand = iv_cand (data, i);
5911
5912       if (cand->iv)
5913         free (cand->iv);
5914       if (cand->depends_on)
5915         BITMAP_FREE (cand->depends_on);
5916       free (cand);
5917     }
5918   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5919
5920   if (data->version_info_size < num_ssa_names)
5921     {
5922       data->version_info_size = 2 * num_ssa_names;
5923       free (data->version_info);
5924       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5925     }
5926
5927   data->max_inv_id = 0;
5928
5929   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5930     SET_DECL_RTL (obj, NULL_RTX);
5931
5932   VEC_truncate (tree, decl_rtl_to_reset, 0);
5933 }
5934
5935 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5936    loop tree.  */
5937
5938 static void
5939 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5940 {
5941   unsigned i;
5942
5943   for (i = 1; i < loops->num; i++)
5944     if (loops->parray[i])
5945       {
5946         free (loops->parray[i]->aux);
5947         loops->parray[i]->aux = NULL;
5948       }
5949
5950   free_loop_data (data);
5951   free (data->version_info);
5952   BITMAP_FREE (data->relevant);
5953   BITMAP_FREE (data->important_candidates);
5954   htab_delete (data->niters);
5955
5956   VEC_free (tree, heap, decl_rtl_to_reset);
5957   VEC_free (iv_use_p, heap, data->iv_uses);
5958   VEC_free (iv_cand_p, heap, data->iv_candidates);
5959 }
5960
5961 /* Optimizes the LOOP.  Returns true if anything changed.  */
5962
5963 static bool
5964 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5965 {
5966   bool changed = false;
5967   struct iv_ca *iv_ca;
5968   edge exit;
5969
5970   data->current_loop = loop;
5971
5972   if (dump_file && (dump_flags & TDF_DETAILS))
5973     {
5974       fprintf (dump_file, "Processing loop %d\n", loop->num);
5975
5976       exit = single_dom_exit (loop);
5977       if (exit)
5978         {
5979           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5980                    exit->src->index, exit->dest->index);
5981           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5982           fprintf (dump_file, "\n");
5983         }
5984
5985       fprintf (dump_file, "\n");
5986     }
5987
5988   /* For each ssa name determines whether it behaves as an induction variable
5989      in some loop.  */
5990   if (!find_induction_variables (data))
5991     goto finish;
5992
5993   /* Finds interesting uses (item 1).  */
5994   find_interesting_uses (data);
5995   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5996     goto finish;
5997
5998   /* Finds candidates for the induction variables (item 2).  */
5999   find_iv_candidates (data);
6000
6001   /* Calculates the costs (item 3, part 1).  */
6002   determine_use_iv_costs (data);
6003   determine_iv_costs (data);
6004   determine_set_costs (data);
6005
6006   /* Find the optimal set of induction variables (item 3, part 2).  */
6007   iv_ca = find_optimal_iv_set (data);
6008   if (!iv_ca)
6009     goto finish;
6010   changed = true;
6011
6012   /* Create the new induction variables (item 4, part 1).  */
6013   create_new_ivs (data, iv_ca);
6014   iv_ca_free (&iv_ca);
6015
6016   /* Rewrite the uses (item 4, part 2).  */
6017   rewrite_uses (data);
6018
6019   /* Remove the ivs that are unused after rewriting.  */
6020   remove_unused_ivs (data);
6021
6022   /* We have changed the structure of induction variables; it might happen
6023      that definitions in the scev database refer to some of them that were
6024      eliminated.  */
6025   scev_reset ();
6026
6027 finish:
6028   free_loop_data (data);
6029
6030   return changed;
6031 }
6032
6033 /* Main entry point.  Optimizes induction variables in LOOPS.  */
6034
6035 void
6036 tree_ssa_iv_optimize (struct loops *loops)
6037 {
6038   struct loop *loop;
6039   struct ivopts_data data;
6040
6041   tree_ssa_iv_optimize_init (loops, &data);
6042
6043   /* Optimize the loops starting with the innermost ones.  */
6044   loop = loops->tree_root;
6045   while (loop->inner)
6046     loop = loop->inner;
6047
6048   /* Scan the loops, inner ones first.  */
6049   while (loop != loops->tree_root)
6050     {
6051       if (dump_file && (dump_flags & TDF_DETAILS))
6052         flow_loop_dump (loop, dump_file, NULL, 1);
6053
6054       tree_ssa_iv_optimize_loop (&data, loop);
6055
6056       if (loop->next)
6057         {
6058           loop = loop->next;
6059           while (loop->inner)
6060             loop = loop->inner;
6061         }
6062       else
6063         loop = loop->outer;
6064     }
6065
6066   tree_ssa_iv_optimize_finalize (loops, &data);
6067 }