gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93
  94 /* The infinite cost.  */
  95 #define INFTY 10000000
  96
  97 /* The expected number of loop iterations.  TODO -- use profiling instead of
  98    this.  */
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101
 102 /* Representation of the induction variable.  */
 103 struct iv
 104 {
 105   tree base;            /* Initial value of the iv.  */
 106   tree base_object;     /* A memory object to that the induction variable points.  */
 107   tree step;            /* Step of the iv (constant only).  */
 108   tree ssa_name;        /* The ssa name with the value.  */
 109   bool biv_p;           /* Is it a biv?  */
 110   bool have_use_for;    /* Do we already have a use for it?  */
 111   unsigned use_id;      /* The identifier in the use if it is the case.  */
 112 };
 113
 114 /* Per-ssa version information (induction variable descriptions, etc.).  */
 115 struct version_info
 116 {
 117   tree name;            /* The ssa name.  */
 118   struct iv *iv;        /* Induction variable description.  */
 119   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 120                            an expression that is not an induction variable.  */
 121   unsigned inv_id;      /* Id of an invariant.  */
 122   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 123 };
 124
 125 /* Types of uses.  */
 126 enum use_type
 127 {
 128   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 129   USE_ADDRESS,          /* Use in an address.  */
 130   USE_COMPARE           /* Use is a compare.  */
 131 };
 132
 133 /* The candidate - cost pair.  */
 134 struct cost_pair
 135 {
 136   struct iv_cand *cand; /* The candidate.  */
 137   unsigned cost;        /* The cost.  */
 138   bitmap depends_on;    /* The list of invariants that have to be
 139                            preserved.  */
 140   tree value;           /* For final value elimination, the expression for
 141                            the final value of the iv.  For iv elimination,
 142                            the new bound to compare with.  */
 143 };
 144
 145 /* Use.  */
 146 struct iv_use
 147 {
 148   unsigned id;          /* The id of the use.  */
 149   enum use_type type;   /* Type of the use.  */
 150   struct iv *iv;        /* The induction variable it is based on.  */
 151   tree stmt;            /* Statement in that it occurs.  */
 152   tree *op_p;           /* The place where it occurs.  */
 153   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 154                            important ones.  */
 155
 156   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 157   struct cost_pair *cost_map;
 158                         /* The costs wrto the iv candidates.  */
 159
 160   struct iv_cand *selected;
 161                         /* The selected candidate.  */
 162 };
 163
 164 /* The position where the iv is computed.  */
 165 enum iv_position
 166 {
 167   IP_NORMAL,            /* At the end, just before the exit condition.  */
 168   IP_END,               /* At the end of the latch block.  */
 169   IP_ORIGINAL           /* The original biv.  */
 170 };
 171
 172 /* The induction variable candidate.  */
 173 struct iv_cand
 174 {
 175   unsigned id;          /* The number of the candidate.  */
 176   bool important;       /* Whether this is an "important" candidate, i.e. such
 177                            that it should be considered by all uses.  */
 178   enum iv_position pos; /* Where it is computed.  */
 179   tree incremented_at;  /* For original biv, the statement where it is
 180                            incremented.  */
 181   tree var_before;      /* The variable used for it before increment.  */
 182   tree var_after;       /* The variable used for it after increment.  */
 183   struct iv *iv;        /* The value of the candidate.  NULL for
 184                            "pseudocandidate" used to indicate the possibility
 185                            to replace the final value of an iv by direct
 186                            computation of the value.  */
 187   unsigned cost;        /* Cost of the candidate.  */
 188   bitmap depends_on;    /* The list of invariants that are used in step of the
 189                            biv.  */
 190 };
 191
 192 /* The data used by the induction variable optimizations.  */
 193
 194 typedef struct iv_use *iv_use_p;
 195 DEF_VEC_P(iv_use_p);
 196 DEF_VEC_ALLOC_P(iv_use_p,heap);
 197
 198 typedef struct iv_cand *iv_cand_p;
 199 DEF_VEC_P(iv_cand_p);
 200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 201
 202 struct ivopts_data
 203 {
 204   /* The currently optimized loop.  */
 205   struct loop *current_loop;
 206
 207   /* Number of registers used in it.  */
 208   unsigned regs_used;
 209
 210   /* Numbers of iterations for all exits of the current loop.  */
 211   htab_t niters;
 212
 213   /* The size of version_info array allocated.  */
 214   unsigned version_info_size;
 215
 216   /* The array of information for the ssa names.  */
 217   struct version_info *version_info;
 218
 219   /* The bitmap of indices in version_info whose value was changed.  */
 220   bitmap relevant;
 221
 222   /* The maximum invariant id.  */
 223   unsigned max_inv_id;
 224
 225   /* The uses of induction variables.  */
 226   VEC(iv_use_p,heap) *iv_uses;
 227
 228   /* The candidates.  */
 229   VEC(iv_cand_p,heap) *iv_candidates;
 230
 231   /* A bitmap of important candidates.  */
 232   bitmap important_candidates;
 233
 234   /* Whether to consider just related and important candidates when replacing a
 235      use.  */
 236   bool consider_all_candidates;
 237 };
 238
 239 /* An assignment of iv candidates to uses.  */
 240
 241 struct iv_ca
 242 {
 243   /* The number of uses covered by the assignment.  */
 244   unsigned upto;
 245
 246   /* Number of uses that cannot be expressed by the candidates in the set.  */
 247   unsigned bad_uses;
 248
 249   /* Candidate assigned to a use, together with the related costs.  */
 250   struct cost_pair **cand_for_use;
 251
 252   /* Number of times each candidate is used.  */
 253   unsigned *n_cand_uses;
 254
 255   /* The candidates used.  */
 256   bitmap cands;
 257
 258   /* The number of candidates in the set.  */
 259   unsigned n_cands;
 260
 261   /* Total number of registers needed.  */
 262   unsigned n_regs;
 263
 264   /* Total cost of expressing uses.  */
 265   unsigned cand_use_cost;
 266
 267   /* Total cost of candidates.  */
 268   unsigned cand_cost;
 269
 270   /* Number of times each invariant is used.  */
 271   unsigned *n_invariant_uses;
 272
 273   /* Total cost of the assignment.  */
 274   unsigned cost;
 275 };
 276
 277 /* Difference of two iv candidate assignments.  */
 278
 279 struct iv_ca_delta
 280 {
 281   /* Changed use.  */
 282   struct iv_use *use;
 283
 284   /* An old assignment (for rollback purposes).  */
 285   struct cost_pair *old_cp;
 286
 287   /* A new assignment.  */
 288   struct cost_pair *new_cp;
 289
 290   /* Next change in the list.  */
 291   struct iv_ca_delta *next_change;
 292 };
 293
 294 /* Bound on number of candidates below that all candidates are considered.  */
 295
 296 #define CONSIDER_ALL_CANDIDATES_BOUND \
 297   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 298
 299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 300    optimizing such a loop would help, and it would take ages).  */
 301
 302 #define MAX_CONSIDERED_USES \
 303   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 304
 305 /* If there are at most this number of ivs in the set, try removing unnecessary
 306    ivs from the set always.  */
 307
 308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 309   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 310
 311 /* The list of trees for that the decl_rtl field must be reset is stored
 312    here.  */
 313
 314 static VEC(tree,heap) *decl_rtl_to_reset;
 315
 316 /* Number of uses recorded in DATA.  */
 317
 318 static inline unsigned
 319 n_iv_uses (struct ivopts_data *data)
 320 {
 321   return VEC_length (iv_use_p, data->iv_uses);
 322 }
 323
 324 /* Ith use recorded in DATA.  */
 325
 326 static inline struct iv_use *
 327 iv_use (struct ivopts_data *data, unsigned i)
 328 {
 329   return VEC_index (iv_use_p, data->iv_uses, i);
 330 }
 331
 332 /* Number of candidates recorded in DATA.  */
 333
 334 static inline unsigned
 335 n_iv_cands (struct ivopts_data *data)
 336 {
 337   return VEC_length (iv_cand_p, data->iv_candidates);
 338 }
 339
 340 /* Ith candidate recorded in DATA.  */
 341
 342 static inline struct iv_cand *
 343 iv_cand (struct ivopts_data *data, unsigned i)
 344 {
 345   return VEC_index (iv_cand_p, data->iv_candidates, i);
 346 }
 347
 348 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 349
 350 edge
 351 single_dom_exit (struct loop *loop)
 352 {
 353   edge exit = single_exit (loop);
 354
 355   if (!exit)
 356     return NULL;
 357
 358   if (!just_once_each_iteration_p (loop, exit->src))
 359     return NULL;
 360
 361   return exit;
 362 }
 363
 364 /* Dumps information about the induction variable IV to FILE.  */
 365
 366 extern void dump_iv (FILE *, struct iv *);
 367 void
 368 dump_iv (FILE *file, struct iv *iv)
 369 {
 370   if (iv->ssa_name)
 371     {
 372       fprintf (file, "ssa name ");
 373       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 374       fprintf (file, "\n");
 375     }
 376
 377   fprintf (file, "  type ");
 378   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 379   fprintf (file, "\n");
 380
 381   if (iv->step)
 382     {
 383       fprintf (file, "  base ");
 384       print_generic_expr (file, iv->base, TDF_SLIM);
 385       fprintf (file, "\n");
 386
 387       fprintf (file, "  step ");
 388       print_generic_expr (file, iv->step, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391   else
 392     {
 393       fprintf (file, "  invariant ");
 394       print_generic_expr (file, iv->base, TDF_SLIM);
 395       fprintf (file, "\n");
 396     }
 397
 398   if (iv->base_object)
 399     {
 400       fprintf (file, "  base object ");
 401       print_generic_expr (file, iv->base_object, TDF_SLIM);
 402       fprintf (file, "\n");
 403     }
 404
 405   if (iv->biv_p)
 406     fprintf (file, "  is a biv\n");
 407 }
 408
 409 /* Dumps information about the USE to FILE.  */
 410
 411 extern void dump_use (FILE *, struct iv_use *);
 412 void
 413 dump_use (FILE *file, struct iv_use *use)
 414 {
 415   fprintf (file, "use %d\n", use->id);
 416
 417   switch (use->type)
 418     {
 419     case USE_NONLINEAR_EXPR:
 420       fprintf (file, "  generic\n");
 421       break;
 422
 423     case USE_ADDRESS:
 424       fprintf (file, "  address\n");
 425       break;
 426
 427     case USE_COMPARE:
 428       fprintf (file, "  compare\n");
 429       break;
 430
 431     default:
 432       gcc_unreachable ();
 433     }
 434
 435   fprintf (file, "  in statement ");
 436   print_generic_expr (file, use->stmt, TDF_SLIM);
 437   fprintf (file, "\n");
 438
 439   fprintf (file, "  at position ");
 440   if (use->op_p)
 441     print_generic_expr (file, *use->op_p, TDF_SLIM);
 442   fprintf (file, "\n");
 443
 444   dump_iv (file, use->iv);
 445
 446   if (use->related_cands)
 447     {
 448       fprintf (file, "  related candidates ");
 449       dump_bitmap (file, use->related_cands);
 450     }
 451 }
 452
 453 /* Dumps information about the uses to FILE.  */
 454
 455 extern void dump_uses (FILE *, struct ivopts_data *);
 456 void
 457 dump_uses (FILE *file, struct ivopts_data *data)
 458 {
 459   unsigned i;
 460   struct iv_use *use;
 461
 462   for (i = 0; i < n_iv_uses (data); i++)
 463     {
 464       use = iv_use (data, i);
 465
 466       dump_use (file, use);
 467       fprintf (file, "\n");
 468     }
 469 }
 470
 471 /* Dumps information about induction variable candidate CAND to FILE.  */
 472
 473 extern void dump_cand (FILE *, struct iv_cand *);
 474 void
 475 dump_cand (FILE *file, struct iv_cand *cand)
 476 {
 477   struct iv *iv = cand->iv;
 478
 479   fprintf (file, "candidate %d%s\n",
 480            cand->id, cand->important ? " (important)" : "");
 481
 482   if (cand->depends_on)
 483     {
 484       fprintf (file, "  depends on ");
 485       dump_bitmap (file, cand->depends_on);
 486     }
 487
 488   if (!iv)
 489     {
 490       fprintf (file, "  final value replacement\n");
 491       return;
 492     }
 493
 494   switch (cand->pos)
 495     {
 496     case IP_NORMAL:
 497       fprintf (file, "  incremented before exit test\n");
 498       break;
 499
 500     case IP_END:
 501       fprintf (file, "  incremented at end\n");
 502       break;
 503
 504     case IP_ORIGINAL:
 505       fprintf (file, "  original biv\n");
 506       break;
 507     }
 508
 509   dump_iv (file, iv);
 510 }
 511
 512 /* Returns the info for ssa version VER.  */
 513
 514 static inline struct version_info *
 515 ver_info (struct ivopts_data *data, unsigned ver)
 516 {
 517   return data->version_info + ver;
 518 }
 519
 520 /* Returns the info for ssa name NAME.  */
 521
 522 static inline struct version_info *
 523 name_info (struct ivopts_data *data, tree name)
 524 {
 525   return ver_info (data, SSA_NAME_VERSION (name));
 526 }
 527
 528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 529    emitted in LOOP.  */
 530
 531 static bool
 532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 533 {
 534   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 535
 536   gcc_assert (bb);
 537
 538   if (sbb == loop->latch)
 539     return true;
 540
 541   if (sbb != bb)
 542     return false;
 543
 544   return stmt == last_stmt (bb);
 545 }
 546
 547 /* Returns true if STMT if after the place where the original induction
 548    variable CAND is incremented.  */
 549
 550 static bool
 551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 552 {
 553   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 554   basic_block stmt_bb = bb_for_stmt (stmt);
 555   block_stmt_iterator bsi;
 556
 557   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 558     return false;
 559
 560   if (stmt_bb != cand_bb)
 561     return true;
 562
 563   /* Scan the block from the end, since the original ivs are usually
 564      incremented at the end of the loop body.  */
 565   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 566     {
 567       if (bsi_stmt (bsi) == cand->incremented_at)
 568         return false;
 569       if (bsi_stmt (bsi) == stmt)
 570         return true;
 571     }
 572 }
 573
 574 /* Returns true if STMT if after the place where the induction variable
 575    CAND is incremented in LOOP.  */
 576
 577 static bool
 578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 579 {
 580   switch (cand->pos)
 581     {
 582     case IP_END:
 583       return false;
 584
 585     case IP_NORMAL:
 586       return stmt_after_ip_normal_pos (loop, stmt);
 587
 588     case IP_ORIGINAL:
 589       return stmt_after_ip_original_pos (cand, stmt);
 590
 591     default:
 592       gcc_unreachable ();
 593     }
 594 }
 595
 596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 597
 598 static bool
 599 abnormal_ssa_name_p (tree exp)
 600 {
 601   if (!exp)
 602     return false;
 603
 604   if (TREE_CODE (exp) != SSA_NAME)
 605     return false;
 606
 607   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 608 }
 609
 610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 611    abnormal phi node.  Callback for for_each_index.  */
 612
 613 static bool
 614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 615                                   void *data ATTRIBUTE_UNUSED)
 616 {
 617   if (TREE_CODE (base) == ARRAY_REF)
 618     {
 619       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 620         return false;
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 622         return false;
 623     }
 624
 625   return !abnormal_ssa_name_p (*index);
 626 }
 627
 628 /* Returns true if EXPR contains a ssa name that occurs in an
 629    abnormal phi node.  */
 630
 631 bool
 632 contains_abnormal_ssa_name_p (tree expr)
 633 {
 634   enum tree_code code;
 635   enum tree_code_class class;
 636
 637   if (!expr)
 638     return false;
 639
 640   code = TREE_CODE (expr);
 641   class = TREE_CODE_CLASS (code);
 642
 643   if (code == SSA_NAME)
 644     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 645
 646   if (code == INTEGER_CST
 647       || is_gimple_min_invariant (expr))
 648     return false;
 649
 650   if (code == ADDR_EXPR)
 651     return !for_each_index (&TREE_OPERAND (expr, 0),
 652                             idx_contains_abnormal_ssa_name_p,
 653                             NULL);
 654
 655   switch (class)
 656     {
 657     case tcc_binary:
 658     case tcc_comparison:
 659       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 660         return true;
 661
 662       /* Fallthru.  */
 663     case tcc_unary:
 664       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 665         return true;
 666
 667       break;
 668
 669     default:
 670       gcc_unreachable ();
 671     }
 672
 673   return false;
 674 }
 675
 676 /* Element of the table in that we cache the numbers of iterations obtained
 677    from exits of the loop.  */
 678
 679 struct nfe_cache_elt
 680 {
 681   /* The edge for that the number of iterations is cached.  */
 682   edge exit;
 683
 684   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 685      determined.  */
 686   tree niter;
 687 };
 688
 689 /* Hash function for nfe_cache_elt E.  */
 690
 691 static hashval_t
 692 nfe_hash (const void *e)
 693 {
 694   const struct nfe_cache_elt *elt = e;
 695
 696   return htab_hash_pointer (elt->exit);
 697 }
 698
 699 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 700
 701 static int
 702 nfe_eq (const void *e1, const void *e2)
 703 {
 704   const struct nfe_cache_elt *elt1 = e1;
 705
 706   return elt1->exit == e2;
 707 }
 708
 709 /*  Returns tree describing number of iterations determined from
 710     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 711
 712 static tree
 713 niter_for_exit (struct ivopts_data *data, edge exit)
 714 {
 715   struct nfe_cache_elt *nfe_desc;
 716   struct tree_niter_desc desc;
 717   PTR *slot;
 718
 719   slot = htab_find_slot_with_hash (data->niters, exit,
 720                                    htab_hash_pointer (exit),
 721                                    INSERT);
 722
 723   if (!*slot)
 724     {
 725       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 726       nfe_desc->exit = exit;
 727
 728       /* Try to determine number of iterations.  We must know it
 729          unconditionally (i.e., without possibility of # of iterations
 730          being zero).  Also, we cannot safely work with ssa names that
 731          appear in phi nodes on abnormal edges, so that we do not create
 732          overlapping life ranges for them (PR 27283).  */
 733       if (number_of_iterations_exit (data->current_loop,
 734                                      exit, &desc, true)
 735           && integer_zerop (desc.may_be_zero)
 736           && !contains_abnormal_ssa_name_p (desc.niter))
 737         nfe_desc->niter = desc.niter;
 738       else
 739         nfe_desc->niter = NULL_TREE;
 740     }
 741   else
 742     nfe_desc = *slot;
 743
 744   return nfe_desc->niter;
 745 }
 746
 747 /* Returns tree describing number of iterations determined from
 748    single dominating exit of DATA->current_loop, or NULL if something
 749    goes wrong.  */
 750
 751 static tree
 752 niter_for_single_dom_exit (struct ivopts_data *data)
 753 {
 754   edge exit = single_dom_exit (data->current_loop);
 755
 756   if (!exit)
 757     return NULL;
 758
 759   return niter_for_exit (data, exit);
 760 }
 761
 762 /* Initializes data structures used by the iv optimization pass, stored
 763    in DATA.  */
 764
 765 static void
 766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 767 {
 768   data->version_info_size = 2 * num_ssa_names;
 769   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 770   data->relevant = BITMAP_ALLOC (NULL);
 771   data->important_candidates = BITMAP_ALLOC (NULL);
 772   data->max_inv_id = 0;
 773   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 774   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 775   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 776   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 777 }
 778
 779 /* Returns a memory object to that EXPR points.  In case we are able to
 780    determine that it does not point to any such object, NULL is returned.  */
 781
 782 static tree
 783 determine_base_object (tree expr)
 784 {
 785   enum tree_code code = TREE_CODE (expr);
 786   tree base, obj, op0, op1;
 787
 788   /* If this is a pointer casted to any type, we need to determine
 789      the base object for the pointer; so handle conversions before
 790      throwing away non-pointer expressions.  */
 791   if (TREE_CODE (expr) == NOP_EXPR
 792       || TREE_CODE (expr) == CONVERT_EXPR)
 793     return determine_base_object (TREE_OPERAND (expr, 0));
 794
 795   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 796     return NULL_TREE;
 797
 798   switch (code)
 799     {
 800     case INTEGER_CST:
 801       return NULL_TREE;
 802
 803     case ADDR_EXPR:
 804       obj = TREE_OPERAND (expr, 0);
 805       base = get_base_address (obj);
 806
 807       if (!base)
 808         return expr;
 809
 810       if (TREE_CODE (base) == INDIRECT_REF)
 811         return determine_base_object (TREE_OPERAND (base, 0));
 812
 813       return fold_convert (ptr_type_node,
 814                            build_fold_addr_expr (base));
 815
 816     case PLUS_EXPR:
 817     case MINUS_EXPR:
 818       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 819       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 820
 821       if (!op1)
 822         return op0;
 823
 824       if (!op0)
 825         return (code == PLUS_EXPR
 826                 ? op1
 827                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 828
 829       return fold_build2 (code, ptr_type_node, op0, op1);
 830
 831     default:
 832       return fold_convert (ptr_type_node, expr);
 833     }
 834 }
 835
 836 /* Allocates an induction variable with given initial value BASE and step STEP
 837    for loop LOOP.  */
 838
 839 static struct iv *
 840 alloc_iv (tree base, tree step)
 841 {
 842   struct iv *iv = XCNEW (struct iv);
 843   gcc_assert (step != NULL_TREE);
 844
 845   iv->base = base;
 846   iv->base_object = determine_base_object (base);
 847   iv->step = step;
 848   iv->biv_p = false;
 849   iv->have_use_for = false;
 850   iv->use_id = 0;
 851   iv->ssa_name = NULL_TREE;
 852
 853   return iv;
 854 }
 855
 856 /* Sets STEP and BASE for induction variable IV.  */
 857
 858 static void
 859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 860 {
 861   struct version_info *info = name_info (data, iv);
 862
 863   gcc_assert (!info->iv);
 864
 865   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 866   info->iv = alloc_iv (base, step);
 867   info->iv->ssa_name = iv;
 868 }
 869
 870 /* Finds induction variable declaration for VAR.  */
 871
 872 static struct iv *
 873 get_iv (struct ivopts_data *data, tree var)
 874 {
 875   basic_block bb;
 876   tree type = TREE_TYPE (var);
 877
 878   if (!POINTER_TYPE_P (type)
 879       && !INTEGRAL_TYPE_P (type))
 880     return NULL;
 881
 882   if (!name_info (data, var)->iv)
 883     {
 884       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 885
 886       if (!bb
 887           || !flow_bb_inside_loop_p (data->current_loop, bb))
 888         set_iv (data, var, var, build_int_cst (type, 0));
 889     }
 890
 891   return name_info (data, var)->iv;
 892 }
 893
 894 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 895    not define a simple affine biv with nonzero step.  */
 896
 897 static tree
 898 determine_biv_step (tree phi)
 899 {
 900   struct loop *loop = bb_for_stmt (phi)->loop_father;
 901   tree name = PHI_RESULT (phi);
 902   affine_iv iv;
 903
 904   if (!is_gimple_reg (name))
 905     return NULL_TREE;
 906
 907   if (!simple_iv (loop, phi, name, &iv, true))
 908     return NULL_TREE;
 909
 910   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 911 }
 912
 913 /* Finds basic ivs.  */
 914
 915 static bool
 916 find_bivs (struct ivopts_data *data)
 917 {
 918   tree phi, step, type, base;
 919   bool found = false;
 920   struct loop *loop = data->current_loop;
 921
 922   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 923     {
 924       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 925         continue;
 926
 927       step = determine_biv_step (phi);
 928       if (!step)
 929         continue;
 930
 931       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 932       base = expand_simple_operations (base);
 933       if (contains_abnormal_ssa_name_p (base)
 934           || contains_abnormal_ssa_name_p (step))
 935         continue;
 936
 937       type = TREE_TYPE (PHI_RESULT (phi));
 938       base = fold_convert (type, base);
 939       if (step)
 940         step = fold_convert (type, step);
 941
 942       set_iv (data, PHI_RESULT (phi), base, step);
 943       found = true;
 944     }
 945
 946   return found;
 947 }
 948
 949 /* Marks basic ivs.  */
 950
 951 static void
 952 mark_bivs (struct ivopts_data *data)
 953 {
 954   tree phi, var;
 955   struct iv *iv, *incr_iv;
 956   struct loop *loop = data->current_loop;
 957   basic_block incr_bb;
 958
 959   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 960     {
 961       iv = get_iv (data, PHI_RESULT (phi));
 962       if (!iv)
 963         continue;
 964
 965       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 966       incr_iv = get_iv (data, var);
 967       if (!incr_iv)
 968         continue;
 969
 970       /* If the increment is in the subloop, ignore it.  */
 971       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 972       if (incr_bb->loop_father != data->current_loop
 973           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 974         continue;
 975
 976       iv->biv_p = true;
 977       incr_iv->biv_p = true;
 978     }
 979 }
 980
 981 /* Checks whether STMT defines a linear induction variable and stores its
 982    parameters to IV.  */
 983
 984 static bool
 985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 986 {
 987   tree lhs;
 988   struct loop *loop = data->current_loop;
 989
 990   iv->base = NULL_TREE;
 991   iv->step = NULL_TREE;
 992
 993   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 994     return false;
 995
 996   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 997   if (TREE_CODE (lhs) != SSA_NAME)
 998     return false;
 999
1000   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1001     return false;
1002   iv->base = expand_simple_operations (iv->base);
1003
1004   if (contains_abnormal_ssa_name_p (iv->base)
1005       || contains_abnormal_ssa_name_p (iv->step))
1006     return false;
1007
1008   return true;
1009 }
1010
1011 /* Finds general ivs in statement STMT.  */
1012
1013 static void
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1015 {
1016   affine_iv iv;
1017
1018   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1019     return;
1020
1021   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1022 }
1023
1024 /* Finds general ivs in basic block BB.  */
1025
1026 static void
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1028 {
1029   block_stmt_iterator bsi;
1030
1031   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032     find_givs_in_stmt (data, bsi_stmt (bsi));
1033 }
1034
1035 /* Finds general ivs.  */
1036
1037 static void
1038 find_givs (struct ivopts_data *data)
1039 {
1040   struct loop *loop = data->current_loop;
1041   basic_block *body = get_loop_body_in_dom_order (loop);
1042   unsigned i;
1043
1044   for (i = 0; i < loop->num_nodes; i++)
1045     find_givs_in_bb (data, body[i]);
1046   free (body);
1047 }
1048
1049 /* For each ssa name defined in LOOP determines whether it is an induction
1050    variable and if so, its initial value and step.  */
1051
1052 static bool
1053 find_induction_variables (struct ivopts_data *data)
1054 {
1055   unsigned i;
1056   bitmap_iterator bi;
1057
1058   if (!find_bivs (data))
1059     return false;
1060
1061   find_givs (data);
1062   mark_bivs (data);
1063
1064   if (dump_file && (dump_flags & TDF_DETAILS))
1065     {
1066       tree niter = niter_for_single_dom_exit (data);
1067
1068       if (niter)
1069         {
1070           fprintf (dump_file, "  number of iterations ");
1071           print_generic_expr (dump_file, niter, TDF_SLIM);
1072           fprintf (dump_file, "\n\n");
1073         };
1074
1075       fprintf (dump_file, "Induction variables:\n\n");
1076
1077       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1078         {
1079           if (ver_info (data, i)->iv)
1080             dump_iv (dump_file, ver_info (data, i)->iv);
1081         }
1082     }
1083
1084   return true;
1085 }
1086
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1088
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091             tree stmt, enum use_type use_type)
1092 {
1093   struct iv_use *use = XCNEW (struct iv_use);
1094
1095   use->id = n_iv_uses (data);
1096   use->type = use_type;
1097   use->iv = iv;
1098   use->stmt = stmt;
1099   use->op_p = use_p;
1100   use->related_cands = BITMAP_ALLOC (NULL);
1101
1102   /* To avoid showing ssa name in the dumps, if it was not reset by the
1103      caller.  */
1104   iv->ssa_name = NULL_TREE;
1105
1106   if (dump_file && (dump_flags & TDF_DETAILS))
1107     dump_use (dump_file, use);
1108
1109   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1110
1111   return use;
1112 }
1113
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115    NONLINEAR_USE is true if the invariant is used in a way we do not
1116    handle specially.  */
1117
1118 static void
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1120 {
1121   basic_block bb;
1122   struct version_info *info;
1123
1124   if (TREE_CODE (op) != SSA_NAME
1125       || !is_gimple_reg (op))
1126     return;
1127
1128   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1129   if (bb
1130       && flow_bb_inside_loop_p (data->current_loop, bb))
1131     return;
1132
1133   info = name_info (data, op);
1134   info->name = op;
1135   info->has_nonlin_use |= nonlinear_use;
1136   if (!info->inv_id)
1137     info->inv_id = ++data->max_inv_id;
1138   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1139 }
1140
1141 /* Checks whether the use OP is interesting and if so, records it.  */
1142
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1145 {
1146   struct iv *iv;
1147   struct iv *civ;
1148   tree stmt;
1149   struct iv_use *use;
1150
1151   if (TREE_CODE (op) != SSA_NAME)
1152     return NULL;
1153
1154   iv = get_iv (data, op);
1155   if (!iv)
1156     return NULL;
1157
1158   if (iv->have_use_for)
1159     {
1160       use = iv_use (data, iv->use_id);
1161
1162       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163       return use;
1164     }
1165
1166   if (integer_zerop (iv->step))
1167     {
1168       record_invariant (data, op, true);
1169       return NULL;
1170     }
1171   iv->have_use_for = true;
1172
1173   civ = XNEW (struct iv);
1174   *civ = *iv;
1175
1176   stmt = SSA_NAME_DEF_STMT (op);
1177   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1179
1180   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181   iv->use_id = use->id;
1182
1183   return use;
1184 }
1185
1186 /* Checks whether the condition *COND_P in STMT is interesting
1187    and if so, records it.  */
1188
1189 static void
1190 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1191 {
1192   tree *op0_p;
1193   tree *op1_p;
1194   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1195   struct iv const_iv;
1196   tree zero = integer_zero_node;
1197
1198   const_iv.step = integer_zero_node;
1199
1200   if (TREE_CODE (*cond_p) != SSA_NAME
1201       && !COMPARISON_CLASS_P (*cond_p))
1202     return;
1203
1204   if (TREE_CODE (*cond_p) == SSA_NAME)
1205     {
1206       op0_p = cond_p;
1207       op1_p = &zero;
1208     }
1209   else
1210     {
1211       op0_p = &TREE_OPERAND (*cond_p, 0);
1212       op1_p = &TREE_OPERAND (*cond_p, 1);
1213     }
1214
1215   if (TREE_CODE (*op0_p) == SSA_NAME)
1216     iv0 = get_iv (data, *op0_p);
1217   else
1218     iv0 = &const_iv;
1219
1220   if (TREE_CODE (*op1_p) == SSA_NAME)
1221     iv1 = get_iv (data, *op1_p);
1222   else
1223     iv1 = &const_iv;
1224
1225   if (/* When comparing with non-invariant value, we may not do any senseful
1226          induction variable elimination.  */
1227       (!iv0 || !iv1)
1228       /* Eliminating condition based on two ivs would be nontrivial.
1229          ??? TODO -- it is not really important to handle this case.  */
1230       || (!integer_zerop (iv0->step)
1231           && !integer_zerop (iv1->step)))
1232     {
1233       find_interesting_uses_op (data, *op0_p);
1234       find_interesting_uses_op (data, *op1_p);
1235       return;
1236     }
1237
1238   if (integer_zerop (iv0->step)
1239       && integer_zerop (iv1->step))
1240     {
1241       /* If both are invariants, this is a work for unswitching.  */
1242       return;
1243     }
1244
1245   civ = XNEW (struct iv);
1246   *civ = integer_zerop (iv0->step) ? *iv1: *iv0;
1247   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1248 }
1249
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251    i.e. if all its operands are defined outside of the LOOP.  */
1252
1253 bool
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1255 {
1256   basic_block def_bb;
1257   unsigned i, len;
1258
1259   if (is_gimple_min_invariant (expr))
1260     return true;
1261
1262   if (TREE_CODE (expr) == SSA_NAME)
1263     {
1264       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1265       if (def_bb
1266           && flow_bb_inside_loop_p (loop, def_bb))
1267         return false;
1268
1269       return true;
1270     }
1271
1272   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1273     return false;
1274
1275   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1276   for (i = 0; i < len; i++)
1277     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278       return false;
1279
1280   return true;
1281 }
1282
1283 /* Cumulates the steps of indices into DATA and replaces their values with the
1284    initial ones.  Returns false when the value of the index cannot be determined.
1285    Callback for for_each_index.  */
1286
1287 struct ifs_ivopts_data
1288 {
1289   struct ivopts_data *ivopts_data;
1290   tree stmt;
1291   tree step;
1292 };
1293
1294 static bool
1295 idx_find_step (tree base, tree *idx, void *data)
1296 {
1297   struct ifs_ivopts_data *dta = data;
1298   struct iv *iv;
1299   tree step, iv_base, iv_step, lbound, off;
1300   struct loop *loop = dta->ivopts_data->current_loop;
1301
1302   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1303       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1304     return false;
1305
1306   /* If base is a component ref, require that the offset of the reference
1307      be invariant.  */
1308   if (TREE_CODE (base) == COMPONENT_REF)
1309     {
1310       off = component_ref_field_offset (base);
1311       return expr_invariant_in_loop_p (loop, off);
1312     }
1313
1314   /* If base is array, first check whether we will be able to move the
1315      reference out of the loop (in order to take its address in strength
1316      reduction).  In order for this to work we need both lower bound
1317      and step to be loop invariants.  */
1318   if (TREE_CODE (base) == ARRAY_REF)
1319     {
1320       step = array_ref_element_size (base);
1321       lbound = array_ref_low_bound (base);
1322
1323       if (!expr_invariant_in_loop_p (loop, step)
1324           || !expr_invariant_in_loop_p (loop, lbound))
1325         return false;
1326     }
1327
1328   if (TREE_CODE (*idx) != SSA_NAME)
1329     return true;
1330
1331   iv = get_iv (dta->ivopts_data, *idx);
1332   if (!iv)
1333     return false;
1334
1335   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1336           *&x[0], which is not folded and does not trigger the
1337           ARRAY_REF path below.  */
1338   *idx = iv->base;
1339
1340   if (integer_zerop (iv->step))
1341     return true;
1342
1343   if (TREE_CODE (base) == ARRAY_REF)
1344     {
1345       step = array_ref_element_size (base);
1346
1347       /* We only handle addresses whose step is an integer constant.  */
1348       if (TREE_CODE (step) != INTEGER_CST)
1349         return false;
1350     }
1351   else
1352     /* The step for pointer arithmetics already is 1 byte.  */
1353     step = build_int_cst (sizetype, 1);
1354
1355   iv_base = iv->base;
1356   iv_step = iv->step;
1357   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1358                             sizetype, &iv_base, &iv_step, dta->stmt,
1359                             false))
1360     {
1361       /* The index might wrap.  */
1362       return false;
1363     }
1364
1365   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1366   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1367
1368   return true;
1369 }
1370
1371 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1372    object is passed to it in DATA.  */
1373
1374 static bool
1375 idx_record_use (tree base, tree *idx,
1376                 void *data)
1377 {
1378   find_interesting_uses_op (data, *idx);
1379   if (TREE_CODE (base) == ARRAY_REF)
1380     {
1381       find_interesting_uses_op (data, array_ref_element_size (base));
1382       find_interesting_uses_op (data, array_ref_low_bound (base));
1383     }
1384   return true;
1385 }
1386
1387 /* Returns true if memory reference REF may be unaligned.  */
1388
1389 static bool
1390 may_be_unaligned_p (tree ref)
1391 {
1392   tree base;
1393   tree base_type;
1394   HOST_WIDE_INT bitsize;
1395   HOST_WIDE_INT bitpos;
1396   tree toffset;
1397   enum machine_mode mode;
1398   int unsignedp, volatilep;
1399   unsigned base_align;
1400
1401   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402      thus they are not misaligned.  */
1403   if (TREE_CODE (ref) == TARGET_MEM_REF)
1404     return false;
1405
1406   /* The test below is basically copy of what expr.c:normal_inner_ref
1407      does to check whether the object must be loaded by parts when
1408      STRICT_ALIGNMENT is true.  */
1409   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410                               &unsignedp, &volatilep, true);
1411   base_type = TREE_TYPE (base);
1412   base_align = TYPE_ALIGN (base_type);
1413
1414   if (mode != BLKmode
1415       && (base_align < GET_MODE_ALIGNMENT (mode)
1416           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417           || bitpos % BITS_PER_UNIT != 0))
1418     return true;
1419
1420   return false;
1421 }
1422
1423 /* Return true if EXPR may be non-addressable.   */
1424
1425 static bool
1426 may_be_nonaddressable_p (tree expr)
1427 {
1428   switch (TREE_CODE (expr))
1429     {
1430     case COMPONENT_REF:
1431       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1433
1434     case ARRAY_REF:
1435     case ARRAY_RANGE_REF:
1436       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1437
1438     case VIEW_CONVERT_EXPR:
1439       /* This kind of view-conversions may wrap non-addressable objects
1440          and make them look addressable.  After some processing the
1441          non-addressability may be uncovered again, causing ADDR_EXPRs
1442          of inappropriate objects to be built.  */
1443       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1445
1446     default:
1447       break;
1448     }
1449
1450   return false;
1451 }
1452
1453 /* Finds addresses in *OP_P inside STMT.  */
1454
1455 static void
1456 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1457 {
1458   tree base = *op_p, step = build_int_cst (sizetype, 0);
1459   struct iv *civ;
1460   struct ifs_ivopts_data ifs_ivopts_data;
1461
1462   /* Do not play with volatile memory references.  A bit too conservative,
1463      perhaps, but safe.  */
1464   if (stmt_ann (stmt)->has_volatile_ops)
1465     goto fail;
1466
1467   /* Ignore bitfields for now.  Not really something terribly complicated
1468      to handle.  TODO.  */
1469   if (TREE_CODE (base) == BIT_FIELD_REF)
1470     goto fail;
1471
1472   if (may_be_nonaddressable_p (base))
1473     goto fail;
1474
1475   if (STRICT_ALIGNMENT
1476       && may_be_unaligned_p (base))
1477     goto fail;
1478
1479   base = unshare_expr (base);
1480
1481   if (TREE_CODE (base) == TARGET_MEM_REF)
1482     {
1483       tree type = build_pointer_type (TREE_TYPE (base));
1484       tree astep;
1485
1486       if (TMR_BASE (base)
1487           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1488         {
1489           civ = get_iv (data, TMR_BASE (base));
1490           if (!civ)
1491             goto fail;
1492
1493           TMR_BASE (base) = civ->base;
1494           step = civ->step;
1495         }
1496       if (TMR_INDEX (base)
1497           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1498         {
1499           civ = get_iv (data, TMR_INDEX (base));
1500           if (!civ)
1501             goto fail;
1502
1503           TMR_INDEX (base) = civ->base;
1504           astep = civ->step;
1505
1506           if (astep)
1507             {
1508               if (TMR_STEP (base))
1509                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1510
1511               step = fold_build2 (PLUS_EXPR, type, step, astep);
1512             }
1513         }
1514
1515       if (integer_zerop (step))
1516         goto fail;
1517       base = tree_mem_ref_addr (type, base);
1518     }
1519   else
1520     {
1521       ifs_ivopts_data.ivopts_data = data;
1522       ifs_ivopts_data.stmt = stmt;
1523       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1524       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1525           || integer_zerop (ifs_ivopts_data.step))
1526         goto fail;
1527       step = ifs_ivopts_data.step;
1528
1529       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1530       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1531
1532       base = build_fold_addr_expr (base);
1533
1534       /* Substituting bases of IVs into the base expression might
1535          have caused folding opportunities.  */
1536       if (TREE_CODE (base) == ADDR_EXPR)
1537         {
1538           tree *ref = &TREE_OPERAND (base, 0);
1539           while (handled_component_p (*ref))
1540             ref = &TREE_OPERAND (*ref, 0);
1541           if (TREE_CODE (*ref) == INDIRECT_REF)
1542             *ref = fold_indirect_ref (*ref);
1543         }
1544     }
1545
1546   civ = alloc_iv (base, step);
1547   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548   return;
1549
1550 fail:
1551   for_each_index (op_p, idx_record_use, data);
1552 }
1553
1554 /* Finds and records invariants used in STMT.  */
1555
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1558 {
1559   ssa_op_iter iter;
1560   use_operand_p use_p;
1561   tree op;
1562
1563   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1564     {
1565       op = USE_FROM_PTR (use_p);
1566       record_invariant (data, op, false);
1567     }
1568 }
1569
1570 /* Finds interesting uses of induction variables in the statement STMT.  */
1571
1572 static void
1573 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1574 {
1575   struct iv *iv;
1576   tree op, lhs, rhs;
1577   ssa_op_iter iter;
1578   use_operand_p use_p;
1579
1580   find_invariants_stmt (data, stmt);
1581
1582   if (TREE_CODE (stmt) == COND_EXPR)
1583     {
1584       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1585       return;
1586     }
1587
1588   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1589     {
1590       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1591       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1592
1593       if (TREE_CODE (lhs) == SSA_NAME)
1594         {
1595           /* If the statement defines an induction variable, the uses are not
1596              interesting by themselves.  */
1597
1598           iv = get_iv (data, lhs);
1599
1600           if (iv && !integer_zerop (iv->step))
1601             return;
1602         }
1603
1604       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1605         {
1606         case tcc_comparison:
1607           find_interesting_uses_cond (data, stmt,
1608                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1609           return;
1610
1611         case tcc_reference:
1612           find_interesting_uses_address (data, stmt,
1613                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1614           if (REFERENCE_CLASS_P (lhs))
1615             find_interesting_uses_address (data, stmt,
1616                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1617           return;
1618
1619         default: ;
1620         }
1621
1622       if (REFERENCE_CLASS_P (lhs)
1623           && is_gimple_val (rhs))
1624         {
1625           find_interesting_uses_address (data, stmt,
1626                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1627           find_interesting_uses_op (data, rhs);
1628           return;
1629         }
1630
1631       /* TODO -- we should also handle address uses of type
1632
1633          memory = call (whatever);
1634
1635          and
1636
1637          call (memory).  */
1638     }
1639
1640   if (TREE_CODE (stmt) == PHI_NODE
1641       && bb_for_stmt (stmt) == data->current_loop->header)
1642     {
1643       lhs = PHI_RESULT (stmt);
1644       iv = get_iv (data, lhs);
1645
1646       if (iv && !integer_zerop (iv->step))
1647         return;
1648     }
1649
1650   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1651     {
1652       op = USE_FROM_PTR (use_p);
1653
1654       if (TREE_CODE (op) != SSA_NAME)
1655         continue;
1656
1657       iv = get_iv (data, op);
1658       if (!iv)
1659         continue;
1660
1661       find_interesting_uses_op (data, op);
1662     }
1663 }
1664
1665 /* Finds interesting uses of induction variables outside of loops
1666    on loop exit edge EXIT.  */
1667
1668 static void
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1670 {
1671   tree phi, def;
1672
1673   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1674     {
1675       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676       if (is_gimple_reg (def))
1677         find_interesting_uses_op (data, def);
1678     }
1679 }
1680
1681 /* Finds uses of the induction variables that are interesting.  */
1682
1683 static void
1684 find_interesting_uses (struct ivopts_data *data)
1685 {
1686   basic_block bb;
1687   block_stmt_iterator bsi;
1688   tree phi;
1689   basic_block *body = get_loop_body (data->current_loop);
1690   unsigned i;
1691   struct version_info *info;
1692   edge e;
1693
1694   if (dump_file && (dump_flags & TDF_DETAILS))
1695     fprintf (dump_file, "Uses:\n\n");
1696
1697   for (i = 0; i < data->current_loop->num_nodes; i++)
1698     {
1699       edge_iterator ei;
1700       bb = body[i];
1701
1702       FOR_EACH_EDGE (e, ei, bb->succs)
1703         if (e->dest != EXIT_BLOCK_PTR
1704             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705           find_interesting_uses_outside (data, e);
1706
1707       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708         find_interesting_uses_stmt (data, phi);
1709       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1711     }
1712
1713   if (dump_file && (dump_flags & TDF_DETAILS))
1714     {
1715       bitmap_iterator bi;
1716
1717       fprintf (dump_file, "\n");
1718
1719       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720         {
1721           info = ver_info (data, i);
1722           if (info->inv_id)
1723             {
1724               fprintf (dump_file, "  ");
1725               print_generic_expr (dump_file, info->name, TDF_SLIM);
1726               fprintf (dump_file, " is invariant (%d)%s\n",
1727                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1728             }
1729         }
1730
1731       fprintf (dump_file, "\n");
1732     }
1733
1734   free (body);
1735 }
1736
1737 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1738    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1739    we are at the top-level of the processed address.  */
1740
1741 static tree
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743                 unsigned HOST_WIDE_INT *offset)
1744 {
1745   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746   enum tree_code code;
1747   tree type, orig_type = TREE_TYPE (expr);
1748   unsigned HOST_WIDE_INT off0, off1, st;
1749   tree orig_expr = expr;
1750
1751   STRIP_NOPS (expr);
1752
1753   type = TREE_TYPE (expr);
1754   code = TREE_CODE (expr);
1755   *offset = 0;
1756
1757   switch (code)
1758     {
1759     case INTEGER_CST:
1760       if (!cst_and_fits_in_hwi (expr)
1761           || integer_zerop (expr))
1762         return orig_expr;
1763
1764       *offset = int_cst_value (expr);
1765       return build_int_cst (orig_type, 0);
1766
1767     case PLUS_EXPR:
1768     case MINUS_EXPR:
1769       op0 = TREE_OPERAND (expr, 0);
1770       op1 = TREE_OPERAND (expr, 1);
1771
1772       op0 = strip_offset_1 (op0, false, false, &off0);
1773       op1 = strip_offset_1 (op1, false, false, &off1);
1774
1775       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776       if (op0 == TREE_OPERAND (expr, 0)
1777           && op1 == TREE_OPERAND (expr, 1))
1778         return orig_expr;
1779
1780       if (integer_zerop (op1))
1781         expr = op0;
1782       else if (integer_zerop (op0))
1783         {
1784           if (code == PLUS_EXPR)
1785             expr = op1;
1786           else
1787             expr = fold_build1 (NEGATE_EXPR, type, op1);
1788         }
1789       else
1790         expr = fold_build2 (code, type, op0, op1);
1791
1792       return fold_convert (orig_type, expr);
1793
1794     case ARRAY_REF:
1795       if (!inside_addr)
1796         return orig_expr;
1797
1798       step = array_ref_element_size (expr);
1799       if (!cst_and_fits_in_hwi (step))
1800         break;
1801
1802       st = int_cst_value (step);
1803       op1 = TREE_OPERAND (expr, 1);
1804       op1 = strip_offset_1 (op1, false, false, &off1);
1805       *offset = off1 * st;
1806
1807       if (top_compref
1808           && integer_zerop (op1))
1809         {
1810           /* Strip the component reference completely.  */
1811           op0 = TREE_OPERAND (expr, 0);
1812           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1813           *offset += off0;
1814           return op0;
1815         }
1816       break;
1817
1818     case COMPONENT_REF:
1819       if (!inside_addr)
1820         return orig_expr;
1821
1822       tmp = component_ref_field_offset (expr);
1823       if (top_compref
1824           && cst_and_fits_in_hwi (tmp))
1825         {
1826           /* Strip the component reference completely.  */
1827           op0 = TREE_OPERAND (expr, 0);
1828           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829           *offset = off0 + int_cst_value (tmp);
1830           return op0;
1831         }
1832       break;
1833
1834     case ADDR_EXPR:
1835       op0 = TREE_OPERAND (expr, 0);
1836       op0 = strip_offset_1 (op0, true, true, &off0);
1837       *offset += off0;
1838
1839       if (op0 == TREE_OPERAND (expr, 0))
1840         return orig_expr;
1841
1842       expr = build_fold_addr_expr (op0);
1843       return fold_convert (orig_type, expr);
1844
1845     case INDIRECT_REF:
1846       inside_addr = false;
1847       break;
1848
1849     default:
1850       return orig_expr;
1851     }
1852
1853   /* Default handling of expressions for that we want to recurse into
1854      the first operand.  */
1855   op0 = TREE_OPERAND (expr, 0);
1856   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1857   *offset += off0;
1858
1859   if (op0 == TREE_OPERAND (expr, 0)
1860       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1861     return orig_expr;
1862
1863   expr = copy_node (expr);
1864   TREE_OPERAND (expr, 0) = op0;
1865   if (op1)
1866     TREE_OPERAND (expr, 1) = op1;
1867
1868   /* Inside address, we might strip the top level component references,
1869      thus changing type of the expression.  Handling of ADDR_EXPR
1870      will fix that.  */
1871   expr = fold_convert (orig_type, expr);
1872
1873   return expr;
1874 }
1875
1876 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1877
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 {
1881   return strip_offset_1 (expr, false, false, offset);
1882 }
1883
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885    We return unsigned type with the same precision, which avoids problems
1886    with overflows.  */
1887
1888 static tree
1889 generic_type_for (tree type)
1890 {
1891   if (POINTER_TYPE_P (type))
1892     return unsigned_type_for (type);
1893
1894   if (TYPE_UNSIGNED (type))
1895     return type;
1896
1897   return unsigned_type_for (type);
1898 }
1899
1900 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1901    the bitmap to that we should store it.  */
1902
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 {
1907   bitmap *depends_on = data;
1908   struct version_info *info;
1909
1910   if (TREE_CODE (*expr_p) != SSA_NAME)
1911     return NULL_TREE;
1912   info = name_info (fd_ivopts_data, *expr_p);
1913
1914   if (!info->inv_id || info->has_nonlin_use)
1915     return NULL_TREE;
1916
1917   if (!*depends_on)
1918     *depends_on = BITMAP_ALLOC (NULL);
1919   bitmap_set_bit (*depends_on, info->inv_id);
1920
1921   return NULL_TREE;
1922 }
1923
1924 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1925    position to POS.  If USE is not NULL, the candidate is set as related to
1926    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1927    replacement of the final value of the iv by a direct computation.  */
1928
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931                  tree base, tree step, bool important, enum iv_position pos,
1932                  struct iv_use *use, tree incremented_at)
1933 {
1934   unsigned i;
1935   struct iv_cand *cand = NULL;
1936   tree type, orig_type;
1937
1938   if (base)
1939     {
1940       orig_type = TREE_TYPE (base);
1941       type = generic_type_for (orig_type);
1942       if (type != orig_type)
1943         {
1944           base = fold_convert (type, base);
1945           step = fold_convert (type, step);
1946         }
1947     }
1948
1949   for (i = 0; i < n_iv_cands (data); i++)
1950     {
1951       cand = iv_cand (data, i);
1952
1953       if (cand->pos != pos)
1954         continue;
1955
1956       if (cand->incremented_at != incremented_at)
1957         continue;
1958
1959       if (!cand->iv)
1960         {
1961           if (!base && !step)
1962             break;
1963
1964           continue;
1965         }
1966
1967       if (!base && !step)
1968         continue;
1969
1970       if (operand_equal_p (base, cand->iv->base, 0)
1971           && operand_equal_p (step, cand->iv->step, 0))
1972         break;
1973     }
1974
1975   if (i == n_iv_cands (data))
1976     {
1977       cand = XCNEW (struct iv_cand);
1978       cand->id = i;
1979
1980       if (!base && !step)
1981         cand->iv = NULL;
1982       else
1983         cand->iv = alloc_iv (base, step);
1984
1985       cand->pos = pos;
1986       if (pos != IP_ORIGINAL && cand->iv)
1987         {
1988           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1989           cand->var_after = cand->var_before;
1990         }
1991       cand->important = important;
1992       cand->incremented_at = incremented_at;
1993       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1994
1995       if (step
1996           && TREE_CODE (step) != INTEGER_CST)
1997         {
1998           fd_ivopts_data = data;
1999           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2000         }
2001
2002       if (dump_file && (dump_flags & TDF_DETAILS))
2003         dump_cand (dump_file, cand);
2004     }
2005
2006   if (important && !cand->important)
2007     {
2008       cand->important = true;
2009       if (dump_file && (dump_flags & TDF_DETAILS))
2010         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2011     }
2012
2013   if (use)
2014     {
2015       bitmap_set_bit (use->related_cands, i);
2016       if (dump_file && (dump_flags & TDF_DETAILS))
2017         fprintf (dump_file, "Candidate %d is related to use %d\n",
2018                  cand->id, use->id);
2019     }
2020
2021   return cand;
2022 }
2023
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2025    is allowed.
2026
2027    The purpose is to avoid splitting latch edge with a biv increment, thus
2028    creating a jump, possibly confusing other optimization passes and leaving
2029    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2030    is not available (so we do not have a better alternative), or if the latch
2031    edge is already nonempty.  */
2032
2033 static bool
2034 allow_ip_end_pos_p (struct loop *loop)
2035 {
2036   if (!ip_normal_pos (loop))
2037     return true;
2038
2039   if (!empty_block_p (ip_end_pos (loop)))
2040     return true;
2041
2042   return false;
2043 }
2044
2045 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2046    position to POS.  If USE is not NULL, the candidate is set as related to
2047    it.  The candidate computation is scheduled on all available positions.  */
2048
2049 static void
2050 add_candidate (struct ivopts_data *data,
2051                tree base, tree step, bool important, struct iv_use *use)
2052 {
2053   if (ip_normal_pos (data->current_loop))
2054     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055   if (ip_end_pos (data->current_loop)
2056       && allow_ip_end_pos_p (data->current_loop))
2057     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2058 }
2059
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061    type with SIZE bits.  */
2062
2063 static void
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2065                                      unsigned int size)
2066 {
2067   tree type = lang_hooks.types.type_for_size (size, true);
2068   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2069                  true, NULL);
2070 }
2071
2072 /* Adds standard iv candidates.  */
2073
2074 static void
2075 add_standard_iv_candidates (struct ivopts_data *data)
2076 {
2077   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2078
2079   /* The same for a double-integer type if it is still fast enough.  */
2080   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2082 }
2083
2084
2085 /* Adds candidates bases on the old induction variable IV.  */
2086
2087 static void
2088 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2089 {
2090   tree phi, def;
2091   struct iv_cand *cand;
2092
2093   add_candidate (data, iv->base, iv->step, true, NULL);
2094
2095   /* The same, but with initial value zero.  */
2096   add_candidate (data,
2097                  build_int_cst (TREE_TYPE (iv->base), 0),
2098                  iv->step, true, NULL);
2099
2100   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2101   if (TREE_CODE (phi) == PHI_NODE)
2102     {
2103       /* Additionally record the possibility of leaving the original iv
2104          untouched.  */
2105       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2106       cand = add_candidate_1 (data,
2107                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2108                               SSA_NAME_DEF_STMT (def));
2109       cand->var_before = iv->ssa_name;
2110       cand->var_after = def;
2111     }
2112 }
2113
2114 /* Adds candidates based on the old induction variables.  */
2115
2116 static void
2117 add_old_ivs_candidates (struct ivopts_data *data)
2118 {
2119   unsigned i;
2120   struct iv *iv;
2121   bitmap_iterator bi;
2122
2123   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2124     {
2125       iv = ver_info (data, i)->iv;
2126       if (iv && iv->biv_p && !integer_zerop (iv->step))
2127         add_old_iv_candidates (data, iv);
2128     }
2129 }
2130
2131 /* Adds candidates based on the value of the induction variable IV and USE.  */
2132
2133 static void
2134 add_iv_value_candidates (struct ivopts_data *data,
2135                          struct iv *iv, struct iv_use *use)
2136 {
2137   unsigned HOST_WIDE_INT offset;
2138   tree base;
2139
2140   add_candidate (data, iv->base, iv->step, false, use);
2141
2142   /* The same, but with initial value zero.  Make such variable important,
2143      since it is generic enough so that possibly many uses may be based
2144      on it.  */
2145   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146                  iv->step, true, use);
2147
2148   /* Third, try removing the constant offset.  */
2149   base = strip_offset (iv->base, &offset);
2150   if (offset)
2151     add_candidate (data, base, iv->step, false, use);
2152 }
2153
2154 /* Adds candidates based on the uses.  */
2155
2156 static void
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2158 {
2159   unsigned i;
2160
2161   for (i = 0; i < n_iv_uses (data); i++)
2162     {
2163       struct iv_use *use = iv_use (data, i);
2164
2165       if (!use)
2166         continue;
2167
2168       switch (use->type)
2169         {
2170         case USE_NONLINEAR_EXPR:
2171         case USE_COMPARE:
2172         case USE_ADDRESS:
2173           /* Just add the ivs based on the value of the iv used here.  */
2174           add_iv_value_candidates (data, use->iv, use);
2175           break;
2176
2177         default:
2178           gcc_unreachable ();
2179         }
2180     }
2181 }
2182
2183 /* Record important candidates and add them to related_cands bitmaps
2184    if needed.  */
2185
2186 static void
2187 record_important_candidates (struct ivopts_data *data)
2188 {
2189   unsigned i;
2190   struct iv_use *use;
2191
2192   for (i = 0; i < n_iv_cands (data); i++)
2193     {
2194       struct iv_cand *cand = iv_cand (data, i);
2195
2196       if (cand->important)
2197         bitmap_set_bit (data->important_candidates, i);
2198     }
2199
2200   data->consider_all_candidates = (n_iv_cands (data)
2201                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2202
2203   if (data->consider_all_candidates)
2204     {
2205       /* We will not need "related_cands" bitmaps in this case,
2206          so release them to decrease peak memory consumption.  */
2207       for (i = 0; i < n_iv_uses (data); i++)
2208         {
2209           use = iv_use (data, i);
2210           BITMAP_FREE (use->related_cands);
2211         }
2212     }
2213   else
2214     {
2215       /* Add important candidates to the related_cands bitmaps.  */
2216       for (i = 0; i < n_iv_uses (data); i++)
2217         bitmap_ior_into (iv_use (data, i)->related_cands,
2218                          data->important_candidates);
2219     }
2220 }
2221
2222 /* Finds the candidates for the induction variables.  */
2223
2224 static void
2225 find_iv_candidates (struct ivopts_data *data)
2226 {
2227   /* Add commonly used ivs.  */
2228   add_standard_iv_candidates (data);
2229
2230   /* Add old induction variables.  */
2231   add_old_ivs_candidates (data);
2232
2233   /* Add induction variables derived from uses.  */
2234   add_derived_ivs_candidates (data);
2235
2236   /* Record the important candidates.  */
2237   record_important_candidates (data);
2238 }
2239
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242    we allocate a simple list to every use.  */
2243
2244 static void
2245 alloc_use_cost_map (struct ivopts_data *data)
2246 {
2247   unsigned i, size, s, j;
2248
2249   for (i = 0; i < n_iv_uses (data); i++)
2250     {
2251       struct iv_use *use = iv_use (data, i);
2252       bitmap_iterator bi;
2253
2254       if (data->consider_all_candidates)
2255         size = n_iv_cands (data);
2256       else
2257         {
2258           s = 0;
2259           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2260             {
2261               s++;
2262             }
2263
2264           /* Round up to the power of two, so that moduling by it is fast.  */
2265           for (size = 1; size < s; size <<= 1)
2266             continue;
2267         }
2268
2269       use->n_map_members = size;
2270       use->cost_map = XCNEWVEC (struct cost_pair, size);
2271     }
2272 }
2273
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2275    on invariants DEPENDS_ON and that the value used in expressing it
2276    is VALUE.*/
2277
2278 static void
2279 set_use_iv_cost (struct ivopts_data *data,
2280                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281                  bitmap depends_on, tree value)
2282 {
2283   unsigned i, s;
2284
2285   if (cost == INFTY)
2286     {
2287       BITMAP_FREE (depends_on);
2288       return;
2289     }
2290
2291   if (data->consider_all_candidates)
2292     {
2293       use->cost_map[cand->id].cand = cand;
2294       use->cost_map[cand->id].cost = cost;
2295       use->cost_map[cand->id].depends_on = depends_on;
2296       use->cost_map[cand->id].value = value;
2297       return;
2298     }
2299
2300   /* n_map_members is a power of two, so this computes modulo.  */
2301   s = cand->id & (use->n_map_members - 1);
2302   for (i = s; i < use->n_map_members; i++)
2303     if (!use->cost_map[i].cand)
2304       goto found;
2305   for (i = 0; i < s; i++)
2306     if (!use->cost_map[i].cand)
2307       goto found;
2308
2309   gcc_unreachable ();
2310
2311 found:
2312   use->cost_map[i].cand = cand;
2313   use->cost_map[i].cost = cost;
2314   use->cost_map[i].depends_on = depends_on;
2315   use->cost_map[i].value = value;
2316 }
2317
2318 /* Gets cost of (USE, CANDIDATE) pair.  */
2319
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322                  struct iv_cand *cand)
2323 {
2324   unsigned i, s;
2325   struct cost_pair *ret;
2326
2327   if (!cand)
2328     return NULL;
2329
2330   if (data->consider_all_candidates)
2331     {
2332       ret = use->cost_map + cand->id;
2333       if (!ret->cand)
2334         return NULL;
2335
2336       return ret;
2337     }
2338
2339   /* n_map_members is a power of two, so this computes modulo.  */
2340   s = cand->id & (use->n_map_members - 1);
2341   for (i = s; i < use->n_map_members; i++)
2342     if (use->cost_map[i].cand == cand)
2343       return use->cost_map + i;
2344
2345   for (i = 0; i < s; i++)
2346     if (use->cost_map[i].cand == cand)
2347       return use->cost_map + i;
2348
2349   return NULL;
2350 }
2351
2352 /* Returns estimate on cost of computing SEQ.  */
2353
2354 static unsigned
2355 seq_cost (rtx seq)
2356 {
2357   unsigned cost = 0;
2358   rtx set;
2359
2360   for (; seq; seq = NEXT_INSN (seq))
2361     {
2362       set = single_set (seq);
2363       if (set)
2364         cost += rtx_cost (set, SET);
2365       else
2366         cost++;
2367     }
2368
2369   return cost;
2370 }
2371
2372 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2373 static rtx
2374 produce_memory_decl_rtl (tree obj, int *regno)
2375 {
2376   rtx x;
2377
2378   gcc_assert (obj);
2379   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2380     {
2381       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382       x = gen_rtx_SYMBOL_REF (Pmode, name);
2383     }
2384   else
2385     x = gen_raw_REG (Pmode, (*regno)++);
2386
2387   return gen_rtx_MEM (DECL_MODE (obj), x);
2388 }
2389
2390 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2391    walk_tree.  DATA contains the actual fake register number.  */
2392
2393 static tree
2394 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2395 {
2396   tree obj = NULL_TREE;
2397   rtx x = NULL_RTX;
2398   int *regno = data;
2399
2400   switch (TREE_CODE (*expr_p))
2401     {
2402     case ADDR_EXPR:
2403       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2404            handled_component_p (*expr_p);
2405            expr_p = &TREE_OPERAND (*expr_p, 0))
2406         continue;
2407       obj = *expr_p;
2408       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2409         x = produce_memory_decl_rtl (obj, regno);
2410       break;
2411
2412     case SSA_NAME:
2413       *ws = 0;
2414       obj = SSA_NAME_VAR (*expr_p);
2415       if (!DECL_RTL_SET_P (obj))
2416         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2417       break;
2418
2419     case VAR_DECL:
2420     case PARM_DECL:
2421     case RESULT_DECL:
2422       *ws = 0;
2423       obj = *expr_p;
2424
2425       if (DECL_RTL_SET_P (obj))
2426         break;
2427
2428       if (DECL_MODE (obj) == BLKmode)
2429         x = produce_memory_decl_rtl (obj, regno);
2430       else
2431         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2432
2433       break;
2434
2435     default:
2436       break;
2437     }
2438
2439   if (x)
2440     {
2441       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2442       SET_DECL_RTL (obj, x);
2443     }
2444
2445   return NULL_TREE;
2446 }
2447
2448 /* Determines cost of the computation of EXPR.  */
2449
2450 static unsigned
2451 computation_cost (tree expr)
2452 {
2453   rtx seq, rslt;
2454   tree type = TREE_TYPE (expr);
2455   unsigned cost;
2456   /* Avoid using hard regs in ways which may be unsupported.  */
2457   int regno = LAST_VIRTUAL_REGISTER + 1;
2458
2459   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2460   start_sequence ();
2461   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2462   seq = get_insns ();
2463   end_sequence ();
2464
2465   cost = seq_cost (seq);
2466   if (MEM_P (rslt))
2467     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2468
2469   return cost;
2470 }
2471
2472 /* Returns variable containing the value of candidate CAND at statement AT.  */
2473
2474 static tree
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2476 {
2477   if (stmt_after_increment (loop, cand, stmt))
2478     return cand->var_after;
2479   else
2480     return cand->var_before;
2481 }
2482
2483 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2484    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2485
2486 int
2487 tree_int_cst_sign_bit (tree t)
2488 {
2489   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490   unsigned HOST_WIDE_INT w;
2491
2492   if (bitno < HOST_BITS_PER_WIDE_INT)
2493     w = TREE_INT_CST_LOW (t);
2494   else
2495     {
2496       w = TREE_INT_CST_HIGH (t);
2497       bitno -= HOST_BITS_PER_WIDE_INT;
2498     }
2499
2500   return (w >> bitno) & 1;
2501 }
2502
2503 /* If we can prove that TOP = cst * BOT for some constant cst,
2504    store cst to MUL and return true.  Otherwise return false.
2505    The returned value is always sign-extended, regardless of the
2506    signedness of TOP and BOT.  */
2507
2508 static bool
2509 constant_multiple_of (tree top, tree bot, double_int *mul)
2510 {
2511   tree mby;
2512   enum tree_code code;
2513   double_int res, p0, p1;
2514   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2515
2516   STRIP_NOPS (top);
2517   STRIP_NOPS (bot);
2518
2519   if (operand_equal_p (top, bot, 0))
2520     {
2521       *mul = double_int_one;
2522       return true;
2523     }
2524
2525   code = TREE_CODE (top);
2526   switch (code)
2527     {
2528     case MULT_EXPR:
2529       mby = TREE_OPERAND (top, 1);
2530       if (TREE_CODE (mby) != INTEGER_CST)
2531         return false;
2532
2533       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2534         return false;
2535
2536       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2537                               precision);
2538       return true;
2539
2540     case PLUS_EXPR:
2541     case MINUS_EXPR:
2542       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2543           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2544         return false;
2545
2546       if (code == MINUS_EXPR)
2547         p1 = double_int_neg (p1);
2548       *mul = double_int_sext (double_int_add (p0, p1), precision);
2549       return true;
2550
2551     case INTEGER_CST:
2552       if (TREE_CODE (bot) != INTEGER_CST)
2553         return false;
2554
2555       p0 = double_int_sext (tree_to_double_int (top), precision);
2556       p1 = double_int_sext (tree_to_double_int (bot), precision);
2557       if (double_int_zero_p (p1))
2558         return false;
2559       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2560                               precision);
2561       return double_int_zero_p (res);
2562
2563     default:
2564       return false;
2565     }
2566 }
2567
2568 /* Folds EXPR using the affine expressions framework.  */
2569
2570 static tree
2571 fold_affine_expr (tree expr)
2572 {
2573   tree type = TREE_TYPE (expr);
2574   struct affine_tree_combination comb;
2575
2576   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2577     return expr;
2578
2579   tree_to_aff_combination (expr, type, &comb);
2580   return aff_combination_to_tree (&comb);
2581 }
2582
2583 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2584    same precision that is at least as wide as the precision of TYPE, stores
2585    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2586    type of A and B.  */
2587
2588 static tree
2589 determine_common_wider_type (tree *a, tree *b)
2590 {
2591   tree wider_type = NULL;
2592   tree suba, subb;
2593   tree atype = TREE_TYPE (*a);
2594
2595   if ((TREE_CODE (*a) == NOP_EXPR
2596        || TREE_CODE (*a) == CONVERT_EXPR))
2597     {
2598       suba = TREE_OPERAND (*a, 0);
2599       wider_type = TREE_TYPE (suba);
2600       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2601         return atype;
2602     }
2603   else
2604     return atype;
2605
2606   if ((TREE_CODE (*b) == NOP_EXPR
2607        || TREE_CODE (*b) == CONVERT_EXPR))
2608     {
2609       subb = TREE_OPERAND (*b, 0);
2610       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2611         return atype;
2612     }
2613   else
2614     return atype;
2615
2616   *a = suba;
2617   *b = subb;
2618   return wider_type;
2619 }
2620
2621 /* Determines the expression by that USE is expressed from induction variable
2622    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2623    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2624
2625 static bool
2626 get_computation_aff (struct loop *loop,
2627                      struct iv_use *use, struct iv_cand *cand, tree at,
2628                      struct affine_tree_combination *aff)
2629 {
2630   tree ubase = use->iv->base;
2631   tree ustep = use->iv->step;
2632   tree cbase = cand->iv->base;
2633   tree cstep = cand->iv->step, cstep_common;
2634   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2635   tree common_type, var;
2636   tree uutype;
2637   aff_tree cbase_aff, var_aff;
2638   double_int rat;
2639
2640   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2641     {
2642       /* We do not have a precision to express the values of use.  */
2643       return false;
2644     }
2645
2646   var = var_at_stmt (loop, cand, at);
2647   uutype = unsigned_type_for (utype);
2648
2649   /* If the conversion is not noop, perform it.  */
2650   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2651     {
2652       cstep = fold_convert (uutype, cstep);
2653       cbase = fold_convert (uutype, cbase);
2654       var = fold_convert (uutype, var);
2655     }
2656
2657   if (!constant_multiple_of (ustep, cstep, &rat))
2658     return false;
2659
2660   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2661      type, we achieve better folding by computing their difference in this
2662      wider type, and cast the result to UUTYPE.  We do not need to worry about
2663      overflows, as all the arithmetics will in the end be performed in UUTYPE
2664      anyway.  */
2665   common_type = determine_common_wider_type (&ubase, &cbase);
2666
2667   /* use = ubase - ratio * cbase + ratio * var.  */
2668   tree_to_aff_combination (ubase, common_type, aff);
2669   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2670   tree_to_aff_combination (var, uutype, &var_aff);
2671
2672   /* We need to shift the value if we are after the increment.  */
2673   if (stmt_after_increment (loop, cand, at))
2674     {
2675       aff_tree cstep_aff;
2676
2677       if (common_type != uutype)
2678         cstep_common = fold_convert (common_type, cstep);
2679       else
2680         cstep_common = cstep;
2681
2682       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2683       aff_combination_add (&cbase_aff, &cstep_aff);
2684     }
2685
2686   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2687   aff_combination_add (aff, &cbase_aff);
2688   if (common_type != uutype)
2689     aff_combination_convert (aff, uutype);
2690
2691   aff_combination_scale (&var_aff, rat);
2692   aff_combination_add (aff, &var_aff);
2693
2694   return true;
2695 }
2696
2697 /* Determines the expression by that USE is expressed from induction variable
2698    CAND at statement AT in LOOP.  The computation is unshared.  */
2699
2700 static tree
2701 get_computation_at (struct loop *loop,
2702                     struct iv_use *use, struct iv_cand *cand, tree at)
2703 {
2704   aff_tree aff;
2705   tree type = TREE_TYPE (use->iv->base);
2706
2707   if (!get_computation_aff (loop, use, cand, at, &aff))
2708     return NULL_TREE;
2709   unshare_aff_combination (&aff);
2710   return fold_convert (type, aff_combination_to_tree (&aff));
2711 }
2712
2713 /* Determines the expression by that USE is expressed from induction variable
2714    CAND in LOOP.  The computation is unshared.  */
2715
2716 static tree
2717 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2718 {
2719   return get_computation_at (loop, use, cand, use->stmt);
2720 }
2721
2722 /* Returns cost of addition in MODE.  */
2723
2724 static unsigned
2725 add_cost (enum machine_mode mode)
2726 {
2727   static unsigned costs[NUM_MACHINE_MODES];
2728   rtx seq;
2729   unsigned cost;
2730
2731   if (costs[mode])
2732     return costs[mode];
2733
2734   start_sequence ();
2735   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2736                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2737                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2738                  NULL_RTX);
2739   seq = get_insns ();
2740   end_sequence ();
2741
2742   cost = seq_cost (seq);
2743   if (!cost)
2744     cost = 1;
2745
2746   costs[mode] = cost;
2747
2748   if (dump_file && (dump_flags & TDF_DETAILS))
2749     fprintf (dump_file, "Addition in %s costs %d\n",
2750              GET_MODE_NAME (mode), cost);
2751   return cost;
2752 }
2753
2754 /* Entry in a hashtable of already known costs for multiplication.  */
2755 struct mbc_entry
2756 {
2757   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2758   enum machine_mode mode;       /* In mode.  */
2759   unsigned cost;                /* The cost.  */
2760 };
2761
2762 /* Counts hash value for the ENTRY.  */
2763
2764 static hashval_t
2765 mbc_entry_hash (const void *entry)
2766 {
2767   const struct mbc_entry *e = entry;
2768
2769   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2770 }
2771
2772 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2773
2774 static int
2775 mbc_entry_eq (const void *entry1, const void *entry2)
2776 {
2777   const struct mbc_entry *e1 = entry1;
2778   const struct mbc_entry *e2 = entry2;
2779
2780   return (e1->mode == e2->mode
2781           && e1->cst == e2->cst);
2782 }
2783
2784 /* Returns cost of multiplication by constant CST in MODE.  */
2785
2786 unsigned
2787 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2788 {
2789   static htab_t costs;
2790   struct mbc_entry **cached, act;
2791   rtx seq;
2792   unsigned cost;
2793
2794   if (!costs)
2795     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2796
2797   act.mode = mode;
2798   act.cst = cst;
2799   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2800   if (*cached)
2801     return (*cached)->cost;
2802
2803   *cached = XNEW (struct mbc_entry);
2804   (*cached)->mode = mode;
2805   (*cached)->cst = cst;
2806
2807   start_sequence ();
2808   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2809                gen_int_mode (cst, mode), NULL_RTX, 0);
2810   seq = get_insns ();
2811   end_sequence ();
2812
2813   cost = seq_cost (seq);
2814
2815   if (dump_file && (dump_flags & TDF_DETAILS))
2816     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2817              (int) cst, GET_MODE_NAME (mode), cost);
2818
2819   (*cached)->cost = cost;
2820
2821   return cost;
2822 }
2823
2824 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2825    validity for a memory reference accessing memory of mode MODE.  */
2826
2827 bool
2828 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2829 {
2830 #define MAX_RATIO 128
2831   static sbitmap valid_mult[MAX_MACHINE_MODE];
2832
2833   if (!valid_mult[mode])
2834     {
2835       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2836       rtx addr;
2837       HOST_WIDE_INT i;
2838
2839       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2840       sbitmap_zero (valid_mult[mode]);
2841       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2842       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2843         {
2844           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2845           if (memory_address_p (mode, addr))
2846             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2847         }
2848
2849       if (dump_file && (dump_flags & TDF_DETAILS))
2850         {
2851           fprintf (dump_file, "  allowed multipliers:");
2852           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2853             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2854               fprintf (dump_file, " %d", (int) i);
2855           fprintf (dump_file, "\n");
2856           fprintf (dump_file, "\n");
2857         }
2858     }
2859
2860   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2861     return false;
2862
2863   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2864 }
2865
2866 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2867    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2868    variable is omitted.  Compute the cost for a memory reference that accesses
2869    a memory location of mode MEM_MODE.
2870
2871    TODO -- there must be some better way.  This all is quite crude.  */
2872
2873 static unsigned
2874 get_address_cost (bool symbol_present, bool var_present,
2875                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2876                   enum machine_mode mem_mode)
2877 {
2878   static bool initialized[MAX_MACHINE_MODE];
2879   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2880   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2881   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2882   unsigned cost, acost;
2883   bool offset_p, ratio_p;
2884   HOST_WIDE_INT s_offset;
2885   unsigned HOST_WIDE_INT mask;
2886   unsigned bits;
2887
2888   if (!initialized[mem_mode])
2889     {
2890       HOST_WIDE_INT i;
2891       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2892       int old_cse_not_expected;
2893       unsigned sym_p, var_p, off_p, rat_p, add_c;
2894       rtx seq, addr, base;
2895       rtx reg0, reg1;
2896
2897       initialized[mem_mode] = true;
2898
2899       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2900
2901       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2902       for (i = start; i <= 1 << 20; i <<= 1)
2903         {
2904           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2905           if (!memory_address_p (mem_mode, addr))
2906             break;
2907         }
2908       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2909       off[mem_mode] = max_offset[mem_mode];
2910
2911       for (i = start; i <= 1 << 20; i <<= 1)
2912         {
2913           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2914           if (!memory_address_p (mem_mode, addr))
2915             break;
2916         }
2917       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2918
2919       if (dump_file && (dump_flags & TDF_DETAILS))
2920         {
2921           fprintf (dump_file, "get_address_cost:\n");
2922           fprintf (dump_file, "  min offset %s %d\n",
2923                    GET_MODE_NAME (mem_mode),
2924                    (int) min_offset[mem_mode]);
2925           fprintf (dump_file, "  max offset %s %d\n",
2926                    GET_MODE_NAME (mem_mode),
2927                    (int) max_offset[mem_mode]);
2928         }
2929
2930       rat[mem_mode] = 1;
2931       for (i = 2; i <= MAX_RATIO; i++)
2932         if (multiplier_allowed_in_address_p (i, mem_mode))
2933           {
2934             rat[mem_mode] = i;
2935             break;
2936           }
2937
2938       /* Compute the cost of various addressing modes.  */
2939       acost = 0;
2940       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2941       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2942
2943       for (i = 0; i < 16; i++)
2944         {
2945           sym_p = i & 1;
2946           var_p = (i >> 1) & 1;
2947           off_p = (i >> 2) & 1;
2948           rat_p = (i >> 3) & 1;
2949
2950           addr = reg0;
2951           if (rat_p)
2952             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2953                                    gen_int_mode (rat[mem_mode], Pmode));
2954
2955           if (var_p)
2956             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2957
2958           if (sym_p)
2959             {
2960               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2961               if (off_p)
2962                 base = gen_rtx_fmt_e (CONST, Pmode,
2963                                       gen_rtx_fmt_ee (PLUS, Pmode,
2964                                                       base,
2965                                                       gen_int_mode (off[mem_mode],
2966                                                                     Pmode)));
2967             }
2968           else if (off_p)
2969             base = gen_int_mode (off[mem_mode], Pmode);
2970           else
2971             base = NULL_RTX;
2972
2973           if (base)
2974             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2975
2976           start_sequence ();
2977           /* To avoid splitting addressing modes, pretend that no cse will
2978              follow.  */
2979           old_cse_not_expected = cse_not_expected;
2980           cse_not_expected = true;
2981           addr = memory_address (mem_mode, addr);
2982           cse_not_expected = old_cse_not_expected;
2983           seq = get_insns ();
2984           end_sequence ();
2985
2986           acost = seq_cost (seq);
2987           acost += address_cost (addr, mem_mode);
2988
2989           if (!acost)
2990             acost = 1;
2991           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2992         }
2993
2994       /* On some targets, it is quite expensive to load symbol to a register,
2995          which makes addresses that contain symbols look much more expensive.
2996          However, the symbol will have to be loaded in any case before the
2997          loop (and quite likely we have it in register already), so it does not
2998          make much sense to penalize them too heavily.  So make some final
2999          tweaks for the SYMBOL_PRESENT modes:
3000
3001          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3002          var is cheaper, use this mode with small penalty.
3003          If VAR_PRESENT is true, try whether the mode with
3004          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3005          if this is the case, use it.  */
3006       add_c = add_cost (Pmode);
3007       for (i = 0; i < 8; i++)
3008         {
3009           var_p = i & 1;
3010           off_p = (i >> 1) & 1;
3011           rat_p = (i >> 2) & 1;
3012
3013           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3014           if (var_p)
3015             acost += add_c;
3016
3017           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3018             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3019         }
3020
3021       if (dump_file && (dump_flags & TDF_DETAILS))
3022         {
3023           fprintf (dump_file, "Address costs:\n");
3024
3025           for (i = 0; i < 16; i++)
3026             {
3027               sym_p = i & 1;
3028               var_p = (i >> 1) & 1;
3029               off_p = (i >> 2) & 1;
3030               rat_p = (i >> 3) & 1;
3031
3032               fprintf (dump_file, "  ");
3033               if (sym_p)
3034                 fprintf (dump_file, "sym + ");
3035               if (var_p)
3036                 fprintf (dump_file, "var + ");
3037               if (off_p)
3038                 fprintf (dump_file, "cst + ");
3039               if (rat_p)
3040                 fprintf (dump_file, "rat * ");
3041
3042               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3043               fprintf (dump_file, "index costs %d\n", acost);
3044             }
3045           fprintf (dump_file, "\n");
3046         }
3047     }
3048
3049   bits = GET_MODE_BITSIZE (Pmode);
3050   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3051   offset &= mask;
3052   if ((offset >> (bits - 1) & 1))
3053     offset |= ~mask;
3054   s_offset = offset;
3055
3056   cost = 0;
3057   offset_p = (s_offset != 0
3058               && min_offset[mem_mode] <= s_offset
3059               && s_offset <= max_offset[mem_mode]);
3060   ratio_p = (ratio != 1
3061              && multiplier_allowed_in_address_p (ratio, mem_mode));
3062
3063   if (ratio != 1 && !ratio_p)
3064     cost += multiply_by_cost (ratio, Pmode);
3065
3066   if (s_offset && !offset_p && !symbol_present)
3067     cost += add_cost (Pmode);
3068
3069   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3070   return cost + acost;
3071 }
3072
3073 /* Estimates cost of forcing expression EXPR into a variable.  */
3074
3075 unsigned
3076 force_expr_to_var_cost (tree expr)
3077 {
3078   static bool costs_initialized = false;
3079   static unsigned integer_cost;
3080   static unsigned symbol_cost;
3081   static unsigned address_cost;
3082   tree op0, op1;
3083   unsigned cost0, cost1, cost;
3084   enum machine_mode mode;
3085
3086   if (!costs_initialized)
3087     {
3088       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3089       rtx x = gen_rtx_MEM (DECL_MODE (var),
3090                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3091       tree addr;
3092       tree type = build_pointer_type (integer_type_node);
3093
3094       integer_cost = computation_cost (build_int_cst (integer_type_node,
3095                                                       2000));
3096
3097       SET_DECL_RTL (var, x);
3098       TREE_STATIC (var) = 1;
3099       addr = build1 (ADDR_EXPR, type, var);
3100       symbol_cost = computation_cost (addr) + 1;
3101
3102       address_cost
3103         = computation_cost (build2 (PLUS_EXPR, type,
3104                                     addr,
3105                                     build_int_cst (type, 2000))) + 1;
3106       if (dump_file && (dump_flags & TDF_DETAILS))
3107         {
3108           fprintf (dump_file, "force_expr_to_var_cost:\n");
3109           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3110           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3111           fprintf (dump_file, "  address %d\n", (int) address_cost);
3112           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3113           fprintf (dump_file, "\n");
3114         }
3115
3116       costs_initialized = true;
3117     }
3118
3119   STRIP_NOPS (expr);
3120
3121   if (SSA_VAR_P (expr))
3122     return 0;
3123
3124   if (TREE_INVARIANT (expr))
3125     {
3126       if (TREE_CODE (expr) == INTEGER_CST)
3127         return integer_cost;
3128
3129       if (TREE_CODE (expr) == ADDR_EXPR)
3130         {
3131           tree obj = TREE_OPERAND (expr, 0);
3132
3133           if (TREE_CODE (obj) == VAR_DECL
3134               || TREE_CODE (obj) == PARM_DECL
3135               || TREE_CODE (obj) == RESULT_DECL)
3136             return symbol_cost;
3137         }
3138
3139       return address_cost;
3140     }
3141
3142   switch (TREE_CODE (expr))
3143     {
3144     case PLUS_EXPR:
3145     case MINUS_EXPR:
3146     case MULT_EXPR:
3147       op0 = TREE_OPERAND (expr, 0);
3148       op1 = TREE_OPERAND (expr, 1);
3149       STRIP_NOPS (op0);
3150       STRIP_NOPS (op1);
3151
3152       if (is_gimple_val (op0))
3153         cost0 = 0;
3154       else
3155         cost0 = force_expr_to_var_cost (op0);
3156
3157       if (is_gimple_val (op1))
3158         cost1 = 0;
3159       else
3160         cost1 = force_expr_to_var_cost (op1);
3161
3162       break;
3163
3164     default:
3165       /* Just an arbitrary value, FIXME.  */
3166       return target_spill_cost;
3167     }
3168
3169   mode = TYPE_MODE (TREE_TYPE (expr));
3170   switch (TREE_CODE (expr))
3171     {
3172     case PLUS_EXPR:
3173     case MINUS_EXPR:
3174       cost = add_cost (mode);
3175       break;
3176
3177     case MULT_EXPR:
3178       if (cst_and_fits_in_hwi (op0))
3179         cost = multiply_by_cost (int_cst_value (op0), mode);
3180       else if (cst_and_fits_in_hwi (op1))
3181         cost = multiply_by_cost (int_cst_value (op1), mode);
3182       else
3183         return target_spill_cost;
3184       break;
3185
3186     default:
3187       gcc_unreachable ();
3188     }
3189
3190   cost += cost0;
3191   cost += cost1;
3192
3193   /* Bound the cost by target_spill_cost.  The parts of complicated
3194      computations often are either loop invariant or at least can
3195      be shared between several iv uses, so letting this grow without
3196      limits would not give reasonable results.  */
3197   return cost < target_spill_cost ? cost : target_spill_cost;
3198 }
3199
3200 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3201    invariants the computation depends on.  */
3202
3203 static unsigned
3204 force_var_cost (struct ivopts_data *data,
3205                 tree expr, bitmap *depends_on)
3206 {
3207   if (depends_on)
3208     {
3209       fd_ivopts_data = data;
3210       walk_tree (&expr, find_depends, depends_on, NULL);
3211     }
3212
3213   return force_expr_to_var_cost (expr);
3214 }
3215
3216 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3217    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3218    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3219    invariants the computation depends on.  */
3220
3221 static unsigned
3222 split_address_cost (struct ivopts_data *data,
3223                     tree addr, bool *symbol_present, bool *var_present,
3224                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3225 {
3226   tree core;
3227   HOST_WIDE_INT bitsize;
3228   HOST_WIDE_INT bitpos;
3229   tree toffset;
3230   enum machine_mode mode;
3231   int unsignedp, volatilep;
3232
3233   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3234                               &unsignedp, &volatilep, false);
3235
3236   if (toffset != 0
3237       || bitpos % BITS_PER_UNIT != 0
3238       || TREE_CODE (core) != VAR_DECL)
3239     {
3240       *symbol_present = false;
3241       *var_present = true;
3242       fd_ivopts_data = data;
3243       walk_tree (&addr, find_depends, depends_on, NULL);
3244       return target_spill_cost;
3245     }
3246
3247   *offset += bitpos / BITS_PER_UNIT;
3248   if (TREE_STATIC (core)
3249       || DECL_EXTERNAL (core))
3250     {
3251       *symbol_present = true;
3252       *var_present = false;
3253       return 0;
3254     }
3255
3256   *symbol_present = false;
3257   *var_present = true;
3258   return 0;
3259 }
3260
3261 /* Estimates cost of expressing difference of addresses E1 - E2 as
3262    var + symbol + offset.  The value of offset is added to OFFSET,
3263    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3264    part is missing.  DEPENDS_ON is a set of the invariants the computation
3265    depends on.  */
3266
3267 static unsigned
3268 ptr_difference_cost (struct ivopts_data *data,
3269                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3270                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 {
3272   HOST_WIDE_INT diff = 0;
3273   unsigned cost;
3274
3275   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3276
3277   if (ptr_difference_const (e1, e2, &diff))
3278     {
3279       *offset += diff;
3280       *symbol_present = false;
3281       *var_present = false;
3282       return 0;
3283     }
3284
3285   if (e2 == integer_zero_node)
3286     return split_address_cost (data, TREE_OPERAND (e1, 0),
3287                                symbol_present, var_present, offset, depends_on);
3288
3289   *symbol_present = false;
3290   *var_present = true;
3291
3292   cost = force_var_cost (data, e1, depends_on);
3293   cost += force_var_cost (data, e2, depends_on);
3294   cost += add_cost (Pmode);
3295
3296   return cost;
3297 }
3298
3299 /* Estimates cost of expressing difference E1 - E2 as
3300    var + symbol + offset.  The value of offset is added to OFFSET,
3301    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3302    part is missing.  DEPENDS_ON is a set of the invariants the computation
3303    depends on.  */
3304
3305 static unsigned
3306 difference_cost (struct ivopts_data *data,
3307                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3308                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3309 {
3310   unsigned cost;
3311   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3312   unsigned HOST_WIDE_INT off1, off2;
3313
3314   e1 = strip_offset (e1, &off1);
3315   e2 = strip_offset (e2, &off2);
3316   *offset += off1 - off2;
3317
3318   STRIP_NOPS (e1);
3319   STRIP_NOPS (e2);
3320
3321   if (TREE_CODE (e1) == ADDR_EXPR)
3322     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3323                                 depends_on);
3324   *symbol_present = false;
3325
3326   if (operand_equal_p (e1, e2, 0))
3327     {
3328       *var_present = false;
3329       return 0;
3330     }
3331   *var_present = true;
3332   if (integer_zerop (e2))
3333     return force_var_cost (data, e1, depends_on);
3334
3335   if (integer_zerop (e1))
3336     {
3337       cost = force_var_cost (data, e2, depends_on);
3338       cost += multiply_by_cost (-1, mode);
3339
3340       return cost;
3341     }
3342
3343   cost = force_var_cost (data, e1, depends_on);
3344   cost += force_var_cost (data, e2, depends_on);
3345   cost += add_cost (mode);
3346
3347   return cost;
3348 }
3349
3350 /* Determines the cost of the computation by that USE is expressed
3351    from induction variable CAND.  If ADDRESS_P is true, we just need
3352    to create an address from it, otherwise we want to get it into
3353    register.  A set of invariants we depend on is stored in
3354    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3355
3356 static unsigned
3357 get_computation_cost_at (struct ivopts_data *data,
3358                          struct iv_use *use, struct iv_cand *cand,
3359                          bool address_p, bitmap *depends_on, tree at)
3360 {
3361   tree ubase = use->iv->base, ustep = use->iv->step;
3362   tree cbase, cstep;
3363   tree utype = TREE_TYPE (ubase), ctype;
3364   unsigned HOST_WIDE_INT cstepi, offset = 0;
3365   HOST_WIDE_INT ratio, aratio;
3366   bool var_present, symbol_present;
3367   unsigned cost = 0, n_sums;
3368   double_int rat;
3369
3370   *depends_on = NULL;
3371
3372   /* Only consider real candidates.  */
3373   if (!cand->iv)
3374     return INFTY;
3375
3376   cbase = cand->iv->base;
3377   cstep = cand->iv->step;
3378   ctype = TREE_TYPE (cbase);
3379
3380   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381     {
3382       /* We do not have a precision to express the values of use.  */
3383       return INFTY;
3384     }
3385
3386   if (address_p)
3387     {
3388       /* Do not try to express address of an object with computation based
3389          on address of a different object.  This may cause problems in rtl
3390          level alias analysis (that does not expect this to be happening,
3391          as this is illegal in C), and would be unlikely to be useful
3392          anyway.  */
3393       if (use->iv->base_object
3394           && cand->iv->base_object
3395           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3396         return INFTY;
3397     }
3398
3399   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400     {
3401       /* TODO -- add direct handling of this case.  */
3402       goto fallback;
3403     }
3404
3405   /* CSTEPI is removed from the offset in case statement is after the
3406      increment.  If the step is not constant, we use zero instead.
3407      This is a bit imprecise (there is the extra addition), but
3408      redundancy elimination is likely to transform the code so that
3409      it uses value of the variable before increment anyway,
3410      so it is not that much unrealistic.  */
3411   if (cst_and_fits_in_hwi (cstep))
3412     cstepi = int_cst_value (cstep);
3413   else
3414     cstepi = 0;
3415
3416   if (!constant_multiple_of (ustep, cstep, &rat))
3417     return INFTY;
3418
3419   if (double_int_fits_in_shwi_p (rat))
3420     ratio = double_int_to_shwi (rat);
3421   else
3422     return INFTY;
3423
3424   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3425      or ratio == 1, it is better to handle this like
3426
3427      ubase - ratio * cbase + ratio * var
3428
3429      (also holds in the case ratio == -1, TODO.  */
3430
3431   if (cst_and_fits_in_hwi (cbase))
3432     {
3433       offset = - ratio * int_cst_value (cbase);
3434       cost += difference_cost (data,
3435                                ubase, integer_zero_node,
3436                                &symbol_present, &var_present, &offset,
3437                                depends_on);
3438     }
3439   else if (ratio == 1)
3440     {
3441       cost += difference_cost (data,
3442                                ubase, cbase,
3443                                &symbol_present, &var_present, &offset,
3444                                depends_on);
3445     }
3446   else
3447     {
3448       cost += force_var_cost (data, cbase, depends_on);
3449       cost += add_cost (TYPE_MODE (ctype));
3450       cost += difference_cost (data,
3451                                ubase, integer_zero_node,
3452                                &symbol_present, &var_present, &offset,
3453                                depends_on);
3454     }
3455
3456   /* If we are after the increment, the value of the candidate is higher by
3457      one iteration.  */
3458   if (stmt_after_increment (data->current_loop, cand, at))
3459     offset -= ratio * cstepi;
3460
3461   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3462      (symbol/var/const parts may be omitted).  If we are looking for an address,
3463      find the cost of addressing this.  */
3464   if (address_p)
3465     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3466                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3467
3468   /* Otherwise estimate the costs for computing the expression.  */
3469   aratio = ratio > 0 ? ratio : -ratio;
3470   if (!symbol_present && !var_present && !offset)
3471     {
3472       if (ratio != 1)
3473         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3474
3475       return cost;
3476     }
3477
3478   if (aratio != 1)
3479     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3480
3481   n_sums = 1;
3482   if (var_present
3483       /* Symbol + offset should be compile-time computable.  */
3484       && (symbol_present || offset))
3485     n_sums++;
3486
3487   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3488
3489 fallback:
3490   {
3491     /* Just get the expression, expand it and measure the cost.  */
3492     tree comp = get_computation_at (data->current_loop, use, cand, at);
3493
3494     if (!comp)
3495       return INFTY;
3496
3497     if (address_p)
3498       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499
3500     return computation_cost (comp);
3501   }
3502 }
3503
3504 /* Determines the cost of the computation by that USE is expressed
3505    from induction variable CAND.  If ADDRESS_P is true, we just need
3506    to create an address from it, otherwise we want to get it into
3507    register.  A set of invariants we depend on is stored in
3508    DEPENDS_ON.  */
3509
3510 static unsigned
3511 get_computation_cost (struct ivopts_data *data,
3512                       struct iv_use *use, struct iv_cand *cand,
3513                       bool address_p, bitmap *depends_on)
3514 {
3515   return get_computation_cost_at (data,
3516                                   use, cand, address_p, depends_on, use->stmt);
3517 }
3518
3519 /* Determines cost of basing replacement of USE on CAND in a generic
3520    expression.  */
3521
3522 static bool
3523 determine_use_iv_cost_generic (struct ivopts_data *data,
3524                                struct iv_use *use, struct iv_cand *cand)
3525 {
3526   bitmap depends_on;
3527   unsigned cost;
3528
3529   /* The simple case first -- if we need to express value of the preserved
3530      original biv, the cost is 0.  This also prevents us from counting the
3531      cost of increment twice -- once at this use and once in the cost of
3532      the candidate.  */
3533   if (cand->pos == IP_ORIGINAL
3534       && cand->incremented_at == use->stmt)
3535     {
3536       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3537       return true;
3538     }
3539
3540   cost = get_computation_cost (data, use, cand, false, &depends_on);
3541   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542
3543   return cost != INFTY;
3544 }
3545
3546 /* Determines cost of basing replacement of USE on CAND in an address.  */
3547
3548 static bool
3549 determine_use_iv_cost_address (struct ivopts_data *data,
3550                                struct iv_use *use, struct iv_cand *cand)
3551 {
3552   bitmap depends_on;
3553   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554
3555   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556
3557   return cost != INFTY;
3558 }
3559
3560 /* Computes value of induction variable IV in iteration NITER.  */
3561
3562 static tree
3563 iv_value (struct iv *iv, tree niter)
3564 {
3565   tree val;
3566   tree type = TREE_TYPE (iv->base);
3567
3568   niter = fold_convert (type, niter);
3569   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3570
3571   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3572 }
3573
3574 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3575
3576 static tree
3577 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3578 {
3579   tree val = iv_value (cand->iv, niter);
3580   tree type = TREE_TYPE (cand->iv->base);
3581
3582   if (stmt_after_increment (loop, cand, at))
3583     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3584
3585   return val;
3586 }
3587
3588 /* Returns period of induction variable iv.  */
3589
3590 static tree
3591 iv_period (struct iv *iv)
3592 {
3593   tree step = iv->step, period, type;
3594   tree pow2div;
3595
3596   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3597
3598   /* Period of the iv is gcd (step, type range).  Since type range is power
3599      of two, it suffices to determine the maximum power of two that divides
3600      step.  */
3601   pow2div = num_ending_zeros (step);
3602   type = unsigned_type_for (TREE_TYPE (step));
3603
3604   period = build_low_bits_mask (type,
3605                                 (TYPE_PRECISION (type)
3606                                  - tree_low_cst (pow2div, 1)));
3607
3608   return period;
3609 }
3610
3611 /* Returns the comparison operator used when eliminating the iv USE.  */
3612
3613 static enum tree_code
3614 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3615 {
3616   struct loop *loop = data->current_loop;
3617   basic_block ex_bb;
3618   edge exit;
3619
3620   ex_bb = bb_for_stmt (use->stmt);
3621   exit = EDGE_SUCC (ex_bb, 0);
3622   if (flow_bb_inside_loop_p (loop, exit->dest))
3623     exit = EDGE_SUCC (ex_bb, 1);
3624
3625   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3626 }
3627
3628 /* Check whether it is possible to express the condition in USE by comparison
3629    of candidate CAND.  If so, store the value compared with to BOUND.  */
3630
3631 static bool
3632 may_eliminate_iv (struct ivopts_data *data,
3633                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3634 {
3635   basic_block ex_bb;
3636   edge exit;
3637   tree nit, nit_type;
3638   tree wider_type, period, per_type;
3639   struct loop *loop = data->current_loop;
3640
3641   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3642     return false;
3643
3644   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3645      for other conditions inside loop body.  */
3646   ex_bb = bb_for_stmt (use->stmt);
3647   if (use->stmt != last_stmt (ex_bb)
3648       || TREE_CODE (use->stmt) != COND_EXPR)
3649     return false;
3650   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3651     return false;
3652
3653   exit = EDGE_SUCC (ex_bb, 0);
3654   if (flow_bb_inside_loop_p (loop, exit->dest))
3655     exit = EDGE_SUCC (ex_bb, 1);
3656   if (flow_bb_inside_loop_p (loop, exit->dest))
3657     return false;
3658
3659   nit = niter_for_exit (data, exit);
3660   if (!nit)
3661     return false;
3662
3663   nit_type = TREE_TYPE (nit);
3664
3665   /* Determine whether we may use the variable to test whether niter iterations
3666      elapsed.  This is the case iff the period of the induction variable is
3667      greater than the number of iterations.  */
3668   period = iv_period (cand->iv);
3669   if (!period)
3670     return false;
3671   per_type = TREE_TYPE (period);
3672
3673   wider_type = TREE_TYPE (period);
3674   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3675     wider_type = per_type;
3676   else
3677     wider_type = nit_type;
3678
3679   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3680                                       fold_convert (wider_type, period),
3681                                       fold_convert (wider_type, nit))))
3682     return false;
3683
3684   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
3685   return true;
3686 }
3687
3688 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3689
3690 static bool
3691 determine_use_iv_cost_condition (struct ivopts_data *data,
3692                                  struct iv_use *use, struct iv_cand *cand)
3693 {
3694   tree bound = NULL_TREE, op, cond;
3695   bitmap depends_on = NULL;
3696   unsigned cost;
3697
3698   /* Only consider real candidates.  */
3699   if (!cand->iv)
3700     {
3701       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3702       return false;
3703     }
3704
3705   if (may_eliminate_iv (data, use, cand, &bound))
3706     {
3707       cost = force_var_cost (data, bound, &depends_on);
3708
3709       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3710       return cost != INFTY;
3711     }
3712
3713   /* The induction variable elimination failed; just express the original
3714      giv.  If it is compared with an invariant, note that we cannot get
3715      rid of it.  */
3716   cost = get_computation_cost (data, use, cand, false, &depends_on);
3717
3718   cond = *use->op_p;
3719   if (TREE_CODE (cond) != SSA_NAME)
3720     {
3721       op = TREE_OPERAND (cond, 0);
3722       if (TREE_CODE (op) == SSA_NAME
3723           && !integer_zerop (get_iv (data, op)->step))
3724         op = TREE_OPERAND (cond, 1);
3725       if (TREE_CODE (op) == SSA_NAME)
3726         {
3727           op = get_iv (data, op)->base;
3728           fd_ivopts_data = data;
3729           walk_tree (&op, find_depends, &depends_on, NULL);
3730         }
3731     }
3732
3733   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3734   return cost != INFTY;
3735 }
3736
3737 /* Determines cost of basing replacement of USE on CAND.  Returns false
3738    if USE cannot be based on CAND.  */
3739
3740 static bool
3741 determine_use_iv_cost (struct ivopts_data *data,
3742                        struct iv_use *use, struct iv_cand *cand)
3743 {
3744   switch (use->type)
3745     {
3746     case USE_NONLINEAR_EXPR:
3747       return determine_use_iv_cost_generic (data, use, cand);
3748
3749     case USE_ADDRESS:
3750       return determine_use_iv_cost_address (data, use, cand);
3751
3752     case USE_COMPARE:
3753       return determine_use_iv_cost_condition (data, use, cand);
3754
3755     default:
3756       gcc_unreachable ();
3757     }
3758 }
3759
3760 /* Determines costs of basing the use of the iv on an iv candidate.  */
3761
3762 static void
3763 determine_use_iv_costs (struct ivopts_data *data)
3764 {
3765   unsigned i, j;
3766   struct iv_use *use;
3767   struct iv_cand *cand;
3768   bitmap to_clear = BITMAP_ALLOC (NULL);
3769
3770   alloc_use_cost_map (data);
3771
3772   for (i = 0; i < n_iv_uses (data); i++)
3773     {
3774       use = iv_use (data, i);
3775
3776       if (data->consider_all_candidates)
3777         {
3778           for (j = 0; j < n_iv_cands (data); j++)
3779             {
3780               cand = iv_cand (data, j);
3781               determine_use_iv_cost (data, use, cand);
3782             }
3783         }
3784       else
3785         {
3786           bitmap_iterator bi;
3787
3788           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3789             {
3790               cand = iv_cand (data, j);
3791               if (!determine_use_iv_cost (data, use, cand))
3792                 bitmap_set_bit (to_clear, j);
3793             }
3794
3795           /* Remove the candidates for that the cost is infinite from
3796              the list of related candidates.  */
3797           bitmap_and_compl_into (use->related_cands, to_clear);
3798           bitmap_clear (to_clear);
3799         }
3800     }
3801
3802   BITMAP_FREE (to_clear);
3803
3804   if (dump_file && (dump_flags & TDF_DETAILS))
3805     {
3806       fprintf (dump_file, "Use-candidate costs:\n");
3807
3808       for (i = 0; i < n_iv_uses (data); i++)
3809         {
3810           use = iv_use (data, i);
3811
3812           fprintf (dump_file, "Use %d:\n", i);
3813           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3814           for (j = 0; j < use->n_map_members; j++)
3815             {
3816               if (!use->cost_map[j].cand
3817                   || use->cost_map[j].cost == INFTY)
3818                 continue;
3819
3820               fprintf (dump_file, "  %d\t%d\t",
3821                        use->cost_map[j].cand->id,
3822                        use->cost_map[j].cost);
3823               if (use->cost_map[j].depends_on)
3824                 bitmap_print (dump_file,
3825                               use->cost_map[j].depends_on, "","");
3826               fprintf (dump_file, "\n");
3827             }
3828
3829           fprintf (dump_file, "\n");
3830         }
3831       fprintf (dump_file, "\n");
3832     }
3833 }
3834
3835 /* Determines cost of the candidate CAND.  */
3836
3837 static void
3838 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3839 {
3840   unsigned cost_base, cost_step;
3841   tree base;
3842
3843   if (!cand->iv)
3844     {
3845       cand->cost = 0;
3846       return;
3847     }
3848
3849   /* There are two costs associated with the candidate -- its increment
3850      and its initialization.  The second is almost negligible for any loop
3851      that rolls enough, so we take it just very little into account.  */
3852
3853   base = cand->iv->base;
3854   cost_base = force_var_cost (data, base, NULL);
3855   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3856
3857   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3858
3859   /* Prefer the original iv unless we may gain something by replacing it;
3860      this is not really relevant for artificial ivs created by other
3861      passes.  */
3862   if (cand->pos == IP_ORIGINAL
3863       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3864     cand->cost--;
3865
3866   /* Prefer not to insert statements into latch unless there are some
3867      already (so that we do not create unnecessary jumps).  */
3868   if (cand->pos == IP_END
3869       && empty_block_p (ip_end_pos (data->current_loop)))
3870     cand->cost++;
3871 }
3872
3873 /* Determines costs of computation of the candidates.  */
3874
3875 static void
3876 determine_iv_costs (struct ivopts_data *data)
3877 {
3878   unsigned i;
3879
3880   if (dump_file && (dump_flags & TDF_DETAILS))
3881     {
3882       fprintf (dump_file, "Candidate costs:\n");
3883       fprintf (dump_file, "  cand\tcost\n");
3884     }
3885
3886   for (i = 0; i < n_iv_cands (data); i++)
3887     {
3888       struct iv_cand *cand = iv_cand (data, i);
3889
3890       determine_iv_cost (data, cand);
3891
3892       if (dump_file && (dump_flags & TDF_DETAILS))
3893         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3894     }
3895
3896   if (dump_file && (dump_flags & TDF_DETAILS))
3897     fprintf (dump_file, "\n");
3898 }
3899
3900 /* Calculates cost for having SIZE induction variables.  */
3901
3902 static unsigned
3903 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3904 {
3905   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3906 }
3907
3908 /* For each size of the induction variable set determine the penalty.  */
3909
3910 static void
3911 determine_set_costs (struct ivopts_data *data)
3912 {
3913   unsigned j, n;
3914   tree phi, op;
3915   struct loop *loop = data->current_loop;
3916   bitmap_iterator bi;
3917
3918   /* We use the following model (definitely improvable, especially the
3919      cost function -- TODO):
3920
3921      We estimate the number of registers available (using MD data), name it A.
3922
3923      We estimate the number of registers used by the loop, name it U.  This
3924      number is obtained as the number of loop phi nodes (not counting virtual
3925      registers and bivs) + the number of variables from outside of the loop.
3926
3927      We set a reserve R (free regs that are used for temporary computations,
3928      etc.).  For now the reserve is a constant 3.
3929
3930      Let I be the number of induction variables.
3931
3932      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3933         make a lot of ivs without a reason).
3934      -- if A - R < U + I <= A, the cost is I * PRES_COST
3935      -- if U + I > A, the cost is I * PRES_COST and
3936         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3937
3938   if (dump_file && (dump_flags & TDF_DETAILS))
3939     {
3940       fprintf (dump_file, "Global costs:\n");
3941       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3942       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3943       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3944       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3945     }
3946
3947   n = 0;
3948   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3949     {
3950       op = PHI_RESULT (phi);
3951
3952       if (!is_gimple_reg (op))
3953         continue;
3954
3955       if (get_iv (data, op))
3956         continue;
3957
3958       n++;
3959     }
3960
3961   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3962     {
3963       struct version_info *info = ver_info (data, j);
3964
3965       if (info->inv_id && info->has_nonlin_use)
3966         n++;
3967     }
3968
3969   data->regs_used = n;
3970   if (dump_file && (dump_flags & TDF_DETAILS))
3971     fprintf (dump_file, "  regs_used %d\n", n);
3972
3973   if (dump_file && (dump_flags & TDF_DETAILS))
3974     {
3975       fprintf (dump_file, "  cost for size:\n");
3976       fprintf (dump_file, "  ivs\tcost\n");
3977       for (j = 0; j <= 2 * target_avail_regs; j++)
3978         fprintf (dump_file, "  %d\t%d\n", j,
3979                  ivopts_global_cost_for_size (data, j));
3980       fprintf (dump_file, "\n");
3981     }
3982 }
3983
3984 /* Returns true if A is a cheaper cost pair than B.  */
3985
3986 static bool
3987 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3988 {
3989   if (!a)
3990     return false;
3991
3992   if (!b)
3993     return true;
3994
3995   if (a->cost < b->cost)
3996     return true;
3997
3998   if (a->cost > b->cost)
3999     return false;
4000
4001   /* In case the costs are the same, prefer the cheaper candidate.  */
4002   if (a->cand->cost < b->cand->cost)
4003     return true;
4004
4005   return false;
4006 }
4007
4008 /* Computes the cost field of IVS structure.  */
4009
4010 static void
4011 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4012 {
4013   unsigned cost = 0;
4014
4015   cost += ivs->cand_use_cost;
4016   cost += ivs->cand_cost;
4017   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4018
4019   ivs->cost = cost;
4020 }
4021
4022 /* Remove invariants in set INVS to set IVS.  */
4023
4024 static void
4025 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4026 {
4027   bitmap_iterator bi;
4028   unsigned iid;
4029
4030   if (!invs)
4031     return;
4032
4033   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4034     {
4035       ivs->n_invariant_uses[iid]--;
4036       if (ivs->n_invariant_uses[iid] == 0)
4037         ivs->n_regs--;
4038     }
4039 }
4040
4041 /* Set USE not to be expressed by any candidate in IVS.  */
4042
4043 static void
4044 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4045                  struct iv_use *use)
4046 {
4047   unsigned uid = use->id, cid;
4048   struct cost_pair *cp;
4049
4050   cp = ivs->cand_for_use[uid];
4051   if (!cp)
4052     return;
4053   cid = cp->cand->id;
4054
4055   ivs->bad_uses++;
4056   ivs->cand_for_use[uid] = NULL;
4057   ivs->n_cand_uses[cid]--;
4058
4059   if (ivs->n_cand_uses[cid] == 0)
4060     {
4061       bitmap_clear_bit (ivs->cands, cid);
4062       /* Do not count the pseudocandidates.  */
4063       if (cp->cand->iv)
4064         ivs->n_regs--;
4065       ivs->n_cands--;
4066       ivs->cand_cost -= cp->cand->cost;
4067
4068       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4069     }
4070
4071   ivs->cand_use_cost -= cp->cost;
4072
4073   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4074   iv_ca_recount_cost (data, ivs);
4075 }
4076
4077 /* Add invariants in set INVS to set IVS.  */
4078
4079 static void
4080 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4081 {
4082   bitmap_iterator bi;
4083   unsigned iid;
4084
4085   if (!invs)
4086     return;
4087
4088   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4089     {
4090       ivs->n_invariant_uses[iid]++;
4091       if (ivs->n_invariant_uses[iid] == 1)
4092         ivs->n_regs++;
4093     }
4094 }
4095
4096 /* Set cost pair for USE in set IVS to CP.  */
4097
4098 static void
4099 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4100               struct iv_use *use, struct cost_pair *cp)
4101 {
4102   unsigned uid = use->id, cid;
4103
4104   if (ivs->cand_for_use[uid] == cp)
4105     return;
4106
4107   if (ivs->cand_for_use[uid])
4108     iv_ca_set_no_cp (data, ivs, use);
4109
4110   if (cp)
4111     {
4112       cid = cp->cand->id;
4113
4114       ivs->bad_uses--;
4115       ivs->cand_for_use[uid] = cp;
4116       ivs->n_cand_uses[cid]++;
4117       if (ivs->n_cand_uses[cid] == 1)
4118         {
4119           bitmap_set_bit (ivs->cands, cid);
4120           /* Do not count the pseudocandidates.  */
4121           if (cp->cand->iv)
4122             ivs->n_regs++;
4123           ivs->n_cands++;
4124           ivs->cand_cost += cp->cand->cost;
4125
4126           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4127         }
4128
4129       ivs->cand_use_cost += cp->cost;
4130       iv_ca_set_add_invariants (ivs, cp->depends_on);
4131       iv_ca_recount_cost (data, ivs);
4132     }
4133 }
4134
4135 /* Extend set IVS by expressing USE by some of the candidates in it
4136    if possible.  */
4137
4138 static void
4139 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4140                struct iv_use *use)
4141 {
4142   struct cost_pair *best_cp = NULL, *cp;
4143   bitmap_iterator bi;
4144   unsigned i;
4145
4146   gcc_assert (ivs->upto >= use->id);
4147
4148   if (ivs->upto == use->id)
4149     {
4150       ivs->upto++;
4151       ivs->bad_uses++;
4152     }
4153
4154   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4155     {
4156       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4157
4158       if (cheaper_cost_pair (cp, best_cp))
4159         best_cp = cp;
4160     }
4161
4162   iv_ca_set_cp (data, ivs, use, best_cp);
4163 }
4164
4165 /* Get cost for assignment IVS.  */
4166
4167 static unsigned
4168 iv_ca_cost (struct iv_ca *ivs)
4169 {
4170   return (ivs->bad_uses ? INFTY : ivs->cost);
4171 }
4172
4173 /* Returns true if all dependences of CP are among invariants in IVS.  */
4174
4175 static bool
4176 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4177 {
4178   unsigned i;
4179   bitmap_iterator bi;
4180
4181   if (!cp->depends_on)
4182     return true;
4183
4184   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4185     {
4186       if (ivs->n_invariant_uses[i] == 0)
4187         return false;
4188     }
4189
4190   return true;
4191 }
4192
4193 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4194    it before NEXT_CHANGE.  */
4195
4196 static struct iv_ca_delta *
4197 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4198                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4199 {
4200   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4201
4202   change->use = use;
4203   change->old_cp = old_cp;
4204   change->new_cp = new_cp;
4205   change->next_change = next_change;
4206
4207   return change;
4208 }
4209
4210 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4211    are rewritten.  */
4212
4213 static struct iv_ca_delta *
4214 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4215 {
4216   struct iv_ca_delta *last;
4217
4218   if (!l2)
4219     return l1;
4220
4221   if (!l1)
4222     return l2;
4223
4224   for (last = l1; last->next_change; last = last->next_change)
4225     continue;
4226   last->next_change = l2;
4227
4228   return l1;
4229 }
4230
4231 /* Returns candidate by that USE is expressed in IVS.  */
4232
4233 static struct cost_pair *
4234 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4235 {
4236   return ivs->cand_for_use[use->id];
4237 }
4238
4239 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4240
4241 static struct iv_ca_delta *
4242 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4243 {
4244   struct iv_ca_delta *act, *next, *prev = NULL;
4245   struct cost_pair *tmp;
4246
4247   for (act = delta; act; act = next)
4248     {
4249       next = act->next_change;
4250       act->next_change = prev;
4251       prev = act;
4252
4253       tmp = act->old_cp;
4254       act->old_cp = act->new_cp;
4255       act->new_cp = tmp;
4256     }
4257
4258   return prev;
4259 }
4260
4261 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4262    reverted instead.  */
4263
4264 static void
4265 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4266                     struct iv_ca_delta *delta, bool forward)
4267 {
4268   struct cost_pair *from, *to;
4269   struct iv_ca_delta *act;
4270
4271   if (!forward)
4272     delta = iv_ca_delta_reverse (delta);
4273
4274   for (act = delta; act; act = act->next_change)
4275     {
4276       from = act->old_cp;
4277       to = act->new_cp;
4278       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4279       iv_ca_set_cp (data, ivs, act->use, to);
4280     }
4281
4282   if (!forward)
4283     iv_ca_delta_reverse (delta);
4284 }
4285
4286 /* Returns true if CAND is used in IVS.  */
4287
4288 static bool
4289 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4290 {
4291   return ivs->n_cand_uses[cand->id] > 0;
4292 }
4293
4294 /* Returns number of induction variable candidates in the set IVS.  */
4295
4296 static unsigned
4297 iv_ca_n_cands (struct iv_ca *ivs)
4298 {
4299   return ivs->n_cands;
4300 }
4301
4302 /* Free the list of changes DELTA.  */
4303
4304 static void
4305 iv_ca_delta_free (struct iv_ca_delta **delta)
4306 {
4307   struct iv_ca_delta *act, *next;
4308
4309   for (act = *delta; act; act = next)
4310     {
4311       next = act->next_change;
4312       free (act);
4313     }
4314
4315   *delta = NULL;
4316 }
4317
4318 /* Allocates new iv candidates assignment.  */
4319
4320 static struct iv_ca *
4321 iv_ca_new (struct ivopts_data *data)
4322 {
4323   struct iv_ca *nw = XNEW (struct iv_ca);
4324
4325   nw->upto = 0;
4326   nw->bad_uses = 0;
4327   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4328   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4329   nw->cands = BITMAP_ALLOC (NULL);
4330   nw->n_cands = 0;
4331   nw->n_regs = 0;
4332   nw->cand_use_cost = 0;
4333   nw->cand_cost = 0;
4334   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4335   nw->cost = 0;
4336
4337   return nw;
4338 }
4339
4340 /* Free memory occupied by the set IVS.  */
4341
4342 static void
4343 iv_ca_free (struct iv_ca **ivs)
4344 {
4345   free ((*ivs)->cand_for_use);
4346   free ((*ivs)->n_cand_uses);
4347   BITMAP_FREE ((*ivs)->cands);
4348   free ((*ivs)->n_invariant_uses);
4349   free (*ivs);
4350   *ivs = NULL;
4351 }
4352
4353 /* Dumps IVS to FILE.  */
4354
4355 static void
4356 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4357 {
4358   const char *pref = "  invariants ";
4359   unsigned i;
4360
4361   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4362   bitmap_print (file, ivs->cands, "  candidates ","\n");
4363
4364   for (i = 1; i <= data->max_inv_id; i++)
4365     if (ivs->n_invariant_uses[i])
4366       {
4367         fprintf (file, "%s%d", pref, i);
4368         pref = ", ";
4369       }
4370   fprintf (file, "\n");
4371 }
4372
4373 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4374    new set, and store differences in DELTA.  Number of induction variables
4375    in the new set is stored to N_IVS.  */
4376
4377 static unsigned
4378 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4379               struct iv_cand *cand, struct iv_ca_delta **delta,
4380               unsigned *n_ivs)
4381 {
4382   unsigned i, cost;
4383   struct iv_use *use;
4384   struct cost_pair *old_cp, *new_cp;
4385
4386   *delta = NULL;
4387   for (i = 0; i < ivs->upto; i++)
4388     {
4389       use = iv_use (data, i);
4390       old_cp = iv_ca_cand_for_use (ivs, use);
4391
4392       if (old_cp
4393           && old_cp->cand == cand)
4394         continue;
4395
4396       new_cp = get_use_iv_cost (data, use, cand);
4397       if (!new_cp)
4398         continue;
4399
4400       if (!iv_ca_has_deps (ivs, new_cp))
4401         continue;
4402
4403       if (!cheaper_cost_pair (new_cp, old_cp))
4404         continue;
4405
4406       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4407     }
4408
4409   iv_ca_delta_commit (data, ivs, *delta, true);
4410   cost = iv_ca_cost (ivs);
4411   if (n_ivs)
4412     *n_ivs = iv_ca_n_cands (ivs);
4413   iv_ca_delta_commit (data, ivs, *delta, false);
4414
4415   return cost;
4416 }
4417
4418 /* Try narrowing set IVS by removing CAND.  Return the cost of
4419    the new set and store the differences in DELTA.  */
4420
4421 static unsigned
4422 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4423               struct iv_cand *cand, struct iv_ca_delta **delta)
4424 {
4425   unsigned i, ci;
4426   struct iv_use *use;
4427   struct cost_pair *old_cp, *new_cp, *cp;
4428   bitmap_iterator bi;
4429   struct iv_cand *cnd;
4430   unsigned cost;
4431
4432   *delta = NULL;
4433   for (i = 0; i < n_iv_uses (data); i++)
4434     {
4435       use = iv_use (data, i);
4436
4437       old_cp = iv_ca_cand_for_use (ivs, use);
4438       if (old_cp->cand != cand)
4439         continue;
4440
4441       new_cp = NULL;
4442
4443       if (data->consider_all_candidates)
4444         {
4445           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4446             {
4447               if (ci == cand->id)
4448                 continue;
4449
4450               cnd = iv_cand (data, ci);
4451
4452               cp = get_use_iv_cost (data, use, cnd);
4453               if (!cp)
4454                 continue;
4455               if (!iv_ca_has_deps (ivs, cp))
4456                 continue;
4457
4458               if (!cheaper_cost_pair (cp, new_cp))
4459                 continue;
4460
4461               new_cp = cp;
4462             }
4463         }
4464       else
4465         {
4466           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4467             {
4468               if (ci == cand->id)
4469                 continue;
4470
4471               cnd = iv_cand (data, ci);
4472
4473               cp = get_use_iv_cost (data, use, cnd);
4474               if (!cp)
4475                 continue;
4476               if (!iv_ca_has_deps (ivs, cp))
4477                 continue;
4478
4479               if (!cheaper_cost_pair (cp, new_cp))
4480                 continue;
4481
4482               new_cp = cp;
4483             }
4484         }
4485
4486       if (!new_cp)
4487         {
4488           iv_ca_delta_free (delta);
4489           return INFTY;
4490         }
4491
4492       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4493     }
4494
4495   iv_ca_delta_commit (data, ivs, *delta, true);
4496   cost = iv_ca_cost (ivs);
4497   iv_ca_delta_commit (data, ivs, *delta, false);
4498
4499   return cost;
4500 }
4501
4502 /* Try optimizing the set of candidates IVS by removing candidates different
4503    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4504    differences in DELTA.  */
4505
4506 static unsigned
4507 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4508              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4509 {
4510   bitmap_iterator bi;
4511   struct iv_ca_delta *act_delta, *best_delta;
4512   unsigned i, best_cost, acost;
4513   struct iv_cand *cand;
4514
4515   best_delta = NULL;
4516   best_cost = iv_ca_cost (ivs);
4517
4518   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4519     {
4520       cand = iv_cand (data, i);
4521
4522       if (cand == except_cand)
4523         continue;
4524
4525       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4526
4527       if (acost < best_cost)
4528         {
4529           best_cost = acost;
4530           iv_ca_delta_free (&best_delta);
4531           best_delta = act_delta;
4532         }
4533       else
4534         iv_ca_delta_free (&act_delta);
4535     }
4536
4537   if (!best_delta)
4538     {
4539       *delta = NULL;
4540       return best_cost;
4541     }
4542
4543   /* Recurse to possibly remove other unnecessary ivs.  */
4544   iv_ca_delta_commit (data, ivs, best_delta, true);
4545   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4546   iv_ca_delta_commit (data, ivs, best_delta, false);
4547   *delta = iv_ca_delta_join (best_delta, *delta);
4548   return best_cost;
4549 }
4550
4551 /* Tries to extend the sets IVS in the best possible way in order
4552    to express the USE.  */
4553
4554 static bool
4555 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4556                   struct iv_use *use)
4557 {
4558   unsigned best_cost, act_cost;
4559   unsigned i;
4560   bitmap_iterator bi;
4561   struct iv_cand *cand;
4562   struct iv_ca_delta *best_delta = NULL, *act_delta;
4563   struct cost_pair *cp;
4564
4565   iv_ca_add_use (data, ivs, use);
4566   best_cost = iv_ca_cost (ivs);
4567
4568   cp = iv_ca_cand_for_use (ivs, use);
4569   if (cp)
4570     {
4571       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4572       iv_ca_set_no_cp (data, ivs, use);
4573     }
4574
4575   /* First try important candidates.  Only if it fails, try the specific ones.
4576      Rationale -- in loops with many variables the best choice often is to use
4577      just one generic biv.  If we added here many ivs specific to the uses,
4578      the optimization algorithm later would be likely to get stuck in a local
4579      minimum, thus causing us to create too many ivs.  The approach from
4580      few ivs to more seems more likely to be successful -- starting from few
4581      ivs, replacing an expensive use by a specific iv should always be a
4582      win.  */
4583   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4584     {
4585       cand = iv_cand (data, i);
4586
4587       if (iv_ca_cand_used_p (ivs, cand))
4588         continue;
4589
4590       cp = get_use_iv_cost (data, use, cand);
4591       if (!cp)
4592         continue;
4593
4594       iv_ca_set_cp (data, ivs, use, cp);
4595       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4596       iv_ca_set_no_cp (data, ivs, use);
4597       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4598
4599       if (act_cost < best_cost)
4600         {
4601           best_cost = act_cost;
4602
4603           iv_ca_delta_free (&best_delta);
4604           best_delta = act_delta;
4605         }
4606       else
4607         iv_ca_delta_free (&act_delta);
4608     }
4609
4610   if (best_cost == INFTY)
4611     {
4612       for (i = 0; i < use->n_map_members; i++)
4613         {
4614           cp = use->cost_map + i;
4615           cand = cp->cand;
4616           if (!cand)
4617             continue;
4618
4619           /* Already tried this.  */
4620           if (cand->important)
4621             continue;
4622
4623           if (iv_ca_cand_used_p (ivs, cand))
4624             continue;
4625
4626           act_delta = NULL;
4627           iv_ca_set_cp (data, ivs, use, cp);
4628           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4629           iv_ca_set_no_cp (data, ivs, use);
4630           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4631                                        cp, act_delta);
4632
4633           if (act_cost < best_cost)
4634             {
4635               best_cost = act_cost;
4636
4637               if (best_delta)
4638                 iv_ca_delta_free (&best_delta);
4639               best_delta = act_delta;
4640             }
4641           else
4642             iv_ca_delta_free (&act_delta);
4643         }
4644     }
4645
4646   iv_ca_delta_commit (data, ivs, best_delta, true);
4647   iv_ca_delta_free (&best_delta);
4648
4649   return (best_cost != INFTY);
4650 }
4651
4652 /* Finds an initial assignment of candidates to uses.  */
4653
4654 static struct iv_ca *
4655 get_initial_solution (struct ivopts_data *data)
4656 {
4657   struct iv_ca *ivs = iv_ca_new (data);
4658   unsigned i;
4659
4660   for (i = 0; i < n_iv_uses (data); i++)
4661     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4662       {
4663         iv_ca_free (&ivs);
4664         return NULL;
4665       }
4666
4667   return ivs;
4668 }
4669
4670 /* Tries to improve set of induction variables IVS.  */
4671
4672 static bool
4673 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4674 {
4675   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4676   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4677   struct iv_cand *cand;
4678
4679   /* Try extending the set of induction variables by one.  */
4680   for (i = 0; i < n_iv_cands (data); i++)
4681     {
4682       cand = iv_cand (data, i);
4683
4684       if (iv_ca_cand_used_p (ivs, cand))
4685         continue;
4686
4687       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4688       if (!act_delta)
4689         continue;
4690
4691       /* If we successfully added the candidate and the set is small enough,
4692          try optimizing it by removing other candidates.  */
4693       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4694         {
4695           iv_ca_delta_commit (data, ivs, act_delta, true);
4696           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4697           iv_ca_delta_commit (data, ivs, act_delta, false);
4698           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4699         }
4700
4701       if (acost < best_cost)
4702         {
4703           best_cost = acost;
4704           iv_ca_delta_free (&best_delta);
4705           best_delta = act_delta;
4706         }
4707       else
4708         iv_ca_delta_free (&act_delta);
4709     }
4710
4711   if (!best_delta)
4712     {
4713       /* Try removing the candidates from the set instead.  */
4714       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4715
4716       /* Nothing more we can do.  */
4717       if (!best_delta)
4718         return false;
4719     }
4720
4721   iv_ca_delta_commit (data, ivs, best_delta, true);
4722   gcc_assert (best_cost == iv_ca_cost (ivs));
4723   iv_ca_delta_free (&best_delta);
4724   return true;
4725 }
4726
4727 /* Attempts to find the optimal set of induction variables.  We do simple
4728    greedy heuristic -- we try to replace at most one candidate in the selected
4729    solution and remove the unused ivs while this improves the cost.  */
4730
4731 static struct iv_ca *
4732 find_optimal_iv_set (struct ivopts_data *data)
4733 {
4734   unsigned i;
4735   struct iv_ca *set;
4736   struct iv_use *use;
4737
4738   /* Get the initial solution.  */
4739   set = get_initial_solution (data);
4740   if (!set)
4741     {
4742       if (dump_file && (dump_flags & TDF_DETAILS))
4743         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4744       return NULL;
4745     }
4746
4747   if (dump_file && (dump_flags & TDF_DETAILS))
4748     {
4749       fprintf (dump_file, "Initial set of candidates:\n");
4750       iv_ca_dump (data, dump_file, set);
4751     }
4752
4753   while (try_improve_iv_set (data, set))
4754     {
4755       if (dump_file && (dump_flags & TDF_DETAILS))
4756         {
4757           fprintf (dump_file, "Improved to:\n");
4758           iv_ca_dump (data, dump_file, set);
4759         }
4760     }
4761
4762   if (dump_file && (dump_flags & TDF_DETAILS))
4763     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4764
4765   for (i = 0; i < n_iv_uses (data); i++)
4766     {
4767       use = iv_use (data, i);
4768       use->selected = iv_ca_cand_for_use (set, use)->cand;
4769     }
4770
4771   return set;
4772 }
4773
4774 /* Creates a new induction variable corresponding to CAND.  */
4775
4776 static void
4777 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4778 {
4779   block_stmt_iterator incr_pos;
4780   tree base;
4781   bool after = false;
4782
4783   if (!cand->iv)
4784     return;
4785
4786   switch (cand->pos)
4787     {
4788     case IP_NORMAL:
4789       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4790       break;
4791
4792     case IP_END:
4793       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4794       after = true;
4795       break;
4796
4797     case IP_ORIGINAL:
4798       /* Mark that the iv is preserved.  */
4799       name_info (data, cand->var_before)->preserve_biv = true;
4800       name_info (data, cand->var_after)->preserve_biv = true;
4801
4802       /* Rewrite the increment so that it uses var_before directly.  */
4803       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4804
4805       return;
4806     }
4807
4808   gimple_add_tmp_var (cand->var_before);
4809   add_referenced_var (cand->var_before);
4810
4811   base = unshare_expr (cand->iv->base);
4812
4813   create_iv (base, unshare_expr (cand->iv->step),
4814              cand->var_before, data->current_loop,
4815              &incr_pos, after, &cand->var_before, &cand->var_after);
4816 }
4817
4818 /* Creates new induction variables described in SET.  */
4819
4820 static void
4821 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4822 {
4823   unsigned i;
4824   struct iv_cand *cand;
4825   bitmap_iterator bi;
4826
4827   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4828     {
4829       cand = iv_cand (data, i);
4830       create_new_iv (data, cand);
4831     }
4832 }
4833
4834 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4835    is true, remove also the ssa name defined by the statement.  */
4836
4837 static void
4838 remove_statement (tree stmt, bool including_defined_name)
4839 {
4840   if (TREE_CODE (stmt) == PHI_NODE)
4841     {
4842       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4843     }
4844   else
4845     {
4846       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4847
4848       bsi_remove (&bsi, true);
4849     }
4850 }
4851
4852 /* Rewrites USE (definition of iv used in a nonlinear expression)
4853    using candidate CAND.  */
4854
4855 static void
4856 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4857                             struct iv_use *use, struct iv_cand *cand)
4858 {
4859   tree comp;
4860   tree op, stmts, tgt, ass;
4861   block_stmt_iterator bsi, pbsi;
4862
4863   /* An important special case -- if we are asked to express value of
4864      the original iv by itself, just exit; there is no need to
4865      introduce a new computation (that might also need casting the
4866      variable to unsigned and back).  */
4867   if (cand->pos == IP_ORIGINAL
4868       && cand->incremented_at == use->stmt)
4869     {
4870       tree step, ctype, utype;
4871       enum tree_code incr_code = PLUS_EXPR;
4872
4873       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4874       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4875
4876       step = cand->iv->step;
4877       ctype = TREE_TYPE (step);
4878       utype = TREE_TYPE (cand->var_after);
4879       if (TREE_CODE (step) == NEGATE_EXPR)
4880         {
4881           incr_code = MINUS_EXPR;
4882           step = TREE_OPERAND (step, 0);
4883         }
4884
4885       /* Check whether we may leave the computation unchanged.
4886          This is the case only if it does not rely on other
4887          computations in the loop -- otherwise, the computation
4888          we rely upon may be removed in remove_unused_ivs,
4889          thus leading to ICE.  */
4890       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4891       if (TREE_CODE (op) == PLUS_EXPR
4892           || TREE_CODE (op) == MINUS_EXPR)
4893         {
4894           if (TREE_OPERAND (op, 0) == cand->var_before)
4895             op = TREE_OPERAND (op, 1);
4896           else if (TREE_CODE (op) == PLUS_EXPR
4897                    && TREE_OPERAND (op, 1) == cand->var_before)
4898             op = TREE_OPERAND (op, 0);
4899           else
4900             op = NULL_TREE;
4901         }
4902       else
4903         op = NULL_TREE;
4904
4905       if (op
4906           && (TREE_CODE (op) == INTEGER_CST
4907               || operand_equal_p (op, step, 0)))
4908         return;
4909
4910       /* Otherwise, add the necessary computations to express
4911          the iv.  */
4912       op = fold_convert (ctype, cand->var_before);
4913       comp = fold_convert (utype,
4914                            build2 (incr_code, ctype, op,
4915                                    unshare_expr (step)));
4916     }
4917   else
4918     {
4919       comp = get_computation (data->current_loop, use, cand);
4920       gcc_assert (comp != NULL_TREE);
4921     }
4922
4923   switch (TREE_CODE (use->stmt))
4924     {
4925     case PHI_NODE:
4926       tgt = PHI_RESULT (use->stmt);
4927
4928       /* If we should keep the biv, do not replace it.  */
4929       if (name_info (data, tgt)->preserve_biv)
4930         return;
4931
4932       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4933       while (!bsi_end_p (pbsi)
4934              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4935         {
4936           bsi = pbsi;
4937           bsi_next (&pbsi);
4938         }
4939       break;
4940
4941     case GIMPLE_MODIFY_STMT:
4942       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4943       bsi = bsi_for_stmt (use->stmt);
4944       break;
4945
4946     default:
4947       gcc_unreachable ();
4948     }
4949
4950   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4951
4952   if (TREE_CODE (use->stmt) == PHI_NODE)
4953     {
4954       if (stmts)
4955         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4956       ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4957       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4958       remove_statement (use->stmt, false);
4959       SSA_NAME_DEF_STMT (tgt) = ass;
4960     }
4961   else
4962     {
4963       if (stmts)
4964         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4965       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4966     }
4967 }
4968
4969 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4970    for_each_index.  */
4971
4972 static bool
4973 idx_remove_ssa_names (tree base, tree *idx,
4974                       void *data ATTRIBUTE_UNUSED)
4975 {
4976   tree *op;
4977
4978   if (TREE_CODE (*idx) == SSA_NAME)
4979     *idx = SSA_NAME_VAR (*idx);
4980
4981   if (TREE_CODE (base) == ARRAY_REF)
4982     {
4983       op = &TREE_OPERAND (base, 2);
4984       if (*op
4985           && TREE_CODE (*op) == SSA_NAME)
4986         *op = SSA_NAME_VAR (*op);
4987       op = &TREE_OPERAND (base, 3);
4988       if (*op
4989           && TREE_CODE (*op) == SSA_NAME)
4990         *op = SSA_NAME_VAR (*op);
4991     }
4992
4993   return true;
4994 }
4995
4996 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4997
4998 static tree
4999 unshare_and_remove_ssa_names (tree ref)
5000 {
5001   ref = unshare_expr (ref);
5002   for_each_index (&ref, idx_remove_ssa_names, NULL);
5003
5004   return ref;
5005 }
5006
5007 /* Extract the alias analysis info for the memory reference REF.  There are
5008    several ways how this information may be stored and what precisely is
5009    its semantics depending on the type of the reference, but there always is
5010    somewhere hidden one _DECL node that is used to determine the set of
5011    virtual operands for the reference.  The code below deciphers this jungle
5012    and extracts this single useful piece of information.  */
5013
5014 static tree
5015 get_ref_tag (tree ref, tree orig)
5016 {
5017   tree var = get_base_address (ref);
5018   tree aref = NULL_TREE, tag, sv;
5019   HOST_WIDE_INT offset, size, maxsize;
5020
5021   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5022     {
5023       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5024       if (ref)
5025         break;
5026     }
5027
5028   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5029     return unshare_expr (sv);
5030
5031   if (!var)
5032     return NULL_TREE;
5033
5034   if (TREE_CODE (var) == INDIRECT_REF)
5035     {
5036       /* If the base is a dereference of a pointer, first check its name memory
5037          tag.  If it does not have one, use its symbol memory tag.  */
5038       var = TREE_OPERAND (var, 0);
5039       if (TREE_CODE (var) != SSA_NAME)
5040         return NULL_TREE;
5041
5042       if (SSA_NAME_PTR_INFO (var))
5043         {
5044           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5045           if (tag)
5046             return tag;
5047         }
5048
5049       var = SSA_NAME_VAR (var);
5050       tag = symbol_mem_tag (var);
5051       gcc_assert (tag != NULL_TREE);
5052       return tag;
5053     }
5054   else
5055     {
5056       if (!DECL_P (var))
5057         return NULL_TREE;
5058
5059       tag = symbol_mem_tag (var);
5060       if (tag)
5061         return tag;
5062
5063       return var;
5064     }
5065 }
5066
5067 /* Copies the reference information from OLD_REF to NEW_REF.  */
5068
5069 static void
5070 copy_ref_info (tree new_ref, tree old_ref)
5071 {
5072   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5073     copy_mem_ref_info (new_ref, old_ref);
5074   else
5075     {
5076       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5077       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5078     }
5079 }
5080
5081 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5082
5083 static void
5084 rewrite_use_address (struct ivopts_data *data,
5085                      struct iv_use *use, struct iv_cand *cand)
5086 {
5087   aff_tree aff;
5088   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5089   tree ref;
5090   bool ok;
5091
5092   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5093   gcc_assert (ok);
5094   unshare_aff_combination (&aff);
5095
5096   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5097   copy_ref_info (ref, *use->op_p);
5098   *use->op_p = ref;
5099 }
5100
5101 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5102    candidate CAND.  */
5103
5104 static void
5105 rewrite_use_compare (struct ivopts_data *data,
5106                      struct iv_use *use, struct iv_cand *cand)
5107 {
5108   tree comp;
5109   tree *op_p, cond, op, stmts, bound;
5110   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5111   enum tree_code compare;
5112   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5113
5114   bound = cp->value;
5115   if (bound)
5116     {
5117       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5118       tree var_type = TREE_TYPE (var);
5119
5120       compare = iv_elimination_compare (data, use);
5121       bound = fold_convert (var_type, bound);
5122       op = force_gimple_operand (unshare_expr (bound), &stmts,
5123                                  true, NULL_TREE);
5124
5125       if (stmts)
5126         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5127
5128       *use->op_p = build2 (compare, boolean_type_node, var, op);
5129       update_stmt (use->stmt);
5130       return;
5131     }
5132
5133   /* The induction variable elimination failed; just express the original
5134      giv.  */
5135   comp = get_computation (data->current_loop, use, cand);
5136   gcc_assert (comp != NULL_TREE);
5137
5138   cond = *use->op_p;
5139   op_p = &TREE_OPERAND (cond, 0);
5140   if (TREE_CODE (*op_p) != SSA_NAME
5141       || integer_zerop (get_iv (data, *op_p)->step))
5142     op_p = &TREE_OPERAND (cond, 1);
5143
5144   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5145   if (stmts)
5146     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5147
5148   *op_p = op;
5149 }
5150
5151 /* Rewrites USE using candidate CAND.  */
5152
5153 static void
5154 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5155 {
5156   push_stmt_changes (&use->stmt);
5157
5158   switch (use->type)
5159     {
5160       case USE_NONLINEAR_EXPR:
5161         rewrite_use_nonlinear_expr (data, use, cand);
5162         break;
5163
5164       case USE_ADDRESS:
5165         rewrite_use_address (data, use, cand);
5166         break;
5167
5168       case USE_COMPARE:
5169         rewrite_use_compare (data, use, cand);
5170         break;
5171
5172       default:
5173         gcc_unreachable ();
5174     }
5175
5176   pop_stmt_changes (&use->stmt);
5177 }
5178
5179 /* Rewrite the uses using the selected induction variables.  */
5180
5181 static void
5182 rewrite_uses (struct ivopts_data *data)
5183 {
5184   unsigned i;
5185   struct iv_cand *cand;
5186   struct iv_use *use;
5187
5188   for (i = 0; i < n_iv_uses (data); i++)
5189     {
5190       use = iv_use (data, i);
5191       cand = use->selected;
5192       gcc_assert (cand);
5193
5194       rewrite_use (data, use, cand);
5195     }
5196 }
5197
5198 /* Removes the ivs that are not used after rewriting.  */
5199
5200 static void
5201 remove_unused_ivs (struct ivopts_data *data)
5202 {
5203   unsigned j;
5204   bitmap_iterator bi;
5205
5206   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5207     {
5208       struct version_info *info;
5209
5210       info = ver_info (data, j);
5211       if (info->iv
5212           && !integer_zerop (info->iv->step)
5213           && !info->inv_id
5214           && !info->iv->have_use_for
5215           && !info->preserve_biv)
5216         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5217     }
5218 }
5219
5220 /* Frees data allocated by the optimization of a single loop.  */
5221
5222 static void
5223 free_loop_data (struct ivopts_data *data)
5224 {
5225   unsigned i, j;
5226   bitmap_iterator bi;
5227   tree obj;
5228
5229   htab_empty (data->niters);
5230
5231   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5232     {
5233       struct version_info *info;
5234
5235       info = ver_info (data, i);
5236       if (info->iv)
5237         free (info->iv);
5238       info->iv = NULL;
5239       info->has_nonlin_use = false;
5240       info->preserve_biv = false;
5241       info->inv_id = 0;
5242     }
5243   bitmap_clear (data->relevant);
5244   bitmap_clear (data->important_candidates);
5245
5246   for (i = 0; i < n_iv_uses (data); i++)
5247     {
5248       struct iv_use *use = iv_use (data, i);
5249
5250       free (use->iv);
5251       BITMAP_FREE (use->related_cands);
5252       for (j = 0; j < use->n_map_members; j++)
5253         if (use->cost_map[j].depends_on)
5254           BITMAP_FREE (use->cost_map[j].depends_on);
5255       free (use->cost_map);
5256       free (use);
5257     }
5258   VEC_truncate (iv_use_p, data->iv_uses, 0);
5259
5260   for (i = 0; i < n_iv_cands (data); i++)
5261     {
5262       struct iv_cand *cand = iv_cand (data, i);
5263
5264       if (cand->iv)
5265         free (cand->iv);
5266       if (cand->depends_on)
5267         BITMAP_FREE (cand->depends_on);
5268       free (cand);
5269     }
5270   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5271
5272   if (data->version_info_size < num_ssa_names)
5273     {
5274       data->version_info_size = 2 * num_ssa_names;
5275       free (data->version_info);
5276       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5277     }
5278
5279   data->max_inv_id = 0;
5280
5281   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5282     SET_DECL_RTL (obj, NULL_RTX);
5283
5284   VEC_truncate (tree, decl_rtl_to_reset, 0);
5285 }
5286
5287 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5288    loop tree.  */
5289
5290 static void
5291 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5292 {
5293   free_loop_data (data);
5294   free (data->version_info);
5295   BITMAP_FREE (data->relevant);
5296   BITMAP_FREE (data->important_candidates);
5297   htab_delete (data->niters);
5298
5299   VEC_free (tree, heap, decl_rtl_to_reset);
5300   VEC_free (iv_use_p, heap, data->iv_uses);
5301   VEC_free (iv_cand_p, heap, data->iv_candidates);
5302 }
5303
5304 /* Optimizes the LOOP.  Returns true if anything changed.  */
5305
5306 static bool
5307 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5308 {
5309   bool changed = false;
5310   struct iv_ca *iv_ca;
5311   edge exit;
5312
5313   data->current_loop = loop;
5314
5315   if (dump_file && (dump_flags & TDF_DETAILS))
5316     {
5317       fprintf (dump_file, "Processing loop %d\n", loop->num);
5318
5319       exit = single_dom_exit (loop);
5320       if (exit)
5321         {
5322           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5323                    exit->src->index, exit->dest->index);
5324           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5325           fprintf (dump_file, "\n");
5326         }
5327
5328       fprintf (dump_file, "\n");
5329     }
5330
5331   /* For each ssa name determines whether it behaves as an induction variable
5332      in some loop.  */
5333   if (!find_induction_variables (data))
5334     goto finish;
5335
5336   /* Finds interesting uses (item 1).  */
5337   find_interesting_uses (data);
5338   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5339     goto finish;
5340
5341   /* Finds candidates for the induction variables (item 2).  */
5342   find_iv_candidates (data);
5343
5344   /* Calculates the costs (item 3, part 1).  */
5345   determine_use_iv_costs (data);
5346   determine_iv_costs (data);
5347   determine_set_costs (data);
5348
5349   /* Find the optimal set of induction variables (item 3, part 2).  */
5350   iv_ca = find_optimal_iv_set (data);
5351   if (!iv_ca)
5352     goto finish;
5353   changed = true;
5354
5355   /* Create the new induction variables (item 4, part 1).  */
5356   create_new_ivs (data, iv_ca);
5357   iv_ca_free (&iv_ca);
5358
5359   /* Rewrite the uses (item 4, part 2).  */
5360   rewrite_uses (data);
5361
5362   /* Remove the ivs that are unused after rewriting.  */
5363   remove_unused_ivs (data);
5364
5365   /* We have changed the structure of induction variables; it might happen
5366      that definitions in the scev database refer to some of them that were
5367      eliminated.  */
5368   scev_reset ();
5369
5370 finish:
5371   free_loop_data (data);
5372
5373   return changed;
5374 }
5375
5376 /* Main entry point.  Optimizes induction variables in loops.  */
5377
5378 void
5379 tree_ssa_iv_optimize (void)
5380 {
5381   struct loop *loop;
5382   struct ivopts_data data;
5383   loop_iterator li;
5384
5385   tree_ssa_iv_optimize_init (&data);
5386
5387   /* Optimize the loops starting with the innermost ones.  */
5388   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5389     {
5390       if (dump_file && (dump_flags & TDF_DETAILS))
5391         flow_loop_dump (loop, dump_file, NULL, 1);
5392
5393       tree_ssa_iv_optimize_loop (&data, loop);
5394     }
5395
5396   tree_ssa_iv_optimize_finalize (&data);
5397 }