gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93
  94 /* The infinite cost.  */
  95 #define INFTY 10000000
  96
  97 /* The expected number of loop iterations.  TODO -- use profiling instead of
  98    this.  */
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101
 102 /* Representation of the induction variable.  */
 103 struct iv
 104 {
 105   tree base;            /* Initial value of the iv.  */
 106   tree base_object;     /* A memory object to that the induction variable points.  */
 107   tree step;            /* Step of the iv (constant only).  */
 108   tree ssa_name;        /* The ssa name with the value.  */
 109   bool biv_p;           /* Is it a biv?  */
 110   bool have_use_for;    /* Do we already have a use for it?  */
 111   unsigned use_id;      /* The identifier in the use if it is the case.  */
 112 };
 113
 114 /* Per-ssa version information (induction variable descriptions, etc.).  */
 115 struct version_info
 116 {
 117   tree name;            /* The ssa name.  */
 118   struct iv *iv;        /* Induction variable description.  */
 119   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 120                            an expression that is not an induction variable.  */
 121   unsigned inv_id;      /* Id of an invariant.  */
 122   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 123 };
 124
 125 /* Types of uses.  */
 126 enum use_type
 127 {
 128   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 129   USE_ADDRESS,          /* Use in an address.  */
 130   USE_COMPARE           /* Use is a compare.  */
 131 };
 132
 133 /* The candidate - cost pair.  */
 134 struct cost_pair
 135 {
 136   struct iv_cand *cand; /* The candidate.  */
 137   unsigned cost;        /* The cost.  */
 138   bitmap depends_on;    /* The list of invariants that have to be
 139                            preserved.  */
 140   tree value;           /* For final value elimination, the expression for
 141                            the final value of the iv.  For iv elimination,
 142                            the new bound to compare with.  */
 143 };
 144
 145 /* Use.  */
 146 struct iv_use
 147 {
 148   unsigned id;          /* The id of the use.  */
 149   enum use_type type;   /* Type of the use.  */
 150   struct iv *iv;        /* The induction variable it is based on.  */
 151   tree stmt;            /* Statement in that it occurs.  */
 152   tree *op_p;           /* The place where it occurs.  */
 153   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 154                            important ones.  */
 155
 156   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 157   struct cost_pair *cost_map;
 158                         /* The costs wrto the iv candidates.  */
 159
 160   struct iv_cand *selected;
 161                         /* The selected candidate.  */
 162 };
 163
 164 /* The position where the iv is computed.  */
 165 enum iv_position
 166 {
 167   IP_NORMAL,            /* At the end, just before the exit condition.  */
 168   IP_END,               /* At the end of the latch block.  */
 169   IP_ORIGINAL           /* The original biv.  */
 170 };
 171
 172 /* The induction variable candidate.  */
 173 struct iv_cand
 174 {
 175   unsigned id;          /* The number of the candidate.  */
 176   bool important;       /* Whether this is an "important" candidate, i.e. such
 177                            that it should be considered by all uses.  */
 178   enum iv_position pos; /* Where it is computed.  */
 179   tree incremented_at;  /* For original biv, the statement where it is
 180                            incremented.  */
 181   tree var_before;      /* The variable used for it before increment.  */
 182   tree var_after;       /* The variable used for it after increment.  */
 183   struct iv *iv;        /* The value of the candidate.  NULL for
 184                            "pseudocandidate" used to indicate the possibility
 185                            to replace the final value of an iv by direct
 186                            computation of the value.  */
 187   unsigned cost;        /* Cost of the candidate.  */
 188   bitmap depends_on;    /* The list of invariants that are used in step of the
 189                            biv.  */
 190 };
 191
 192 /* The data used by the induction variable optimizations.  */
 193
 194 typedef struct iv_use *iv_use_p;
 195 DEF_VEC_P(iv_use_p);
 196 DEF_VEC_ALLOC_P(iv_use_p,heap);
 197
 198 typedef struct iv_cand *iv_cand_p;
 199 DEF_VEC_P(iv_cand_p);
 200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 201
 202 struct ivopts_data
 203 {
 204   /* The currently optimized loop.  */
 205   struct loop *current_loop;
 206
 207   /* Number of registers used in it.  */
 208   unsigned regs_used;
 209
 210   /* Numbers of iterations for all exits of the current loop.  */
 211   htab_t niters;
 212
 213   /* The size of version_info array allocated.  */
 214   unsigned version_info_size;
 215
 216   /* The array of information for the ssa names.  */
 217   struct version_info *version_info;
 218
 219   /* The bitmap of indices in version_info whose value was changed.  */
 220   bitmap relevant;
 221
 222   /* The maximum invariant id.  */
 223   unsigned max_inv_id;
 224
 225   /* The uses of induction variables.  */
 226   VEC(iv_use_p,heap) *iv_uses;
 227
 228   /* The candidates.  */
 229   VEC(iv_cand_p,heap) *iv_candidates;
 230
 231   /* A bitmap of important candidates.  */
 232   bitmap important_candidates;
 233
 234   /* Whether to consider just related and important candidates when replacing a
 235      use.  */
 236   bool consider_all_candidates;
 237 };
 238
 239 /* An assignment of iv candidates to uses.  */
 240
 241 struct iv_ca
 242 {
 243   /* The number of uses covered by the assignment.  */
 244   unsigned upto;
 245
 246   /* Number of uses that cannot be expressed by the candidates in the set.  */
 247   unsigned bad_uses;
 248
 249   /* Candidate assigned to a use, together with the related costs.  */
 250   struct cost_pair **cand_for_use;
 251
 252   /* Number of times each candidate is used.  */
 253   unsigned *n_cand_uses;
 254
 255   /* The candidates used.  */
 256   bitmap cands;
 257
 258   /* The number of candidates in the set.  */
 259   unsigned n_cands;
 260
 261   /* Total number of registers needed.  */
 262   unsigned n_regs;
 263
 264   /* Total cost of expressing uses.  */
 265   unsigned cand_use_cost;
 266
 267   /* Total cost of candidates.  */
 268   unsigned cand_cost;
 269
 270   /* Number of times each invariant is used.  */
 271   unsigned *n_invariant_uses;
 272
 273   /* Total cost of the assignment.  */
 274   unsigned cost;
 275 };
 276
 277 /* Difference of two iv candidate assignments.  */
 278
 279 struct iv_ca_delta
 280 {
 281   /* Changed use.  */
 282   struct iv_use *use;
 283
 284   /* An old assignment (for rollback purposes).  */
 285   struct cost_pair *old_cp;
 286
 287   /* A new assignment.  */
 288   struct cost_pair *new_cp;
 289
 290   /* Next change in the list.  */
 291   struct iv_ca_delta *next_change;
 292 };
 293
 294 /* Bound on number of candidates below that all candidates are considered.  */
 295
 296 #define CONSIDER_ALL_CANDIDATES_BOUND \
 297   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 298
 299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 300    optimizing such a loop would help, and it would take ages).  */
 301
 302 #define MAX_CONSIDERED_USES \
 303   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 304
 305 /* If there are at most this number of ivs in the set, try removing unnecessary
 306    ivs from the set always.  */
 307
 308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 309   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 310
 311 /* The list of trees for that the decl_rtl field must be reset is stored
 312    here.  */
 313
 314 static VEC(tree,heap) *decl_rtl_to_reset;
 315
 316 /* Number of uses recorded in DATA.  */
 317
 318 static inline unsigned
 319 n_iv_uses (struct ivopts_data *data)
 320 {
 321   return VEC_length (iv_use_p, data->iv_uses);
 322 }
 323
 324 /* Ith use recorded in DATA.  */
 325
 326 static inline struct iv_use *
 327 iv_use (struct ivopts_data *data, unsigned i)
 328 {
 329   return VEC_index (iv_use_p, data->iv_uses, i);
 330 }
 331
 332 /* Number of candidates recorded in DATA.  */
 333
 334 static inline unsigned
 335 n_iv_cands (struct ivopts_data *data)
 336 {
 337   return VEC_length (iv_cand_p, data->iv_candidates);
 338 }
 339
 340 /* Ith candidate recorded in DATA.  */
 341
 342 static inline struct iv_cand *
 343 iv_cand (struct ivopts_data *data, unsigned i)
 344 {
 345   return VEC_index (iv_cand_p, data->iv_candidates, i);
 346 }
 347
 348 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 349
 350 edge
 351 single_dom_exit (struct loop *loop)
 352 {
 353   edge exit = single_exit (loop);
 354
 355   if (!exit)
 356     return NULL;
 357
 358   if (!just_once_each_iteration_p (loop, exit->src))
 359     return NULL;
 360
 361   return exit;
 362 }
 363
 364 /* Dumps information about the induction variable IV to FILE.  */
 365
 366 extern void dump_iv (FILE *, struct iv *);
 367 void
 368 dump_iv (FILE *file, struct iv *iv)
 369 {
 370   if (iv->ssa_name)
 371     {
 372       fprintf (file, "ssa name ");
 373       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 374       fprintf (file, "\n");
 375     }
 376
 377   fprintf (file, "  type ");
 378   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 379   fprintf (file, "\n");
 380
 381   if (iv->step)
 382     {
 383       fprintf (file, "  base ");
 384       print_generic_expr (file, iv->base, TDF_SLIM);
 385       fprintf (file, "\n");
 386
 387       fprintf (file, "  step ");
 388       print_generic_expr (file, iv->step, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391   else
 392     {
 393       fprintf (file, "  invariant ");
 394       print_generic_expr (file, iv->base, TDF_SLIM);
 395       fprintf (file, "\n");
 396     }
 397
 398   if (iv->base_object)
 399     {
 400       fprintf (file, "  base object ");
 401       print_generic_expr (file, iv->base_object, TDF_SLIM);
 402       fprintf (file, "\n");
 403     }
 404
 405   if (iv->biv_p)
 406     fprintf (file, "  is a biv\n");
 407 }
 408
 409 /* Dumps information about the USE to FILE.  */
 410
 411 extern void dump_use (FILE *, struct iv_use *);
 412 void
 413 dump_use (FILE *file, struct iv_use *use)
 414 {
 415   fprintf (file, "use %d\n", use->id);
 416
 417   switch (use->type)
 418     {
 419     case USE_NONLINEAR_EXPR:
 420       fprintf (file, "  generic\n");
 421       break;
 422
 423     case USE_ADDRESS:
 424       fprintf (file, "  address\n");
 425       break;
 426
 427     case USE_COMPARE:
 428       fprintf (file, "  compare\n");
 429       break;
 430
 431     default:
 432       gcc_unreachable ();
 433     }
 434
 435   fprintf (file, "  in statement ");
 436   print_generic_expr (file, use->stmt, TDF_SLIM);
 437   fprintf (file, "\n");
 438
 439   fprintf (file, "  at position ");
 440   if (use->op_p)
 441     print_generic_expr (file, *use->op_p, TDF_SLIM);
 442   fprintf (file, "\n");
 443
 444   dump_iv (file, use->iv);
 445
 446   if (use->related_cands)
 447     {
 448       fprintf (file, "  related candidates ");
 449       dump_bitmap (file, use->related_cands);
 450     }
 451 }
 452
 453 /* Dumps information about the uses to FILE.  */
 454
 455 extern void dump_uses (FILE *, struct ivopts_data *);
 456 void
 457 dump_uses (FILE *file, struct ivopts_data *data)
 458 {
 459   unsigned i;
 460   struct iv_use *use;
 461
 462   for (i = 0; i < n_iv_uses (data); i++)
 463     {
 464       use = iv_use (data, i);
 465
 466       dump_use (file, use);
 467       fprintf (file, "\n");
 468     }
 469 }
 470
 471 /* Dumps information about induction variable candidate CAND to FILE.  */
 472
 473 extern void dump_cand (FILE *, struct iv_cand *);
 474 void
 475 dump_cand (FILE *file, struct iv_cand *cand)
 476 {
 477   struct iv *iv = cand->iv;
 478
 479   fprintf (file, "candidate %d%s\n",
 480            cand->id, cand->important ? " (important)" : "");
 481
 482   if (cand->depends_on)
 483     {
 484       fprintf (file, "  depends on ");
 485       dump_bitmap (file, cand->depends_on);
 486     }
 487
 488   if (!iv)
 489     {
 490       fprintf (file, "  final value replacement\n");
 491       return;
 492     }
 493
 494   switch (cand->pos)
 495     {
 496     case IP_NORMAL:
 497       fprintf (file, "  incremented before exit test\n");
 498       break;
 499
 500     case IP_END:
 501       fprintf (file, "  incremented at end\n");
 502       break;
 503
 504     case IP_ORIGINAL:
 505       fprintf (file, "  original biv\n");
 506       break;
 507     }
 508
 509   dump_iv (file, iv);
 510 }
 511
 512 /* Returns the info for ssa version VER.  */
 513
 514 static inline struct version_info *
 515 ver_info (struct ivopts_data *data, unsigned ver)
 516 {
 517   return data->version_info + ver;
 518 }
 519
 520 /* Returns the info for ssa name NAME.  */
 521
 522 static inline struct version_info *
 523 name_info (struct ivopts_data *data, tree name)
 524 {
 525   return ver_info (data, SSA_NAME_VERSION (name));
 526 }
 527
 528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 529    emitted in LOOP.  */
 530
 531 static bool
 532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 533 {
 534   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 535
 536   gcc_assert (bb);
 537
 538   if (sbb == loop->latch)
 539     return true;
 540
 541   if (sbb != bb)
 542     return false;
 543
 544   return stmt == last_stmt (bb);
 545 }
 546
 547 /* Returns true if STMT if after the place where the original induction
 548    variable CAND is incremented.  */
 549
 550 static bool
 551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 552 {
 553   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 554   basic_block stmt_bb = bb_for_stmt (stmt);
 555   block_stmt_iterator bsi;
 556
 557   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 558     return false;
 559
 560   if (stmt_bb != cand_bb)
 561     return true;
 562
 563   /* Scan the block from the end, since the original ivs are usually
 564      incremented at the end of the loop body.  */
 565   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 566     {
 567       if (bsi_stmt (bsi) == cand->incremented_at)
 568         return false;
 569       if (bsi_stmt (bsi) == stmt)
 570         return true;
 571     }
 572 }
 573
 574 /* Returns true if STMT if after the place where the induction variable
 575    CAND is incremented in LOOP.  */
 576
 577 static bool
 578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 579 {
 580   switch (cand->pos)
 581     {
 582     case IP_END:
 583       return false;
 584
 585     case IP_NORMAL:
 586       return stmt_after_ip_normal_pos (loop, stmt);
 587
 588     case IP_ORIGINAL:
 589       return stmt_after_ip_original_pos (cand, stmt);
 590
 591     default:
 592       gcc_unreachable ();
 593     }
 594 }
 595
 596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 597
 598 static bool
 599 abnormal_ssa_name_p (tree exp)
 600 {
 601   if (!exp)
 602     return false;
 603
 604   if (TREE_CODE (exp) != SSA_NAME)
 605     return false;
 606
 607   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 608 }
 609
 610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 611    abnormal phi node.  Callback for for_each_index.  */
 612
 613 static bool
 614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 615                                   void *data ATTRIBUTE_UNUSED)
 616 {
 617   if (TREE_CODE (base) == ARRAY_REF)
 618     {
 619       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 620         return false;
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 622         return false;
 623     }
 624
 625   return !abnormal_ssa_name_p (*index);
 626 }
 627
 628 /* Returns true if EXPR contains a ssa name that occurs in an
 629    abnormal phi node.  */
 630
 631 bool
 632 contains_abnormal_ssa_name_p (tree expr)
 633 {
 634   enum tree_code code;
 635   enum tree_code_class class;
 636
 637   if (!expr)
 638     return false;
 639
 640   code = TREE_CODE (expr);
 641   class = TREE_CODE_CLASS (code);
 642
 643   if (code == SSA_NAME)
 644     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 645
 646   if (code == INTEGER_CST
 647       || is_gimple_min_invariant (expr))
 648     return false;
 649
 650   if (code == ADDR_EXPR)
 651     return !for_each_index (&TREE_OPERAND (expr, 0),
 652                             idx_contains_abnormal_ssa_name_p,
 653                             NULL);
 654
 655   switch (class)
 656     {
 657     case tcc_binary:
 658     case tcc_comparison:
 659       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 660         return true;
 661
 662       /* Fallthru.  */
 663     case tcc_unary:
 664       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 665         return true;
 666
 667       break;
 668
 669     default:
 670       gcc_unreachable ();
 671     }
 672
 673   return false;
 674 }
 675
 676 /* Element of the table in that we cache the numbers of iterations obtained
 677    from exits of the loop.  */
 678
 679 struct nfe_cache_elt
 680 {
 681   /* The edge for that the number of iterations is cached.  */
 682   edge exit;
 683
 684   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 685      determined.  */
 686   tree niter;
 687 };
 688
 689 /* Hash function for nfe_cache_elt E.  */
 690
 691 static hashval_t
 692 nfe_hash (const void *e)
 693 {
 694   const struct nfe_cache_elt *elt = e;
 695
 696   return htab_hash_pointer (elt->exit);
 697 }
 698
 699 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 700
 701 static int
 702 nfe_eq (const void *e1, const void *e2)
 703 {
 704   const struct nfe_cache_elt *elt1 = e1;
 705
 706   return elt1->exit == e2;
 707 }
 708
 709 /*  Returns tree describing number of iterations determined from
 710     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 711
 712 static tree
 713 niter_for_exit (struct ivopts_data *data, edge exit)
 714 {
 715   struct nfe_cache_elt *nfe_desc;
 716   struct tree_niter_desc desc;
 717   PTR *slot;
 718
 719   slot = htab_find_slot_with_hash (data->niters, exit,
 720                                    htab_hash_pointer (exit),
 721                                    INSERT);
 722
 723   if (!*slot)
 724     {
 725       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 726       nfe_desc->exit = exit;
 727
 728       /* Try to determine number of iterations.  We must know it
 729          unconditionally (i.e., without possibility of # of iterations
 730          being zero).  Also, we cannot safely work with ssa names that
 731          appear in phi nodes on abnormal edges, so that we do not create
 732          overlapping life ranges for them (PR 27283).  */
 733       if (number_of_iterations_exit (data->current_loop,
 734                                      exit, &desc, true)
 735           && integer_zerop (desc.may_be_zero)
 736           && !contains_abnormal_ssa_name_p (desc.niter))
 737         nfe_desc->niter = desc.niter;
 738       else
 739         nfe_desc->niter = NULL_TREE;
 740     }
 741   else
 742     nfe_desc = *slot;
 743
 744   return nfe_desc->niter;
 745 }
 746
 747 /* Returns tree describing number of iterations determined from
 748    single dominating exit of DATA->current_loop, or NULL if something
 749    goes wrong.  */
 750
 751 static tree
 752 niter_for_single_dom_exit (struct ivopts_data *data)
 753 {
 754   edge exit = single_dom_exit (data->current_loop);
 755
 756   if (!exit)
 757     return NULL;
 758
 759   return niter_for_exit (data, exit);
 760 }
 761
 762 /* Initializes data structures used by the iv optimization pass, stored
 763    in DATA.  */
 764
 765 static void
 766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 767 {
 768   data->version_info_size = 2 * num_ssa_names;
 769   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 770   data->relevant = BITMAP_ALLOC (NULL);
 771   data->important_candidates = BITMAP_ALLOC (NULL);
 772   data->max_inv_id = 0;
 773   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 774   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 775   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 776   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 777 }
 778
 779 /* Returns a memory object to that EXPR points.  In case we are able to
 780    determine that it does not point to any such object, NULL is returned.  */
 781
 782 static tree
 783 determine_base_object (tree expr)
 784 {
 785   enum tree_code code = TREE_CODE (expr);
 786   tree base, obj, op0, op1;
 787
 788   /* If this is a pointer casted to any type, we need to determine
 789      the base object for the pointer; so handle conversions before
 790      throwing away non-pointer expressions.  */
 791   if (TREE_CODE (expr) == NOP_EXPR
 792       || TREE_CODE (expr) == CONVERT_EXPR)
 793     return determine_base_object (TREE_OPERAND (expr, 0));
 794
 795   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 796     return NULL_TREE;
 797
 798   switch (code)
 799     {
 800     case INTEGER_CST:
 801       return NULL_TREE;
 802
 803     case ADDR_EXPR:
 804       obj = TREE_OPERAND (expr, 0);
 805       base = get_base_address (obj);
 806
 807       if (!base)
 808         return expr;
 809
 810       if (TREE_CODE (base) == INDIRECT_REF)
 811         return determine_base_object (TREE_OPERAND (base, 0));
 812
 813       return fold_convert (ptr_type_node,
 814                            build_fold_addr_expr (base));
 815
 816     case PLUS_EXPR:
 817     case MINUS_EXPR:
 818       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 819       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 820
 821       if (!op1)
 822         return op0;
 823
 824       if (!op0)
 825         return (code == PLUS_EXPR
 826                 ? op1
 827                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 828
 829       return fold_build2 (code, ptr_type_node, op0, op1);
 830
 831     default:
 832       return fold_convert (ptr_type_node, expr);
 833     }
 834 }
 835
 836 /* Allocates an induction variable with given initial value BASE and step STEP
 837    for loop LOOP.  */
 838
 839 static struct iv *
 840 alloc_iv (tree base, tree step)
 841 {
 842   struct iv *iv = XCNEW (struct iv);
 843
 844   if (step && integer_zerop (step))
 845     step = NULL_TREE;
 846
 847   iv->base = base;
 848   iv->base_object = determine_base_object (base);
 849   iv->step = step;
 850   iv->biv_p = false;
 851   iv->have_use_for = false;
 852   iv->use_id = 0;
 853   iv->ssa_name = NULL_TREE;
 854
 855   return iv;
 856 }
 857
 858 /* Sets STEP and BASE for induction variable IV.  */
 859
 860 static void
 861 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 862 {
 863   struct version_info *info = name_info (data, iv);
 864
 865   gcc_assert (!info->iv);
 866
 867   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 868   info->iv = alloc_iv (base, step);
 869   info->iv->ssa_name = iv;
 870 }
 871
 872 /* Finds induction variable declaration for VAR.  */
 873
 874 static struct iv *
 875 get_iv (struct ivopts_data *data, tree var)
 876 {
 877   basic_block bb;
 878
 879   if (!name_info (data, var)->iv)
 880     {
 881       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 882
 883       if (!bb
 884           || !flow_bb_inside_loop_p (data->current_loop, bb))
 885         set_iv (data, var, var, NULL_TREE);
 886     }
 887
 888   return name_info (data, var)->iv;
 889 }
 890
 891 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 892    not define a simple affine biv with nonzero step.  */
 893
 894 static tree
 895 determine_biv_step (tree phi)
 896 {
 897   struct loop *loop = bb_for_stmt (phi)->loop_father;
 898   tree name = PHI_RESULT (phi);
 899   affine_iv iv;
 900
 901   if (!is_gimple_reg (name))
 902     return NULL_TREE;
 903
 904   if (!simple_iv (loop, phi, name, &iv, true))
 905     return NULL_TREE;
 906
 907   return (null_or_integer_zerop (iv.step) ? NULL_TREE : iv.step);
 908 }
 909
 910 /* Finds basic ivs.  */
 911
 912 static bool
 913 find_bivs (struct ivopts_data *data)
 914 {
 915   tree phi, step, type, base;
 916   bool found = false;
 917   struct loop *loop = data->current_loop;
 918
 919   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 920     {
 921       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 922         continue;
 923
 924       step = determine_biv_step (phi);
 925       if (!step)
 926         continue;
 927
 928       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 929       base = expand_simple_operations (base);
 930       if (contains_abnormal_ssa_name_p (base)
 931           || contains_abnormal_ssa_name_p (step))
 932         continue;
 933
 934       type = TREE_TYPE (PHI_RESULT (phi));
 935       base = fold_convert (type, base);
 936       if (step)
 937         step = fold_convert (type, step);
 938
 939       set_iv (data, PHI_RESULT (phi), base, step);
 940       found = true;
 941     }
 942
 943   return found;
 944 }
 945
 946 /* Marks basic ivs.  */
 947
 948 static void
 949 mark_bivs (struct ivopts_data *data)
 950 {
 951   tree phi, var;
 952   struct iv *iv, *incr_iv;
 953   struct loop *loop = data->current_loop;
 954   basic_block incr_bb;
 955
 956   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 957     {
 958       iv = get_iv (data, PHI_RESULT (phi));
 959       if (!iv)
 960         continue;
 961
 962       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 963       incr_iv = get_iv (data, var);
 964       if (!incr_iv)
 965         continue;
 966
 967       /* If the increment is in the subloop, ignore it.  */
 968       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 969       if (incr_bb->loop_father != data->current_loop
 970           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 971         continue;
 972
 973       iv->biv_p = true;
 974       incr_iv->biv_p = true;
 975     }
 976 }
 977
 978 /* Checks whether STMT defines a linear induction variable and stores its
 979    parameters to IV.  */
 980
 981 static bool
 982 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 983 {
 984   tree lhs;
 985   struct loop *loop = data->current_loop;
 986
 987   iv->base = NULL_TREE;
 988   iv->step = NULL_TREE;
 989
 990   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 991     return false;
 992
 993   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 994   if (TREE_CODE (lhs) != SSA_NAME)
 995     return false;
 996
 997   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 998     return false;
 999   iv->base = expand_simple_operations (iv->base);
1000
1001   if (contains_abnormal_ssa_name_p (iv->base)
1002       || contains_abnormal_ssa_name_p (iv->step))
1003     return false;
1004
1005   return true;
1006 }
1007
1008 /* Finds general ivs in statement STMT.  */
1009
1010 static void
1011 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1012 {
1013   affine_iv iv;
1014
1015   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1016     return;
1017
1018   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1019 }
1020
1021 /* Finds general ivs in basic block BB.  */
1022
1023 static void
1024 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1025 {
1026   block_stmt_iterator bsi;
1027
1028   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1029     find_givs_in_stmt (data, bsi_stmt (bsi));
1030 }
1031
1032 /* Finds general ivs.  */
1033
1034 static void
1035 find_givs (struct ivopts_data *data)
1036 {
1037   struct loop *loop = data->current_loop;
1038   basic_block *body = get_loop_body_in_dom_order (loop);
1039   unsigned i;
1040
1041   for (i = 0; i < loop->num_nodes; i++)
1042     find_givs_in_bb (data, body[i]);
1043   free (body);
1044 }
1045
1046 /* For each ssa name defined in LOOP determines whether it is an induction
1047    variable and if so, its initial value and step.  */
1048
1049 static bool
1050 find_induction_variables (struct ivopts_data *data)
1051 {
1052   unsigned i;
1053   bitmap_iterator bi;
1054
1055   if (!find_bivs (data))
1056     return false;
1057
1058   find_givs (data);
1059   mark_bivs (data);
1060
1061   if (dump_file && (dump_flags & TDF_DETAILS))
1062     {
1063       tree niter = niter_for_single_dom_exit (data);
1064
1065       if (niter)
1066         {
1067           fprintf (dump_file, "  number of iterations ");
1068           print_generic_expr (dump_file, niter, TDF_SLIM);
1069           fprintf (dump_file, "\n\n");
1070         };
1071
1072       fprintf (dump_file, "Induction variables:\n\n");
1073
1074       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1075         {
1076           if (ver_info (data, i)->iv)
1077             dump_iv (dump_file, ver_info (data, i)->iv);
1078         }
1079     }
1080
1081   return true;
1082 }
1083
1084 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1085
1086 static struct iv_use *
1087 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1088             tree stmt, enum use_type use_type)
1089 {
1090   struct iv_use *use = XCNEW (struct iv_use);
1091
1092   use->id = n_iv_uses (data);
1093   use->type = use_type;
1094   use->iv = iv;
1095   use->stmt = stmt;
1096   use->op_p = use_p;
1097   use->related_cands = BITMAP_ALLOC (NULL);
1098
1099   /* To avoid showing ssa name in the dumps, if it was not reset by the
1100      caller.  */
1101   iv->ssa_name = NULL_TREE;
1102
1103   if (dump_file && (dump_flags & TDF_DETAILS))
1104     dump_use (dump_file, use);
1105
1106   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1107
1108   return use;
1109 }
1110
1111 /* Checks whether OP is a loop-level invariant and if so, records it.
1112    NONLINEAR_USE is true if the invariant is used in a way we do not
1113    handle specially.  */
1114
1115 static void
1116 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1117 {
1118   basic_block bb;
1119   struct version_info *info;
1120
1121   if (TREE_CODE (op) != SSA_NAME
1122       || !is_gimple_reg (op))
1123     return;
1124
1125   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1126   if (bb
1127       && flow_bb_inside_loop_p (data->current_loop, bb))
1128     return;
1129
1130   info = name_info (data, op);
1131   info->name = op;
1132   info->has_nonlin_use |= nonlinear_use;
1133   if (!info->inv_id)
1134     info->inv_id = ++data->max_inv_id;
1135   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1136 }
1137
1138 /* Checks whether the use OP is interesting and if so, records it.  */
1139
1140 static struct iv_use *
1141 find_interesting_uses_op (struct ivopts_data *data, tree op)
1142 {
1143   struct iv *iv;
1144   struct iv *civ;
1145   tree stmt;
1146   struct iv_use *use;
1147
1148   if (TREE_CODE (op) != SSA_NAME)
1149     return NULL;
1150
1151   iv = get_iv (data, op);
1152   if (!iv)
1153     return NULL;
1154
1155   if (iv->have_use_for)
1156     {
1157       use = iv_use (data, iv->use_id);
1158
1159       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1160       return use;
1161     }
1162
1163   if (null_or_integer_zerop (iv->step))
1164     {
1165       record_invariant (data, op, true);
1166       return NULL;
1167     }
1168   iv->have_use_for = true;
1169
1170   civ = XNEW (struct iv);
1171   *civ = *iv;
1172
1173   stmt = SSA_NAME_DEF_STMT (op);
1174   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1175               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1176
1177   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1178   iv->use_id = use->id;
1179
1180   return use;
1181 }
1182
1183 /* Checks whether the condition *COND_P in STMT is interesting
1184    and if so, records it.  */
1185
1186 static void
1187 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1188 {
1189   tree *op0_p;
1190   tree *op1_p;
1191   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1192   struct iv const_iv;
1193   tree zero = integer_zero_node;
1194
1195   const_iv.step = NULL_TREE;
1196
1197   if (TREE_CODE (*cond_p) != SSA_NAME
1198       && !COMPARISON_CLASS_P (*cond_p))
1199     return;
1200
1201   if (TREE_CODE (*cond_p) == SSA_NAME)
1202     {
1203       op0_p = cond_p;
1204       op1_p = &zero;
1205     }
1206   else
1207     {
1208       op0_p = &TREE_OPERAND (*cond_p, 0);
1209       op1_p = &TREE_OPERAND (*cond_p, 1);
1210     }
1211
1212   if (TREE_CODE (*op0_p) == SSA_NAME)
1213     iv0 = get_iv (data, *op0_p);
1214   else
1215     iv0 = &const_iv;
1216
1217   if (TREE_CODE (*op1_p) == SSA_NAME)
1218     iv1 = get_iv (data, *op1_p);
1219   else
1220     iv1 = &const_iv;
1221
1222   if (/* When comparing with non-invariant value, we may not do any senseful
1223          induction variable elimination.  */
1224       (!iv0 || !iv1)
1225       /* Eliminating condition based on two ivs would be nontrivial.
1226          ??? TODO -- it is not really important to handle this case.  */
1227       || (!null_or_integer_zerop (iv0->step)
1228           && !null_or_integer_zerop (iv1->step)))
1229     {
1230       find_interesting_uses_op (data, *op0_p);
1231       find_interesting_uses_op (data, *op1_p);
1232       return;
1233     }
1234
1235   if (null_or_integer_zerop (iv0->step)
1236       && null_or_integer_zerop (iv1->step))
1237     {
1238       /* If both are invariants, this is a work for unswitching.  */
1239       return;
1240     }
1241
1242   civ = XNEW (struct iv);
1243   *civ = null_or_integer_zerop (iv0->step) ? *iv1: *iv0;
1244   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1245 }
1246
1247 /* Returns true if expression EXPR is obviously invariant in LOOP,
1248    i.e. if all its operands are defined outside of the LOOP.  */
1249
1250 bool
1251 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1252 {
1253   basic_block def_bb;
1254   unsigned i, len;
1255
1256   if (is_gimple_min_invariant (expr))
1257     return true;
1258
1259   if (TREE_CODE (expr) == SSA_NAME)
1260     {
1261       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1262       if (def_bb
1263           && flow_bb_inside_loop_p (loop, def_bb))
1264         return false;
1265
1266       return true;
1267     }
1268
1269   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1270     return false;
1271
1272   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1273   for (i = 0; i < len; i++)
1274     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1275       return false;
1276
1277   return true;
1278 }
1279
1280 /* Cumulates the steps of indices into DATA and replaces their values with the
1281    initial ones.  Returns false when the value of the index cannot be determined.
1282    Callback for for_each_index.  */
1283
1284 struct ifs_ivopts_data
1285 {
1286   struct ivopts_data *ivopts_data;
1287   tree stmt;
1288   tree *step_p;
1289 };
1290
1291 static bool
1292 idx_find_step (tree base, tree *idx, void *data)
1293 {
1294   struct ifs_ivopts_data *dta = data;
1295   struct iv *iv;
1296   tree step, iv_base, iv_step, lbound, off;
1297   struct loop *loop = dta->ivopts_data->current_loop;
1298
1299   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1300       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1301     return false;
1302
1303   /* If base is a component ref, require that the offset of the reference
1304      be invariant.  */
1305   if (TREE_CODE (base) == COMPONENT_REF)
1306     {
1307       off = component_ref_field_offset (base);
1308       return expr_invariant_in_loop_p (loop, off);
1309     }
1310
1311   /* If base is array, first check whether we will be able to move the
1312      reference out of the loop (in order to take its address in strength
1313      reduction).  In order for this to work we need both lower bound
1314      and step to be loop invariants.  */
1315   if (TREE_CODE (base) == ARRAY_REF)
1316     {
1317       step = array_ref_element_size (base);
1318       lbound = array_ref_low_bound (base);
1319
1320       if (!expr_invariant_in_loop_p (loop, step)
1321           || !expr_invariant_in_loop_p (loop, lbound))
1322         return false;
1323     }
1324
1325   if (TREE_CODE (*idx) != SSA_NAME)
1326     return true;
1327
1328   iv = get_iv (dta->ivopts_data, *idx);
1329   if (!iv)
1330     return false;
1331
1332   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1333           *&x[0], which is not folded and does not trigger the
1334           ARRAY_REF path below.  */
1335   *idx = iv->base;
1336
1337   if (!iv->step)
1338     return true;
1339
1340   if (TREE_CODE (base) == ARRAY_REF)
1341     {
1342       step = array_ref_element_size (base);
1343
1344       /* We only handle addresses whose step is an integer constant.  */
1345       if (TREE_CODE (step) != INTEGER_CST)
1346         return false;
1347     }
1348   else
1349     /* The step for pointer arithmetics already is 1 byte.  */
1350     step = build_int_cst (sizetype, 1);
1351
1352   iv_base = iv->base;
1353   iv_step = iv->step;
1354   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1355                             sizetype, &iv_base, &iv_step, dta->stmt,
1356                             false))
1357     {
1358       /* The index might wrap.  */
1359       return false;
1360     }
1361
1362   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1363
1364   if (!*dta->step_p)
1365     *dta->step_p = step;
1366   else
1367     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1368
1369   return true;
1370 }
1371
1372 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1373    object is passed to it in DATA.  */
1374
1375 static bool
1376 idx_record_use (tree base, tree *idx,
1377                 void *data)
1378 {
1379   find_interesting_uses_op (data, *idx);
1380   if (TREE_CODE (base) == ARRAY_REF)
1381     {
1382       find_interesting_uses_op (data, array_ref_element_size (base));
1383       find_interesting_uses_op (data, array_ref_low_bound (base));
1384     }
1385   return true;
1386 }
1387
1388 /* Returns true if memory reference REF may be unaligned.  */
1389
1390 static bool
1391 may_be_unaligned_p (tree ref)
1392 {
1393   tree base;
1394   tree base_type;
1395   HOST_WIDE_INT bitsize;
1396   HOST_WIDE_INT bitpos;
1397   tree toffset;
1398   enum machine_mode mode;
1399   int unsignedp, volatilep;
1400   unsigned base_align;
1401
1402   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1403      thus they are not misaligned.  */
1404   if (TREE_CODE (ref) == TARGET_MEM_REF)
1405     return false;
1406
1407   /* The test below is basically copy of what expr.c:normal_inner_ref
1408      does to check whether the object must be loaded by parts when
1409      STRICT_ALIGNMENT is true.  */
1410   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1411                               &unsignedp, &volatilep, true);
1412   base_type = TREE_TYPE (base);
1413   base_align = TYPE_ALIGN (base_type);
1414
1415   if (mode != BLKmode
1416       && (base_align < GET_MODE_ALIGNMENT (mode)
1417           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1418           || bitpos % BITS_PER_UNIT != 0))
1419     return true;
1420
1421   return false;
1422 }
1423
1424 /* Return true if EXPR may be non-addressable.   */
1425
1426 static bool
1427 may_be_nonaddressable_p (tree expr)
1428 {
1429   switch (TREE_CODE (expr))
1430     {
1431     case COMPONENT_REF:
1432       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1433              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1434
1435     case ARRAY_REF:
1436     case ARRAY_RANGE_REF:
1437       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1438
1439     case VIEW_CONVERT_EXPR:
1440       /* This kind of view-conversions may wrap non-addressable objects
1441          and make them look addressable.  After some processing the
1442          non-addressability may be uncovered again, causing ADDR_EXPRs
1443          of inappropriate objects to be built.  */
1444       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1445              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1446
1447     default:
1448       break;
1449     }
1450
1451   return false;
1452 }
1453
1454 /* Finds addresses in *OP_P inside STMT.  */
1455
1456 static void
1457 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1458 {
1459   tree base = *op_p, step = NULL;
1460   struct iv *civ;
1461   struct ifs_ivopts_data ifs_ivopts_data;
1462
1463   /* Do not play with volatile memory references.  A bit too conservative,
1464      perhaps, but safe.  */
1465   if (stmt_ann (stmt)->has_volatile_ops)
1466     goto fail;
1467
1468   /* Ignore bitfields for now.  Not really something terribly complicated
1469      to handle.  TODO.  */
1470   if (TREE_CODE (base) == BIT_FIELD_REF)
1471     goto fail;
1472
1473   if (may_be_nonaddressable_p (base))
1474     goto fail;
1475
1476   if (STRICT_ALIGNMENT
1477       && may_be_unaligned_p (base))
1478     goto fail;
1479
1480   base = unshare_expr (base);
1481
1482   if (TREE_CODE (base) == TARGET_MEM_REF)
1483     {
1484       tree type = build_pointer_type (TREE_TYPE (base));
1485       tree astep;
1486
1487       if (TMR_BASE (base)
1488           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1489         {
1490           civ = get_iv (data, TMR_BASE (base));
1491           if (!civ)
1492             goto fail;
1493
1494           TMR_BASE (base) = civ->base;
1495           step = civ->step;
1496         }
1497       if (TMR_INDEX (base)
1498           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1499         {
1500           civ = get_iv (data, TMR_INDEX (base));
1501           if (!civ)
1502             goto fail;
1503
1504           TMR_INDEX (base) = civ->base;
1505           astep = civ->step;
1506
1507           if (astep)
1508             {
1509               if (TMR_STEP (base))
1510                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1511
1512               if (step)
1513                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1514               else
1515                 step = astep;
1516             }
1517         }
1518
1519       if (null_or_integer_zerop (step))
1520         goto fail;
1521       base = tree_mem_ref_addr (type, base);
1522     }
1523   else
1524     {
1525       ifs_ivopts_data.ivopts_data = data;
1526       ifs_ivopts_data.stmt = stmt;
1527       ifs_ivopts_data.step_p = &step;
1528       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1529           || null_or_integer_zerop (step))
1530         goto fail;
1531
1532       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1533       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1534
1535       base = build_fold_addr_expr (base);
1536
1537       /* Substituting bases of IVs into the base expression might
1538          have caused folding opportunities.  */
1539       if (TREE_CODE (base) == ADDR_EXPR)
1540         {
1541           tree *ref = &TREE_OPERAND (base, 0);
1542           while (handled_component_p (*ref))
1543             ref = &TREE_OPERAND (*ref, 0);
1544           if (TREE_CODE (*ref) == INDIRECT_REF)
1545             *ref = fold_indirect_ref (*ref);
1546         }
1547     }
1548
1549   civ = alloc_iv (base, step);
1550   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1551   return;
1552
1553 fail:
1554   for_each_index (op_p, idx_record_use, data);
1555 }
1556
1557 /* Finds and records invariants used in STMT.  */
1558
1559 static void
1560 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1561 {
1562   ssa_op_iter iter;
1563   use_operand_p use_p;
1564   tree op;
1565
1566   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1567     {
1568       op = USE_FROM_PTR (use_p);
1569       record_invariant (data, op, false);
1570     }
1571 }
1572
1573 /* Finds interesting uses of induction variables in the statement STMT.  */
1574
1575 static void
1576 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1577 {
1578   struct iv *iv;
1579   tree op, lhs, rhs;
1580   ssa_op_iter iter;
1581   use_operand_p use_p;
1582
1583   find_invariants_stmt (data, stmt);
1584
1585   if (TREE_CODE (stmt) == COND_EXPR)
1586     {
1587       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1588       return;
1589     }
1590
1591   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1592     {
1593       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1594       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1595
1596       if (TREE_CODE (lhs) == SSA_NAME)
1597         {
1598           /* If the statement defines an induction variable, the uses are not
1599              interesting by themselves.  */
1600
1601           iv = get_iv (data, lhs);
1602
1603           if (iv && !null_or_integer_zerop (iv->step))
1604             return;
1605         }
1606
1607       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1608         {
1609         case tcc_comparison:
1610           find_interesting_uses_cond (data, stmt,
1611                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1612           return;
1613
1614         case tcc_reference:
1615           find_interesting_uses_address (data, stmt,
1616                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1617           if (REFERENCE_CLASS_P (lhs))
1618             find_interesting_uses_address (data, stmt,
1619                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1620           return;
1621
1622         default: ;
1623         }
1624
1625       if (REFERENCE_CLASS_P (lhs)
1626           && is_gimple_val (rhs))
1627         {
1628           find_interesting_uses_address (data, stmt,
1629                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1630           find_interesting_uses_op (data, rhs);
1631           return;
1632         }
1633
1634       /* TODO -- we should also handle address uses of type
1635
1636          memory = call (whatever);
1637
1638          and
1639
1640          call (memory).  */
1641     }
1642
1643   if (TREE_CODE (stmt) == PHI_NODE
1644       && bb_for_stmt (stmt) == data->current_loop->header)
1645     {
1646       lhs = PHI_RESULT (stmt);
1647       iv = get_iv (data, lhs);
1648
1649       if (iv && !null_or_integer_zerop (iv->step))
1650         return;
1651     }
1652
1653   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1654     {
1655       op = USE_FROM_PTR (use_p);
1656
1657       if (TREE_CODE (op) != SSA_NAME)
1658         continue;
1659
1660       iv = get_iv (data, op);
1661       if (!iv)
1662         continue;
1663
1664       find_interesting_uses_op (data, op);
1665     }
1666 }
1667
1668 /* Finds interesting uses of induction variables outside of loops
1669    on loop exit edge EXIT.  */
1670
1671 static void
1672 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1673 {
1674   tree phi, def;
1675
1676   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1677     {
1678       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1679       find_interesting_uses_op (data, def);
1680     }
1681 }
1682
1683 /* Finds uses of the induction variables that are interesting.  */
1684
1685 static void
1686 find_interesting_uses (struct ivopts_data *data)
1687 {
1688   basic_block bb;
1689   block_stmt_iterator bsi;
1690   tree phi;
1691   basic_block *body = get_loop_body (data->current_loop);
1692   unsigned i;
1693   struct version_info *info;
1694   edge e;
1695
1696   if (dump_file && (dump_flags & TDF_DETAILS))
1697     fprintf (dump_file, "Uses:\n\n");
1698
1699   for (i = 0; i < data->current_loop->num_nodes; i++)
1700     {
1701       edge_iterator ei;
1702       bb = body[i];
1703
1704       FOR_EACH_EDGE (e, ei, bb->succs)
1705         if (e->dest != EXIT_BLOCK_PTR
1706             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1707           find_interesting_uses_outside (data, e);
1708
1709       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1710         find_interesting_uses_stmt (data, phi);
1711       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1712         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1713     }
1714
1715   if (dump_file && (dump_flags & TDF_DETAILS))
1716     {
1717       bitmap_iterator bi;
1718
1719       fprintf (dump_file, "\n");
1720
1721       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1722         {
1723           info = ver_info (data, i);
1724           if (info->inv_id)
1725             {
1726               fprintf (dump_file, "  ");
1727               print_generic_expr (dump_file, info->name, TDF_SLIM);
1728               fprintf (dump_file, " is invariant (%d)%s\n",
1729                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1730             }
1731         }
1732
1733       fprintf (dump_file, "\n");
1734     }
1735
1736   free (body);
1737 }
1738
1739 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1740    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1741    we are at the top-level of the processed address.  */
1742
1743 static tree
1744 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1745                 unsigned HOST_WIDE_INT *offset)
1746 {
1747   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1748   enum tree_code code;
1749   tree type, orig_type = TREE_TYPE (expr);
1750   unsigned HOST_WIDE_INT off0, off1, st;
1751   tree orig_expr = expr;
1752
1753   STRIP_NOPS (expr);
1754
1755   type = TREE_TYPE (expr);
1756   code = TREE_CODE (expr);
1757   *offset = 0;
1758
1759   switch (code)
1760     {
1761     case INTEGER_CST:
1762       if (!cst_and_fits_in_hwi (expr)
1763           || integer_zerop (expr))
1764         return orig_expr;
1765
1766       *offset = int_cst_value (expr);
1767       return build_int_cst (orig_type, 0);
1768
1769     case PLUS_EXPR:
1770     case MINUS_EXPR:
1771       op0 = TREE_OPERAND (expr, 0);
1772       op1 = TREE_OPERAND (expr, 1);
1773
1774       op0 = strip_offset_1 (op0, false, false, &off0);
1775       op1 = strip_offset_1 (op1, false, false, &off1);
1776
1777       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1778       if (op0 == TREE_OPERAND (expr, 0)
1779           && op1 == TREE_OPERAND (expr, 1))
1780         return orig_expr;
1781
1782       if (integer_zerop (op1))
1783         expr = op0;
1784       else if (integer_zerop (op0))
1785         {
1786           if (code == PLUS_EXPR)
1787             expr = op1;
1788           else
1789             expr = fold_build1 (NEGATE_EXPR, type, op1);
1790         }
1791       else
1792         expr = fold_build2 (code, type, op0, op1);
1793
1794       return fold_convert (orig_type, expr);
1795
1796     case ARRAY_REF:
1797       if (!inside_addr)
1798         return orig_expr;
1799
1800       step = array_ref_element_size (expr);
1801       if (!cst_and_fits_in_hwi (step))
1802         break;
1803
1804       st = int_cst_value (step);
1805       op1 = TREE_OPERAND (expr, 1);
1806       op1 = strip_offset_1 (op1, false, false, &off1);
1807       *offset = off1 * st;
1808
1809       if (top_compref
1810           && integer_zerop (op1))
1811         {
1812           /* Strip the component reference completely.  */
1813           op0 = TREE_OPERAND (expr, 0);
1814           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1815           *offset += off0;
1816           return op0;
1817         }
1818       break;
1819
1820     case COMPONENT_REF:
1821       if (!inside_addr)
1822         return orig_expr;
1823
1824       tmp = component_ref_field_offset (expr);
1825       if (top_compref
1826           && cst_and_fits_in_hwi (tmp))
1827         {
1828           /* Strip the component reference completely.  */
1829           op0 = TREE_OPERAND (expr, 0);
1830           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1831           *offset = off0 + int_cst_value (tmp);
1832           return op0;
1833         }
1834       break;
1835
1836     case ADDR_EXPR:
1837       op0 = TREE_OPERAND (expr, 0);
1838       op0 = strip_offset_1 (op0, true, true, &off0);
1839       *offset += off0;
1840
1841       if (op0 == TREE_OPERAND (expr, 0))
1842         return orig_expr;
1843
1844       expr = build_fold_addr_expr (op0);
1845       return fold_convert (orig_type, expr);
1846
1847     case INDIRECT_REF:
1848       inside_addr = false;
1849       break;
1850
1851     default:
1852       return orig_expr;
1853     }
1854
1855   /* Default handling of expressions for that we want to recurse into
1856      the first operand.  */
1857   op0 = TREE_OPERAND (expr, 0);
1858   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1859   *offset += off0;
1860
1861   if (op0 == TREE_OPERAND (expr, 0)
1862       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1863     return orig_expr;
1864
1865   expr = copy_node (expr);
1866   TREE_OPERAND (expr, 0) = op0;
1867   if (op1)
1868     TREE_OPERAND (expr, 1) = op1;
1869
1870   /* Inside address, we might strip the top level component references,
1871      thus changing type of the expression.  Handling of ADDR_EXPR
1872      will fix that.  */
1873   expr = fold_convert (orig_type, expr);
1874
1875   return expr;
1876 }
1877
1878 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1879
1880 static tree
1881 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1882 {
1883   return strip_offset_1 (expr, false, false, offset);
1884 }
1885
1886 /* Returns variant of TYPE that can be used as base for different uses.
1887    We return unsigned type with the same precision, which avoids problems
1888    with overflows.  */
1889
1890 static tree
1891 generic_type_for (tree type)
1892 {
1893   if (POINTER_TYPE_P (type))
1894     return unsigned_type_for (type);
1895
1896   if (TYPE_UNSIGNED (type))
1897     return type;
1898
1899   return unsigned_type_for (type);
1900 }
1901
1902 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1903    the bitmap to that we should store it.  */
1904
1905 static struct ivopts_data *fd_ivopts_data;
1906 static tree
1907 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1908 {
1909   bitmap *depends_on = data;
1910   struct version_info *info;
1911
1912   if (TREE_CODE (*expr_p) != SSA_NAME)
1913     return NULL_TREE;
1914   info = name_info (fd_ivopts_data, *expr_p);
1915
1916   if (!info->inv_id || info->has_nonlin_use)
1917     return NULL_TREE;
1918
1919   if (!*depends_on)
1920     *depends_on = BITMAP_ALLOC (NULL);
1921   bitmap_set_bit (*depends_on, info->inv_id);
1922
1923   return NULL_TREE;
1924 }
1925
1926 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1927    position to POS.  If USE is not NULL, the candidate is set as related to
1928    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1929    replacement of the final value of the iv by a direct computation.  */
1930
1931 static struct iv_cand *
1932 add_candidate_1 (struct ivopts_data *data,
1933                  tree base, tree step, bool important, enum iv_position pos,
1934                  struct iv_use *use, tree incremented_at)
1935 {
1936   unsigned i;
1937   struct iv_cand *cand = NULL;
1938   tree type, orig_type;
1939
1940   if (base)
1941     {
1942       orig_type = TREE_TYPE (base);
1943       type = generic_type_for (orig_type);
1944       if (type != orig_type)
1945         {
1946           base = fold_convert (type, base);
1947           if (step)
1948             step = fold_convert (type, step);
1949         }
1950     }
1951
1952   for (i = 0; i < n_iv_cands (data); i++)
1953     {
1954       cand = iv_cand (data, i);
1955
1956       if (cand->pos != pos)
1957         continue;
1958
1959       if (cand->incremented_at != incremented_at)
1960         continue;
1961
1962       if (!cand->iv)
1963         {
1964           if (!base && !step)
1965             break;
1966
1967           continue;
1968         }
1969
1970       if (!base && !step)
1971         continue;
1972
1973       if (!operand_equal_p (base, cand->iv->base, 0))
1974         continue;
1975
1976       if (null_or_integer_zerop (cand->iv->step))
1977         {
1978           if (null_or_integer_zerop (step))
1979             break;
1980         }
1981       else
1982         {
1983           if (step && operand_equal_p (step, cand->iv->step, 0))
1984             break;
1985         }
1986     }
1987
1988   if (i == n_iv_cands (data))
1989     {
1990       cand = XCNEW (struct iv_cand);
1991       cand->id = i;
1992
1993       if (!base && !step)
1994         cand->iv = NULL;
1995       else
1996         cand->iv = alloc_iv (base, step);
1997
1998       cand->pos = pos;
1999       if (pos != IP_ORIGINAL && cand->iv)
2000         {
2001           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2002           cand->var_after = cand->var_before;
2003         }
2004       cand->important = important;
2005       cand->incremented_at = incremented_at;
2006       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2007
2008       if (step
2009           && TREE_CODE (step) != INTEGER_CST)
2010         {
2011           fd_ivopts_data = data;
2012           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2013         }
2014
2015       if (dump_file && (dump_flags & TDF_DETAILS))
2016         dump_cand (dump_file, cand);
2017     }
2018
2019   if (important && !cand->important)
2020     {
2021       cand->important = true;
2022       if (dump_file && (dump_flags & TDF_DETAILS))
2023         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2024     }
2025
2026   if (use)
2027     {
2028       bitmap_set_bit (use->related_cands, i);
2029       if (dump_file && (dump_flags & TDF_DETAILS))
2030         fprintf (dump_file, "Candidate %d is related to use %d\n",
2031                  cand->id, use->id);
2032     }
2033
2034   return cand;
2035 }
2036
2037 /* Returns true if incrementing the induction variable at the end of the LOOP
2038    is allowed.
2039
2040    The purpose is to avoid splitting latch edge with a biv increment, thus
2041    creating a jump, possibly confusing other optimization passes and leaving
2042    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2043    is not available (so we do not have a better alternative), or if the latch
2044    edge is already nonempty.  */
2045
2046 static bool
2047 allow_ip_end_pos_p (struct loop *loop)
2048 {
2049   if (!ip_normal_pos (loop))
2050     return true;
2051
2052   if (!empty_block_p (ip_end_pos (loop)))
2053     return true;
2054
2055   return false;
2056 }
2057
2058 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2059    position to POS.  If USE is not NULL, the candidate is set as related to
2060    it.  The candidate computation is scheduled on all available positions.  */
2061
2062 static void
2063 add_candidate (struct ivopts_data *data,
2064                tree base, tree step, bool important, struct iv_use *use)
2065 {
2066   if (ip_normal_pos (data->current_loop))
2067     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2068   if (ip_end_pos (data->current_loop)
2069       && allow_ip_end_pos_p (data->current_loop))
2070     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2071 }
2072
2073 /* Add a standard "0 + 1 * iteration" iv candidate for a
2074    type with SIZE bits.  */
2075
2076 static void
2077 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2078                                      unsigned int size)
2079 {
2080   tree type = lang_hooks.types.type_for_size (size, true);
2081   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2082                  true, NULL);
2083 }
2084
2085 /* Adds standard iv candidates.  */
2086
2087 static void
2088 add_standard_iv_candidates (struct ivopts_data *data)
2089 {
2090   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2091
2092   /* The same for a double-integer type if it is still fast enough.  */
2093   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2094     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2095 }
2096
2097
2098 /* Adds candidates bases on the old induction variable IV.  */
2099
2100 static void
2101 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2102 {
2103   tree phi, def;
2104   struct iv_cand *cand;
2105
2106   add_candidate (data, iv->base, iv->step, true, NULL);
2107
2108   /* The same, but with initial value zero.  */
2109   add_candidate (data,
2110                  build_int_cst (TREE_TYPE (iv->base), 0),
2111                  iv->step, true, NULL);
2112
2113   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2114   if (TREE_CODE (phi) == PHI_NODE)
2115     {
2116       /* Additionally record the possibility of leaving the original iv
2117          untouched.  */
2118       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2119       cand = add_candidate_1 (data,
2120                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2121                               SSA_NAME_DEF_STMT (def));
2122       cand->var_before = iv->ssa_name;
2123       cand->var_after = def;
2124     }
2125 }
2126
2127 /* Adds candidates based on the old induction variables.  */
2128
2129 static void
2130 add_old_ivs_candidates (struct ivopts_data *data)
2131 {
2132   unsigned i;
2133   struct iv *iv;
2134   bitmap_iterator bi;
2135
2136   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2137     {
2138       iv = ver_info (data, i)->iv;
2139       if (iv && iv->biv_p && !null_or_integer_zerop (iv->step))
2140         add_old_iv_candidates (data, iv);
2141     }
2142 }
2143
2144 /* Adds candidates based on the value of the induction variable IV and USE.  */
2145
2146 static void
2147 add_iv_value_candidates (struct ivopts_data *data,
2148                          struct iv *iv, struct iv_use *use)
2149 {
2150   unsigned HOST_WIDE_INT offset;
2151   tree base;
2152
2153   add_candidate (data, iv->base, iv->step, false, use);
2154
2155   /* The same, but with initial value zero.  Make such variable important,
2156      since it is generic enough so that possibly many uses may be based
2157      on it.  */
2158   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2159                  iv->step, true, use);
2160
2161   /* Third, try removing the constant offset.  */
2162   base = strip_offset (iv->base, &offset);
2163   if (offset)
2164     add_candidate (data, base, iv->step, false, use);
2165 }
2166
2167 /* Adds candidates based on the uses.  */
2168
2169 static void
2170 add_derived_ivs_candidates (struct ivopts_data *data)
2171 {
2172   unsigned i;
2173
2174   for (i = 0; i < n_iv_uses (data); i++)
2175     {
2176       struct iv_use *use = iv_use (data, i);
2177
2178       if (!use)
2179         continue;
2180
2181       switch (use->type)
2182         {
2183         case USE_NONLINEAR_EXPR:
2184         case USE_COMPARE:
2185         case USE_ADDRESS:
2186           /* Just add the ivs based on the value of the iv used here.  */
2187           add_iv_value_candidates (data, use->iv, use);
2188           break;
2189
2190         default:
2191           gcc_unreachable ();
2192         }
2193     }
2194 }
2195
2196 /* Record important candidates and add them to related_cands bitmaps
2197    if needed.  */
2198
2199 static void
2200 record_important_candidates (struct ivopts_data *data)
2201 {
2202   unsigned i;
2203   struct iv_use *use;
2204
2205   for (i = 0; i < n_iv_cands (data); i++)
2206     {
2207       struct iv_cand *cand = iv_cand (data, i);
2208
2209       if (cand->important)
2210         bitmap_set_bit (data->important_candidates, i);
2211     }
2212
2213   data->consider_all_candidates = (n_iv_cands (data)
2214                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2215
2216   if (data->consider_all_candidates)
2217     {
2218       /* We will not need "related_cands" bitmaps in this case,
2219          so release them to decrease peak memory consumption.  */
2220       for (i = 0; i < n_iv_uses (data); i++)
2221         {
2222           use = iv_use (data, i);
2223           BITMAP_FREE (use->related_cands);
2224         }
2225     }
2226   else
2227     {
2228       /* Add important candidates to the related_cands bitmaps.  */
2229       for (i = 0; i < n_iv_uses (data); i++)
2230         bitmap_ior_into (iv_use (data, i)->related_cands,
2231                          data->important_candidates);
2232     }
2233 }
2234
2235 /* Finds the candidates for the induction variables.  */
2236
2237 static void
2238 find_iv_candidates (struct ivopts_data *data)
2239 {
2240   /* Add commonly used ivs.  */
2241   add_standard_iv_candidates (data);
2242
2243   /* Add old induction variables.  */
2244   add_old_ivs_candidates (data);
2245
2246   /* Add induction variables derived from uses.  */
2247   add_derived_ivs_candidates (data);
2248
2249   /* Record the important candidates.  */
2250   record_important_candidates (data);
2251 }
2252
2253 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2254    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2255    we allocate a simple list to every use.  */
2256
2257 static void
2258 alloc_use_cost_map (struct ivopts_data *data)
2259 {
2260   unsigned i, size, s, j;
2261
2262   for (i = 0; i < n_iv_uses (data); i++)
2263     {
2264       struct iv_use *use = iv_use (data, i);
2265       bitmap_iterator bi;
2266
2267       if (data->consider_all_candidates)
2268         size = n_iv_cands (data);
2269       else
2270         {
2271           s = 0;
2272           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2273             {
2274               s++;
2275             }
2276
2277           /* Round up to the power of two, so that moduling by it is fast.  */
2278           for (size = 1; size < s; size <<= 1)
2279             continue;
2280         }
2281
2282       use->n_map_members = size;
2283       use->cost_map = XCNEWVEC (struct cost_pair, size);
2284     }
2285 }
2286
2287 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2288    on invariants DEPENDS_ON and that the value used in expressing it
2289    is VALUE.*/
2290
2291 static void
2292 set_use_iv_cost (struct ivopts_data *data,
2293                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2294                  bitmap depends_on, tree value)
2295 {
2296   unsigned i, s;
2297
2298   if (cost == INFTY)
2299     {
2300       BITMAP_FREE (depends_on);
2301       return;
2302     }
2303
2304   if (data->consider_all_candidates)
2305     {
2306       use->cost_map[cand->id].cand = cand;
2307       use->cost_map[cand->id].cost = cost;
2308       use->cost_map[cand->id].depends_on = depends_on;
2309       use->cost_map[cand->id].value = value;
2310       return;
2311     }
2312
2313   /* n_map_members is a power of two, so this computes modulo.  */
2314   s = cand->id & (use->n_map_members - 1);
2315   for (i = s; i < use->n_map_members; i++)
2316     if (!use->cost_map[i].cand)
2317       goto found;
2318   for (i = 0; i < s; i++)
2319     if (!use->cost_map[i].cand)
2320       goto found;
2321
2322   gcc_unreachable ();
2323
2324 found:
2325   use->cost_map[i].cand = cand;
2326   use->cost_map[i].cost = cost;
2327   use->cost_map[i].depends_on = depends_on;
2328   use->cost_map[i].value = value;
2329 }
2330
2331 /* Gets cost of (USE, CANDIDATE) pair.  */
2332
2333 static struct cost_pair *
2334 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2335                  struct iv_cand *cand)
2336 {
2337   unsigned i, s;
2338   struct cost_pair *ret;
2339
2340   if (!cand)
2341     return NULL;
2342
2343   if (data->consider_all_candidates)
2344     {
2345       ret = use->cost_map + cand->id;
2346       if (!ret->cand)
2347         return NULL;
2348
2349       return ret;
2350     }
2351
2352   /* n_map_members is a power of two, so this computes modulo.  */
2353   s = cand->id & (use->n_map_members - 1);
2354   for (i = s; i < use->n_map_members; i++)
2355     if (use->cost_map[i].cand == cand)
2356       return use->cost_map + i;
2357
2358   for (i = 0; i < s; i++)
2359     if (use->cost_map[i].cand == cand)
2360       return use->cost_map + i;
2361
2362   return NULL;
2363 }
2364
2365 /* Returns estimate on cost of computing SEQ.  */
2366
2367 static unsigned
2368 seq_cost (rtx seq)
2369 {
2370   unsigned cost = 0;
2371   rtx set;
2372
2373   for (; seq; seq = NEXT_INSN (seq))
2374     {
2375       set = single_set (seq);
2376       if (set)
2377         cost += rtx_cost (set, SET);
2378       else
2379         cost++;
2380     }
2381
2382   return cost;
2383 }
2384
2385 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2386 static rtx
2387 produce_memory_decl_rtl (tree obj, int *regno)
2388 {
2389   rtx x;
2390
2391   gcc_assert (obj);
2392   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2393     {
2394       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2395       x = gen_rtx_SYMBOL_REF (Pmode, name);
2396     }
2397   else
2398     x = gen_raw_REG (Pmode, (*regno)++);
2399
2400   return gen_rtx_MEM (DECL_MODE (obj), x);
2401 }
2402
2403 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2404    walk_tree.  DATA contains the actual fake register number.  */
2405
2406 static tree
2407 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2408 {
2409   tree obj = NULL_TREE;
2410   rtx x = NULL_RTX;
2411   int *regno = data;
2412
2413   switch (TREE_CODE (*expr_p))
2414     {
2415     case ADDR_EXPR:
2416       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2417            handled_component_p (*expr_p);
2418            expr_p = &TREE_OPERAND (*expr_p, 0))
2419         continue;
2420       obj = *expr_p;
2421       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2422         x = produce_memory_decl_rtl (obj, regno);
2423       break;
2424
2425     case SSA_NAME:
2426       *ws = 0;
2427       obj = SSA_NAME_VAR (*expr_p);
2428       if (!DECL_RTL_SET_P (obj))
2429         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2430       break;
2431
2432     case VAR_DECL:
2433     case PARM_DECL:
2434     case RESULT_DECL:
2435       *ws = 0;
2436       obj = *expr_p;
2437
2438       if (DECL_RTL_SET_P (obj))
2439         break;
2440
2441       if (DECL_MODE (obj) == BLKmode)
2442         x = produce_memory_decl_rtl (obj, regno);
2443       else
2444         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2445
2446       break;
2447
2448     default:
2449       break;
2450     }
2451
2452   if (x)
2453     {
2454       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2455       SET_DECL_RTL (obj, x);
2456     }
2457
2458   return NULL_TREE;
2459 }
2460
2461 /* Determines cost of the computation of EXPR.  */
2462
2463 static unsigned
2464 computation_cost (tree expr)
2465 {
2466   rtx seq, rslt;
2467   tree type = TREE_TYPE (expr);
2468   unsigned cost;
2469   /* Avoid using hard regs in ways which may be unsupported.  */
2470   int regno = LAST_VIRTUAL_REGISTER + 1;
2471
2472   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2473   start_sequence ();
2474   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2475   seq = get_insns ();
2476   end_sequence ();
2477
2478   cost = seq_cost (seq);
2479   if (MEM_P (rslt))
2480     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2481
2482   return cost;
2483 }
2484
2485 /* Returns variable containing the value of candidate CAND at statement AT.  */
2486
2487 static tree
2488 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2489 {
2490   if (stmt_after_increment (loop, cand, stmt))
2491     return cand->var_after;
2492   else
2493     return cand->var_before;
2494 }
2495
2496 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2497    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2498
2499 int
2500 tree_int_cst_sign_bit (tree t)
2501 {
2502   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2503   unsigned HOST_WIDE_INT w;
2504
2505   if (bitno < HOST_BITS_PER_WIDE_INT)
2506     w = TREE_INT_CST_LOW (t);
2507   else
2508     {
2509       w = TREE_INT_CST_HIGH (t);
2510       bitno -= HOST_BITS_PER_WIDE_INT;
2511     }
2512
2513   return (w >> bitno) & 1;
2514 }
2515
2516 /* If we can prove that TOP = cst * BOT for some constant cst,
2517    store cst to MUL and return true.  Otherwise return false.
2518    The returned value is always sign-extended, regardless of the
2519    signedness of TOP and BOT.  */
2520
2521 static bool
2522 constant_multiple_of (tree top, tree bot, double_int *mul)
2523 {
2524   tree mby;
2525   enum tree_code code;
2526   double_int res, p0, p1;
2527   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2528
2529   STRIP_NOPS (top);
2530   STRIP_NOPS (bot);
2531
2532   if (operand_equal_p (top, bot, 0))
2533     {
2534       *mul = double_int_one;
2535       return true;
2536     }
2537
2538   code = TREE_CODE (top);
2539   switch (code)
2540     {
2541     case MULT_EXPR:
2542       mby = TREE_OPERAND (top, 1);
2543       if (TREE_CODE (mby) != INTEGER_CST)
2544         return false;
2545
2546       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2547         return false;
2548
2549       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2550                               precision);
2551       return true;
2552
2553     case PLUS_EXPR:
2554     case MINUS_EXPR:
2555       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2556           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2557         return false;
2558
2559       if (code == MINUS_EXPR)
2560         p1 = double_int_neg (p1);
2561       *mul = double_int_sext (double_int_add (p0, p1), precision);
2562       return true;
2563
2564     case INTEGER_CST:
2565       if (TREE_CODE (bot) != INTEGER_CST)
2566         return false;
2567
2568       p0 = double_int_sext (tree_to_double_int (top), precision);
2569       p1 = double_int_sext (tree_to_double_int (bot), precision);
2570       if (double_int_zero_p (p1))
2571         return false;
2572       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2573                               precision);
2574       return double_int_zero_p (res);
2575
2576     default:
2577       return false;
2578     }
2579 }
2580
2581 /* Folds EXPR using the affine expressions framework.  */
2582
2583 static tree
2584 fold_affine_expr (tree expr)
2585 {
2586   tree type = TREE_TYPE (expr);
2587   struct affine_tree_combination comb;
2588
2589   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2590     return expr;
2591
2592   tree_to_aff_combination (expr, type, &comb);
2593   return aff_combination_to_tree (&comb);
2594 }
2595
2596 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2597    same precision that is at least as wide as the precision of TYPE, stores
2598    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2599    type of A and B.  */
2600
2601 static tree
2602 determine_common_wider_type (tree *a, tree *b)
2603 {
2604   tree wider_type = NULL;
2605   tree suba, subb;
2606   tree atype = TREE_TYPE (*a);
2607
2608   if ((TREE_CODE (*a) == NOP_EXPR
2609        || TREE_CODE (*a) == CONVERT_EXPR))
2610     {
2611       suba = TREE_OPERAND (*a, 0);
2612       wider_type = TREE_TYPE (suba);
2613       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2614         return atype;
2615     }
2616   else
2617     return atype;
2618
2619   if ((TREE_CODE (*b) == NOP_EXPR
2620        || TREE_CODE (*b) == CONVERT_EXPR))
2621     {
2622       subb = TREE_OPERAND (*b, 0);
2623       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2624         return atype;
2625     }
2626   else
2627     return atype;
2628
2629   *a = suba;
2630   *b = subb;
2631   return wider_type;
2632 }
2633
2634 /* Determines the expression by that USE is expressed from induction variable
2635    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2636    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2637
2638 static bool
2639 get_computation_aff (struct loop *loop,
2640                      struct iv_use *use, struct iv_cand *cand, tree at,
2641                      struct affine_tree_combination *aff)
2642 {
2643   tree ubase = use->iv->base;
2644   tree ustep = use->iv->step;
2645   tree cbase = cand->iv->base;
2646   tree cstep = cand->iv->step, cstep_common;
2647   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2648   tree common_type, var;
2649   tree uutype;
2650   aff_tree cbase_aff, var_aff;
2651   double_int rat;
2652
2653   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2654     {
2655       /* We do not have a precision to express the values of use.  */
2656       return false;
2657     }
2658
2659   var = var_at_stmt (loop, cand, at);
2660   uutype = unsigned_type_for (utype);
2661
2662   /* If the conversion is not noop, perform it.  */
2663   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2664     {
2665       cstep = fold_convert (uutype, cstep);
2666       cbase = fold_convert (uutype, cbase);
2667       var = fold_convert (uutype, var);
2668     }
2669
2670   if (!constant_multiple_of (ustep, cstep, &rat))
2671     return false;
2672
2673   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2674      type, we achieve better folding by computing their difference in this
2675      wider type, and cast the result to UUTYPE.  We do not need to worry about
2676      overflows, as all the arithmetics will in the end be performed in UUTYPE
2677      anyway.  */
2678   common_type = determine_common_wider_type (&ubase, &cbase);
2679
2680   /* use = ubase - ratio * cbase + ratio * var.  */
2681   tree_to_aff_combination (ubase, common_type, aff);
2682   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2683   tree_to_aff_combination (var, uutype, &var_aff);
2684
2685   /* We need to shift the value if we are after the increment.  */
2686   if (stmt_after_increment (loop, cand, at))
2687     {
2688       aff_tree cstep_aff;
2689
2690       if (common_type != uutype)
2691         cstep_common = fold_convert (common_type, cstep);
2692       else
2693         cstep_common = cstep;
2694
2695       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2696       aff_combination_add (&cbase_aff, &cstep_aff);
2697     }
2698
2699   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2700   aff_combination_add (aff, &cbase_aff);
2701   if (common_type != uutype)
2702     aff_combination_convert (aff, uutype);
2703
2704   aff_combination_scale (&var_aff, rat);
2705   aff_combination_add (aff, &var_aff);
2706
2707   return true;
2708 }
2709
2710 /* Determines the expression by that USE is expressed from induction variable
2711    CAND at statement AT in LOOP.  The computation is unshared.  */
2712
2713 static tree
2714 get_computation_at (struct loop *loop,
2715                     struct iv_use *use, struct iv_cand *cand, tree at)
2716 {
2717   aff_tree aff;
2718   tree type = TREE_TYPE (use->iv->base);
2719
2720   if (!get_computation_aff (loop, use, cand, at, &aff))
2721     return NULL_TREE;
2722   unshare_aff_combination (&aff);
2723   return fold_convert (type, aff_combination_to_tree (&aff));
2724 }
2725
2726 /* Determines the expression by that USE is expressed from induction variable
2727    CAND in LOOP.  The computation is unshared.  */
2728
2729 static tree
2730 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2731 {
2732   return get_computation_at (loop, use, cand, use->stmt);
2733 }
2734
2735 /* Returns cost of addition in MODE.  */
2736
2737 static unsigned
2738 add_cost (enum machine_mode mode)
2739 {
2740   static unsigned costs[NUM_MACHINE_MODES];
2741   rtx seq;
2742   unsigned cost;
2743
2744   if (costs[mode])
2745     return costs[mode];
2746
2747   start_sequence ();
2748   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2749                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2750                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2751                  NULL_RTX);
2752   seq = get_insns ();
2753   end_sequence ();
2754
2755   cost = seq_cost (seq);
2756   if (!cost)
2757     cost = 1;
2758
2759   costs[mode] = cost;
2760
2761   if (dump_file && (dump_flags & TDF_DETAILS))
2762     fprintf (dump_file, "Addition in %s costs %d\n",
2763              GET_MODE_NAME (mode), cost);
2764   return cost;
2765 }
2766
2767 /* Entry in a hashtable of already known costs for multiplication.  */
2768 struct mbc_entry
2769 {
2770   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2771   enum machine_mode mode;       /* In mode.  */
2772   unsigned cost;                /* The cost.  */
2773 };
2774
2775 /* Counts hash value for the ENTRY.  */
2776
2777 static hashval_t
2778 mbc_entry_hash (const void *entry)
2779 {
2780   const struct mbc_entry *e = entry;
2781
2782   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2783 }
2784
2785 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2786
2787 static int
2788 mbc_entry_eq (const void *entry1, const void *entry2)
2789 {
2790   const struct mbc_entry *e1 = entry1;
2791   const struct mbc_entry *e2 = entry2;
2792
2793   return (e1->mode == e2->mode
2794           && e1->cst == e2->cst);
2795 }
2796
2797 /* Returns cost of multiplication by constant CST in MODE.  */
2798
2799 unsigned
2800 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2801 {
2802   static htab_t costs;
2803   struct mbc_entry **cached, act;
2804   rtx seq;
2805   unsigned cost;
2806
2807   if (!costs)
2808     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2809
2810   act.mode = mode;
2811   act.cst = cst;
2812   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2813   if (*cached)
2814     return (*cached)->cost;
2815
2816   *cached = XNEW (struct mbc_entry);
2817   (*cached)->mode = mode;
2818   (*cached)->cst = cst;
2819
2820   start_sequence ();
2821   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2822                gen_int_mode (cst, mode), NULL_RTX, 0);
2823   seq = get_insns ();
2824   end_sequence ();
2825
2826   cost = seq_cost (seq);
2827
2828   if (dump_file && (dump_flags & TDF_DETAILS))
2829     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2830              (int) cst, GET_MODE_NAME (mode), cost);
2831
2832   (*cached)->cost = cost;
2833
2834   return cost;
2835 }
2836
2837 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2838    validity for a memory reference accessing memory of mode MODE.  */
2839
2840 bool
2841 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2842 {
2843 #define MAX_RATIO 128
2844   static sbitmap valid_mult[MAX_MACHINE_MODE];
2845
2846   if (!valid_mult[mode])
2847     {
2848       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2849       rtx addr;
2850       HOST_WIDE_INT i;
2851
2852       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2853       sbitmap_zero (valid_mult[mode]);
2854       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2855       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2856         {
2857           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2858           if (memory_address_p (mode, addr))
2859             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2860         }
2861
2862       if (dump_file && (dump_flags & TDF_DETAILS))
2863         {
2864           fprintf (dump_file, "  allowed multipliers:");
2865           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2866             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2867               fprintf (dump_file, " %d", (int) i);
2868           fprintf (dump_file, "\n");
2869           fprintf (dump_file, "\n");
2870         }
2871     }
2872
2873   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2874     return false;
2875
2876   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2877 }
2878
2879 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2880    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2881    variable is omitted.  Compute the cost for a memory reference that accesses
2882    a memory location of mode MEM_MODE.
2883
2884    TODO -- there must be some better way.  This all is quite crude.  */
2885
2886 static unsigned
2887 get_address_cost (bool symbol_present, bool var_present,
2888                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2889                   enum machine_mode mem_mode)
2890 {
2891   static bool initialized[MAX_MACHINE_MODE];
2892   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2893   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2894   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2895   unsigned cost, acost;
2896   bool offset_p, ratio_p;
2897   HOST_WIDE_INT s_offset;
2898   unsigned HOST_WIDE_INT mask;
2899   unsigned bits;
2900
2901   if (!initialized[mem_mode])
2902     {
2903       HOST_WIDE_INT i;
2904       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2905       int old_cse_not_expected;
2906       unsigned sym_p, var_p, off_p, rat_p, add_c;
2907       rtx seq, addr, base;
2908       rtx reg0, reg1;
2909
2910       initialized[mem_mode] = true;
2911
2912       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2913
2914       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2915       for (i = start; i <= 1 << 20; i <<= 1)
2916         {
2917           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2918           if (!memory_address_p (mem_mode, addr))
2919             break;
2920         }
2921       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2922       off[mem_mode] = max_offset[mem_mode];
2923
2924       for (i = start; i <= 1 << 20; i <<= 1)
2925         {
2926           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2927           if (!memory_address_p (mem_mode, addr))
2928             break;
2929         }
2930       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2931
2932       if (dump_file && (dump_flags & TDF_DETAILS))
2933         {
2934           fprintf (dump_file, "get_address_cost:\n");
2935           fprintf (dump_file, "  min offset %s %d\n",
2936                    GET_MODE_NAME (mem_mode),
2937                    (int) min_offset[mem_mode]);
2938           fprintf (dump_file, "  max offset %s %d\n",
2939                    GET_MODE_NAME (mem_mode),
2940                    (int) max_offset[mem_mode]);
2941         }
2942
2943       rat[mem_mode] = 1;
2944       for (i = 2; i <= MAX_RATIO; i++)
2945         if (multiplier_allowed_in_address_p (i, mem_mode))
2946           {
2947             rat[mem_mode] = i;
2948             break;
2949           }
2950
2951       /* Compute the cost of various addressing modes.  */
2952       acost = 0;
2953       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2954       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2955
2956       for (i = 0; i < 16; i++)
2957         {
2958           sym_p = i & 1;
2959           var_p = (i >> 1) & 1;
2960           off_p = (i >> 2) & 1;
2961           rat_p = (i >> 3) & 1;
2962
2963           addr = reg0;
2964           if (rat_p)
2965             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2966                                    gen_int_mode (rat[mem_mode], Pmode));
2967
2968           if (var_p)
2969             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2970
2971           if (sym_p)
2972             {
2973               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2974               if (off_p)
2975                 base = gen_rtx_fmt_e (CONST, Pmode,
2976                                       gen_rtx_fmt_ee (PLUS, Pmode,
2977                                                       base,
2978                                                       gen_int_mode (off[mem_mode],
2979                                                                     Pmode)));
2980             }
2981           else if (off_p)
2982             base = gen_int_mode (off[mem_mode], Pmode);
2983           else
2984             base = NULL_RTX;
2985
2986           if (base)
2987             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2988
2989           start_sequence ();
2990           /* To avoid splitting addressing modes, pretend that no cse will
2991              follow.  */
2992           old_cse_not_expected = cse_not_expected;
2993           cse_not_expected = true;
2994           addr = memory_address (mem_mode, addr);
2995           cse_not_expected = old_cse_not_expected;
2996           seq = get_insns ();
2997           end_sequence ();
2998
2999           acost = seq_cost (seq);
3000           acost += address_cost (addr, mem_mode);
3001
3002           if (!acost)
3003             acost = 1;
3004           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3005         }
3006
3007       /* On some targets, it is quite expensive to load symbol to a register,
3008          which makes addresses that contain symbols look much more expensive.
3009          However, the symbol will have to be loaded in any case before the
3010          loop (and quite likely we have it in register already), so it does not
3011          make much sense to penalize them too heavily.  So make some final
3012          tweaks for the SYMBOL_PRESENT modes:
3013
3014          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3015          var is cheaper, use this mode with small penalty.
3016          If VAR_PRESENT is true, try whether the mode with
3017          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3018          if this is the case, use it.  */
3019       add_c = add_cost (Pmode);
3020       for (i = 0; i < 8; i++)
3021         {
3022           var_p = i & 1;
3023           off_p = (i >> 1) & 1;
3024           rat_p = (i >> 2) & 1;
3025
3026           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3027           if (var_p)
3028             acost += add_c;
3029
3030           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3031             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3032         }
3033
3034       if (dump_file && (dump_flags & TDF_DETAILS))
3035         {
3036           fprintf (dump_file, "Address costs:\n");
3037
3038           for (i = 0; i < 16; i++)
3039             {
3040               sym_p = i & 1;
3041               var_p = (i >> 1) & 1;
3042               off_p = (i >> 2) & 1;
3043               rat_p = (i >> 3) & 1;
3044
3045               fprintf (dump_file, "  ");
3046               if (sym_p)
3047                 fprintf (dump_file, "sym + ");
3048               if (var_p)
3049                 fprintf (dump_file, "var + ");
3050               if (off_p)
3051                 fprintf (dump_file, "cst + ");
3052               if (rat_p)
3053                 fprintf (dump_file, "rat * ");
3054
3055               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3056               fprintf (dump_file, "index costs %d\n", acost);
3057             }
3058           fprintf (dump_file, "\n");
3059         }
3060     }
3061
3062   bits = GET_MODE_BITSIZE (Pmode);
3063   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3064   offset &= mask;
3065   if ((offset >> (bits - 1) & 1))
3066     offset |= ~mask;
3067   s_offset = offset;
3068
3069   cost = 0;
3070   offset_p = (s_offset != 0
3071               && min_offset[mem_mode] <= s_offset
3072               && s_offset <= max_offset[mem_mode]);
3073   ratio_p = (ratio != 1
3074              && multiplier_allowed_in_address_p (ratio, mem_mode));
3075
3076   if (ratio != 1 && !ratio_p)
3077     cost += multiply_by_cost (ratio, Pmode);
3078
3079   if (s_offset && !offset_p && !symbol_present)
3080     cost += add_cost (Pmode);
3081
3082   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3083   return cost + acost;
3084 }
3085
3086 /* Estimates cost of forcing expression EXPR into a variable.  */
3087
3088 unsigned
3089 force_expr_to_var_cost (tree expr)
3090 {
3091   static bool costs_initialized = false;
3092   static unsigned integer_cost;
3093   static unsigned symbol_cost;
3094   static unsigned address_cost;
3095   tree op0, op1;
3096   unsigned cost0, cost1, cost;
3097   enum machine_mode mode;
3098
3099   if (!costs_initialized)
3100     {
3101       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3102       rtx x = gen_rtx_MEM (DECL_MODE (var),
3103                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3104       tree addr;
3105       tree type = build_pointer_type (integer_type_node);
3106
3107       integer_cost = computation_cost (build_int_cst (integer_type_node,
3108                                                       2000));
3109
3110       SET_DECL_RTL (var, x);
3111       TREE_STATIC (var) = 1;
3112       addr = build1 (ADDR_EXPR, type, var);
3113       symbol_cost = computation_cost (addr) + 1;
3114
3115       address_cost
3116         = computation_cost (build2 (PLUS_EXPR, type,
3117                                     addr,
3118                                     build_int_cst (type, 2000))) + 1;
3119       if (dump_file && (dump_flags & TDF_DETAILS))
3120         {
3121           fprintf (dump_file, "force_expr_to_var_cost:\n");
3122           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3123           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3124           fprintf (dump_file, "  address %d\n", (int) address_cost);
3125           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3126           fprintf (dump_file, "\n");
3127         }
3128
3129       costs_initialized = true;
3130     }
3131
3132   STRIP_NOPS (expr);
3133
3134   if (SSA_VAR_P (expr))
3135     return 0;
3136
3137   if (TREE_INVARIANT (expr))
3138     {
3139       if (TREE_CODE (expr) == INTEGER_CST)
3140         return integer_cost;
3141
3142       if (TREE_CODE (expr) == ADDR_EXPR)
3143         {
3144           tree obj = TREE_OPERAND (expr, 0);
3145
3146           if (TREE_CODE (obj) == VAR_DECL
3147               || TREE_CODE (obj) == PARM_DECL
3148               || TREE_CODE (obj) == RESULT_DECL)
3149             return symbol_cost;
3150         }
3151
3152       return address_cost;
3153     }
3154
3155   switch (TREE_CODE (expr))
3156     {
3157     case PLUS_EXPR:
3158     case MINUS_EXPR:
3159     case MULT_EXPR:
3160       op0 = TREE_OPERAND (expr, 0);
3161       op1 = TREE_OPERAND (expr, 1);
3162       STRIP_NOPS (op0);
3163       STRIP_NOPS (op1);
3164
3165       if (is_gimple_val (op0))
3166         cost0 = 0;
3167       else
3168         cost0 = force_expr_to_var_cost (op0);
3169
3170       if (is_gimple_val (op1))
3171         cost1 = 0;
3172       else
3173         cost1 = force_expr_to_var_cost (op1);
3174
3175       break;
3176
3177     default:
3178       /* Just an arbitrary value, FIXME.  */
3179       return target_spill_cost;
3180     }
3181
3182   mode = TYPE_MODE (TREE_TYPE (expr));
3183   switch (TREE_CODE (expr))
3184     {
3185     case PLUS_EXPR:
3186     case MINUS_EXPR:
3187       cost = add_cost (mode);
3188       break;
3189
3190     case MULT_EXPR:
3191       if (cst_and_fits_in_hwi (op0))
3192         cost = multiply_by_cost (int_cst_value (op0), mode);
3193       else if (cst_and_fits_in_hwi (op1))
3194         cost = multiply_by_cost (int_cst_value (op1), mode);
3195       else
3196         return target_spill_cost;
3197       break;
3198
3199     default:
3200       gcc_unreachable ();
3201     }
3202
3203   cost += cost0;
3204   cost += cost1;
3205
3206   /* Bound the cost by target_spill_cost.  The parts of complicated
3207      computations often are either loop invariant or at least can
3208      be shared between several iv uses, so letting this grow without
3209      limits would not give reasonable results.  */
3210   return cost < target_spill_cost ? cost : target_spill_cost;
3211 }
3212
3213 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3214    invariants the computation depends on.  */
3215
3216 static unsigned
3217 force_var_cost (struct ivopts_data *data,
3218                 tree expr, bitmap *depends_on)
3219 {
3220   if (depends_on)
3221     {
3222       fd_ivopts_data = data;
3223       walk_tree (&expr, find_depends, depends_on, NULL);
3224     }
3225
3226   return force_expr_to_var_cost (expr);
3227 }
3228
3229 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3230    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3231    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3232    invariants the computation depends on.  */
3233
3234 static unsigned
3235 split_address_cost (struct ivopts_data *data,
3236                     tree addr, bool *symbol_present, bool *var_present,
3237                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3238 {
3239   tree core;
3240   HOST_WIDE_INT bitsize;
3241   HOST_WIDE_INT bitpos;
3242   tree toffset;
3243   enum machine_mode mode;
3244   int unsignedp, volatilep;
3245
3246   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3247                               &unsignedp, &volatilep, false);
3248
3249   if (toffset != 0
3250       || bitpos % BITS_PER_UNIT != 0
3251       || TREE_CODE (core) != VAR_DECL)
3252     {
3253       *symbol_present = false;
3254       *var_present = true;
3255       fd_ivopts_data = data;
3256       walk_tree (&addr, find_depends, depends_on, NULL);
3257       return target_spill_cost;
3258     }
3259
3260   *offset += bitpos / BITS_PER_UNIT;
3261   if (TREE_STATIC (core)
3262       || DECL_EXTERNAL (core))
3263     {
3264       *symbol_present = true;
3265       *var_present = false;
3266       return 0;
3267     }
3268
3269   *symbol_present = false;
3270   *var_present = true;
3271   return 0;
3272 }
3273
3274 /* Estimates cost of expressing difference of addresses E1 - E2 as
3275    var + symbol + offset.  The value of offset is added to OFFSET,
3276    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3277    part is missing.  DEPENDS_ON is a set of the invariants the computation
3278    depends on.  */
3279
3280 static unsigned
3281 ptr_difference_cost (struct ivopts_data *data,
3282                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3283                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3284 {
3285   HOST_WIDE_INT diff = 0;
3286   unsigned cost;
3287
3288   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3289
3290   if (ptr_difference_const (e1, e2, &diff))
3291     {
3292       *offset += diff;
3293       *symbol_present = false;
3294       *var_present = false;
3295       return 0;
3296     }
3297
3298   if (e2 == integer_zero_node)
3299     return split_address_cost (data, TREE_OPERAND (e1, 0),
3300                                symbol_present, var_present, offset, depends_on);
3301
3302   *symbol_present = false;
3303   *var_present = true;
3304
3305   cost = force_var_cost (data, e1, depends_on);
3306   cost += force_var_cost (data, e2, depends_on);
3307   cost += add_cost (Pmode);
3308
3309   return cost;
3310 }
3311
3312 /* Estimates cost of expressing difference E1 - E2 as
3313    var + symbol + offset.  The value of offset is added to OFFSET,
3314    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3315    part is missing.  DEPENDS_ON is a set of the invariants the computation
3316    depends on.  */
3317
3318 static unsigned
3319 difference_cost (struct ivopts_data *data,
3320                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3321                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3322 {
3323   unsigned cost;
3324   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3325   unsigned HOST_WIDE_INT off1, off2;
3326
3327   e1 = strip_offset (e1, &off1);
3328   e2 = strip_offset (e2, &off2);
3329   *offset += off1 - off2;
3330
3331   STRIP_NOPS (e1);
3332   STRIP_NOPS (e2);
3333
3334   if (TREE_CODE (e1) == ADDR_EXPR)
3335     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3336                                 depends_on);
3337   *symbol_present = false;
3338
3339   if (operand_equal_p (e1, e2, 0))
3340     {
3341       *var_present = false;
3342       return 0;
3343     }
3344   *var_present = true;
3345   if (integer_zerop (e2))
3346     return force_var_cost (data, e1, depends_on);
3347
3348   if (integer_zerop (e1))
3349     {
3350       cost = force_var_cost (data, e2, depends_on);
3351       cost += multiply_by_cost (-1, mode);
3352
3353       return cost;
3354     }
3355
3356   cost = force_var_cost (data, e1, depends_on);
3357   cost += force_var_cost (data, e2, depends_on);
3358   cost += add_cost (mode);
3359
3360   return cost;
3361 }
3362
3363 /* Determines the cost of the computation by that USE is expressed
3364    from induction variable CAND.  If ADDRESS_P is true, we just need
3365    to create an address from it, otherwise we want to get it into
3366    register.  A set of invariants we depend on is stored in
3367    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3368
3369 static unsigned
3370 get_computation_cost_at (struct ivopts_data *data,
3371                          struct iv_use *use, struct iv_cand *cand,
3372                          bool address_p, bitmap *depends_on, tree at)
3373 {
3374   tree ubase = use->iv->base, ustep = use->iv->step;
3375   tree cbase, cstep;
3376   tree utype = TREE_TYPE (ubase), ctype;
3377   unsigned HOST_WIDE_INT cstepi, offset = 0;
3378   HOST_WIDE_INT ratio, aratio;
3379   bool var_present, symbol_present;
3380   unsigned cost = 0, n_sums;
3381   double_int rat;
3382
3383   *depends_on = NULL;
3384
3385   /* Only consider real candidates.  */
3386   if (!cand->iv)
3387     return INFTY;
3388
3389   cbase = cand->iv->base;
3390   cstep = cand->iv->step;
3391   ctype = TREE_TYPE (cbase);
3392
3393   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3394     {
3395       /* We do not have a precision to express the values of use.  */
3396       return INFTY;
3397     }
3398
3399   if (address_p)
3400     {
3401       /* Do not try to express address of an object with computation based
3402          on address of a different object.  This may cause problems in rtl
3403          level alias analysis (that does not expect this to be happening,
3404          as this is illegal in C), and would be unlikely to be useful
3405          anyway.  */
3406       if (use->iv->base_object
3407           && cand->iv->base_object
3408           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3409         return INFTY;
3410     }
3411
3412   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3413     {
3414       /* TODO -- add direct handling of this case.  */
3415       goto fallback;
3416     }
3417
3418   /* CSTEPI is removed from the offset in case statement is after the
3419      increment.  If the step is not constant, we use zero instead.
3420      This is a bit imprecise (there is the extra addition), but
3421      redundancy elimination is likely to transform the code so that
3422      it uses value of the variable before increment anyway,
3423      so it is not that much unrealistic.  */
3424   if (cst_and_fits_in_hwi (cstep))
3425     cstepi = int_cst_value (cstep);
3426   else
3427     cstepi = 0;
3428
3429   if (!constant_multiple_of (ustep, cstep, &rat))
3430     return INFTY;
3431
3432   if (double_int_fits_in_shwi_p (rat))
3433     ratio = double_int_to_shwi (rat);
3434   else
3435     return INFTY;
3436
3437   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3438      or ratio == 1, it is better to handle this like
3439
3440      ubase - ratio * cbase + ratio * var
3441
3442      (also holds in the case ratio == -1, TODO.  */
3443
3444   if (cst_and_fits_in_hwi (cbase))
3445     {
3446       offset = - ratio * int_cst_value (cbase);
3447       cost += difference_cost (data,
3448                                ubase, integer_zero_node,
3449                                &symbol_present, &var_present, &offset,
3450                                depends_on);
3451     }
3452   else if (ratio == 1)
3453     {
3454       cost += difference_cost (data,
3455                                ubase, cbase,
3456                                &symbol_present, &var_present, &offset,
3457                                depends_on);
3458     }
3459   else
3460     {
3461       cost += force_var_cost (data, cbase, depends_on);
3462       cost += add_cost (TYPE_MODE (ctype));
3463       cost += difference_cost (data,
3464                                ubase, integer_zero_node,
3465                                &symbol_present, &var_present, &offset,
3466                                depends_on);
3467     }
3468
3469   /* If we are after the increment, the value of the candidate is higher by
3470      one iteration.  */
3471   if (stmt_after_increment (data->current_loop, cand, at))
3472     offset -= ratio * cstepi;
3473
3474   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3475      (symbol/var/const parts may be omitted).  If we are looking for an address,
3476      find the cost of addressing this.  */
3477   if (address_p)
3478     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3479                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3480
3481   /* Otherwise estimate the costs for computing the expression.  */
3482   aratio = ratio > 0 ? ratio : -ratio;
3483   if (!symbol_present && !var_present && !offset)
3484     {
3485       if (ratio != 1)
3486         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3487
3488       return cost;
3489     }
3490
3491   if (aratio != 1)
3492     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3493
3494   n_sums = 1;
3495   if (var_present
3496       /* Symbol + offset should be compile-time computable.  */
3497       && (symbol_present || offset))
3498     n_sums++;
3499
3500   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3501
3502 fallback:
3503   {
3504     /* Just get the expression, expand it and measure the cost.  */
3505     tree comp = get_computation_at (data->current_loop, use, cand, at);
3506
3507     if (!comp)
3508       return INFTY;
3509
3510     if (address_p)
3511       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3512
3513     return computation_cost (comp);
3514   }
3515 }
3516
3517 /* Determines the cost of the computation by that USE is expressed
3518    from induction variable CAND.  If ADDRESS_P is true, we just need
3519    to create an address from it, otherwise we want to get it into
3520    register.  A set of invariants we depend on is stored in
3521    DEPENDS_ON.  */
3522
3523 static unsigned
3524 get_computation_cost (struct ivopts_data *data,
3525                       struct iv_use *use, struct iv_cand *cand,
3526                       bool address_p, bitmap *depends_on)
3527 {
3528   return get_computation_cost_at (data,
3529                                   use, cand, address_p, depends_on, use->stmt);
3530 }
3531
3532 /* Determines cost of basing replacement of USE on CAND in a generic
3533    expression.  */
3534
3535 static bool
3536 determine_use_iv_cost_generic (struct ivopts_data *data,
3537                                struct iv_use *use, struct iv_cand *cand)
3538 {
3539   bitmap depends_on;
3540   unsigned cost;
3541
3542   /* The simple case first -- if we need to express value of the preserved
3543      original biv, the cost is 0.  This also prevents us from counting the
3544      cost of increment twice -- once at this use and once in the cost of
3545      the candidate.  */
3546   if (cand->pos == IP_ORIGINAL
3547       && cand->incremented_at == use->stmt)
3548     {
3549       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3550       return true;
3551     }
3552
3553   cost = get_computation_cost (data, use, cand, false, &depends_on);
3554   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3555
3556   return cost != INFTY;
3557 }
3558
3559 /* Determines cost of basing replacement of USE on CAND in an address.  */
3560
3561 static bool
3562 determine_use_iv_cost_address (struct ivopts_data *data,
3563                                struct iv_use *use, struct iv_cand *cand)
3564 {
3565   bitmap depends_on;
3566   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3567
3568   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3569
3570   return cost != INFTY;
3571 }
3572
3573 /* Computes value of induction variable IV in iteration NITER.  */
3574
3575 static tree
3576 iv_value (struct iv *iv, tree niter)
3577 {
3578   tree val;
3579   tree type = TREE_TYPE (iv->base);
3580
3581   niter = fold_convert (type, niter);
3582   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3583
3584   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3585 }
3586
3587 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3588
3589 static tree
3590 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3591 {
3592   tree val = iv_value (cand->iv, niter);
3593   tree type = TREE_TYPE (cand->iv->base);
3594
3595   if (stmt_after_increment (loop, cand, at))
3596     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3597
3598   return val;
3599 }
3600
3601 /* Returns period of induction variable iv.  */
3602
3603 static tree
3604 iv_period (struct iv *iv)
3605 {
3606   tree step = iv->step, period, type;
3607   tree pow2div;
3608
3609   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3610
3611   /* Period of the iv is gcd (step, type range).  Since type range is power
3612      of two, it suffices to determine the maximum power of two that divides
3613      step.  */
3614   pow2div = num_ending_zeros (step);
3615   type = unsigned_type_for (TREE_TYPE (step));
3616
3617   period = build_low_bits_mask (type,
3618                                 (TYPE_PRECISION (type)
3619                                  - tree_low_cst (pow2div, 1)));
3620
3621   return period;
3622 }
3623
3624 /* Returns the comparison operator used when eliminating the iv USE.  */
3625
3626 static enum tree_code
3627 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3628 {
3629   struct loop *loop = data->current_loop;
3630   basic_block ex_bb;
3631   edge exit;
3632
3633   ex_bb = bb_for_stmt (use->stmt);
3634   exit = EDGE_SUCC (ex_bb, 0);
3635   if (flow_bb_inside_loop_p (loop, exit->dest))
3636     exit = EDGE_SUCC (ex_bb, 1);
3637
3638   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3639 }
3640
3641 /* Check whether it is possible to express the condition in USE by comparison
3642    of candidate CAND.  If so, store the value compared with to BOUND.  */
3643
3644 static bool
3645 may_eliminate_iv (struct ivopts_data *data,
3646                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3647 {
3648   basic_block ex_bb;
3649   edge exit;
3650   tree nit, nit_type;
3651   tree wider_type, period, per_type;
3652   struct loop *loop = data->current_loop;
3653
3654   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3655     return false;
3656
3657   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3658      for other conditions inside loop body.  */
3659   ex_bb = bb_for_stmt (use->stmt);
3660   if (use->stmt != last_stmt (ex_bb)
3661       || TREE_CODE (use->stmt) != COND_EXPR)
3662     return false;
3663   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3664     return false;
3665
3666   exit = EDGE_SUCC (ex_bb, 0);
3667   if (flow_bb_inside_loop_p (loop, exit->dest))
3668     exit = EDGE_SUCC (ex_bb, 1);
3669   if (flow_bb_inside_loop_p (loop, exit->dest))
3670     return false;
3671
3672   nit = niter_for_exit (data, exit);
3673   if (!nit)
3674     return false;
3675
3676   nit_type = TREE_TYPE (nit);
3677
3678   /* Determine whether we may use the variable to test whether niter iterations
3679      elapsed.  This is the case iff the period of the induction variable is
3680      greater than the number of iterations.  */
3681   period = iv_period (cand->iv);
3682   if (!period)
3683     return false;
3684   per_type = TREE_TYPE (period);
3685
3686   wider_type = TREE_TYPE (period);
3687   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3688     wider_type = per_type;
3689   else
3690     wider_type = nit_type;
3691
3692   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3693                                       fold_convert (wider_type, period),
3694                                       fold_convert (wider_type, nit))))
3695     return false;
3696
3697   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
3698   return true;
3699 }
3700
3701 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3702
3703 static bool
3704 determine_use_iv_cost_condition (struct ivopts_data *data,
3705                                  struct iv_use *use, struct iv_cand *cand)
3706 {
3707   tree bound = NULL_TREE, op, cond;
3708   bitmap depends_on = NULL;
3709   unsigned cost;
3710
3711   /* Only consider real candidates.  */
3712   if (!cand->iv)
3713     {
3714       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3715       return false;
3716     }
3717
3718   if (may_eliminate_iv (data, use, cand, &bound))
3719     {
3720       cost = force_var_cost (data, bound, &depends_on);
3721
3722       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3723       return cost != INFTY;
3724     }
3725
3726   /* The induction variable elimination failed; just express the original
3727      giv.  If it is compared with an invariant, note that we cannot get
3728      rid of it.  */
3729   cost = get_computation_cost (data, use, cand, false, &depends_on);
3730
3731   cond = *use->op_p;
3732   if (TREE_CODE (cond) != SSA_NAME)
3733     {
3734       op = TREE_OPERAND (cond, 0);
3735       if (TREE_CODE (op) == SSA_NAME
3736           && !null_or_integer_zerop (get_iv (data, op)->step))
3737         op = TREE_OPERAND (cond, 1);
3738       if (TREE_CODE (op) == SSA_NAME)
3739         {
3740           op = get_iv (data, op)->base;
3741           fd_ivopts_data = data;
3742           walk_tree (&op, find_depends, &depends_on, NULL);
3743         }
3744     }
3745
3746   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3747   return cost != INFTY;
3748 }
3749
3750 /* Determines cost of basing replacement of USE on CAND.  Returns false
3751    if USE cannot be based on CAND.  */
3752
3753 static bool
3754 determine_use_iv_cost (struct ivopts_data *data,
3755                        struct iv_use *use, struct iv_cand *cand)
3756 {
3757   switch (use->type)
3758     {
3759     case USE_NONLINEAR_EXPR:
3760       return determine_use_iv_cost_generic (data, use, cand);
3761
3762     case USE_ADDRESS:
3763       return determine_use_iv_cost_address (data, use, cand);
3764
3765     case USE_COMPARE:
3766       return determine_use_iv_cost_condition (data, use, cand);
3767
3768     default:
3769       gcc_unreachable ();
3770     }
3771 }
3772
3773 /* Determines costs of basing the use of the iv on an iv candidate.  */
3774
3775 static void
3776 determine_use_iv_costs (struct ivopts_data *data)
3777 {
3778   unsigned i, j;
3779   struct iv_use *use;
3780   struct iv_cand *cand;
3781   bitmap to_clear = BITMAP_ALLOC (NULL);
3782
3783   alloc_use_cost_map (data);
3784
3785   for (i = 0; i < n_iv_uses (data); i++)
3786     {
3787       use = iv_use (data, i);
3788
3789       if (data->consider_all_candidates)
3790         {
3791           for (j = 0; j < n_iv_cands (data); j++)
3792             {
3793               cand = iv_cand (data, j);
3794               determine_use_iv_cost (data, use, cand);
3795             }
3796         }
3797       else
3798         {
3799           bitmap_iterator bi;
3800
3801           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3802             {
3803               cand = iv_cand (data, j);
3804               if (!determine_use_iv_cost (data, use, cand))
3805                 bitmap_set_bit (to_clear, j);
3806             }
3807
3808           /* Remove the candidates for that the cost is infinite from
3809              the list of related candidates.  */
3810           bitmap_and_compl_into (use->related_cands, to_clear);
3811           bitmap_clear (to_clear);
3812         }
3813     }
3814
3815   BITMAP_FREE (to_clear);
3816
3817   if (dump_file && (dump_flags & TDF_DETAILS))
3818     {
3819       fprintf (dump_file, "Use-candidate costs:\n");
3820
3821       for (i = 0; i < n_iv_uses (data); i++)
3822         {
3823           use = iv_use (data, i);
3824
3825           fprintf (dump_file, "Use %d:\n", i);
3826           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3827           for (j = 0; j < use->n_map_members; j++)
3828             {
3829               if (!use->cost_map[j].cand
3830                   || use->cost_map[j].cost == INFTY)
3831                 continue;
3832
3833               fprintf (dump_file, "  %d\t%d\t",
3834                        use->cost_map[j].cand->id,
3835                        use->cost_map[j].cost);
3836               if (use->cost_map[j].depends_on)
3837                 bitmap_print (dump_file,
3838                               use->cost_map[j].depends_on, "","");
3839               fprintf (dump_file, "\n");
3840             }
3841
3842           fprintf (dump_file, "\n");
3843         }
3844       fprintf (dump_file, "\n");
3845     }
3846 }
3847
3848 /* Determines cost of the candidate CAND.  */
3849
3850 static void
3851 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3852 {
3853   unsigned cost_base, cost_step;
3854   tree base;
3855
3856   if (!cand->iv)
3857     {
3858       cand->cost = 0;
3859       return;
3860     }
3861
3862   /* There are two costs associated with the candidate -- its increment
3863      and its initialization.  The second is almost negligible for any loop
3864      that rolls enough, so we take it just very little into account.  */
3865
3866   base = cand->iv->base;
3867   cost_base = force_var_cost (data, base, NULL);
3868   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3869
3870   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3871
3872   /* Prefer the original iv unless we may gain something by replacing it;
3873      this is not really relevant for artificial ivs created by other
3874      passes.  */
3875   if (cand->pos == IP_ORIGINAL
3876       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3877     cand->cost--;
3878
3879   /* Prefer not to insert statements into latch unless there are some
3880      already (so that we do not create unnecessary jumps).  */
3881   if (cand->pos == IP_END
3882       && empty_block_p (ip_end_pos (data->current_loop)))
3883     cand->cost++;
3884 }
3885
3886 /* Determines costs of computation of the candidates.  */
3887
3888 static void
3889 determine_iv_costs (struct ivopts_data *data)
3890 {
3891   unsigned i;
3892
3893   if (dump_file && (dump_flags & TDF_DETAILS))
3894     {
3895       fprintf (dump_file, "Candidate costs:\n");
3896       fprintf (dump_file, "  cand\tcost\n");
3897     }
3898
3899   for (i = 0; i < n_iv_cands (data); i++)
3900     {
3901       struct iv_cand *cand = iv_cand (data, i);
3902
3903       determine_iv_cost (data, cand);
3904
3905       if (dump_file && (dump_flags & TDF_DETAILS))
3906         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3907     }
3908
3909   if (dump_file && (dump_flags & TDF_DETAILS))
3910     fprintf (dump_file, "\n");
3911 }
3912
3913 /* Calculates cost for having SIZE induction variables.  */
3914
3915 static unsigned
3916 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3917 {
3918   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3919 }
3920
3921 /* For each size of the induction variable set determine the penalty.  */
3922
3923 static void
3924 determine_set_costs (struct ivopts_data *data)
3925 {
3926   unsigned j, n;
3927   tree phi, op;
3928   struct loop *loop = data->current_loop;
3929   bitmap_iterator bi;
3930
3931   /* We use the following model (definitely improvable, especially the
3932      cost function -- TODO):
3933
3934      We estimate the number of registers available (using MD data), name it A.
3935
3936      We estimate the number of registers used by the loop, name it U.  This
3937      number is obtained as the number of loop phi nodes (not counting virtual
3938      registers and bivs) + the number of variables from outside of the loop.
3939
3940      We set a reserve R (free regs that are used for temporary computations,
3941      etc.).  For now the reserve is a constant 3.
3942
3943      Let I be the number of induction variables.
3944
3945      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3946         make a lot of ivs without a reason).
3947      -- if A - R < U + I <= A, the cost is I * PRES_COST
3948      -- if U + I > A, the cost is I * PRES_COST and
3949         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3950
3951   if (dump_file && (dump_flags & TDF_DETAILS))
3952     {
3953       fprintf (dump_file, "Global costs:\n");
3954       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3955       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3956       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3957       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3958     }
3959
3960   n = 0;
3961   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3962     {
3963       op = PHI_RESULT (phi);
3964
3965       if (!is_gimple_reg (op))
3966         continue;
3967
3968       if (get_iv (data, op))
3969         continue;
3970
3971       n++;
3972     }
3973
3974   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3975     {
3976       struct version_info *info = ver_info (data, j);
3977
3978       if (info->inv_id && info->has_nonlin_use)
3979         n++;
3980     }
3981
3982   data->regs_used = n;
3983   if (dump_file && (dump_flags & TDF_DETAILS))
3984     fprintf (dump_file, "  regs_used %d\n", n);
3985
3986   if (dump_file && (dump_flags & TDF_DETAILS))
3987     {
3988       fprintf (dump_file, "  cost for size:\n");
3989       fprintf (dump_file, "  ivs\tcost\n");
3990       for (j = 0; j <= 2 * target_avail_regs; j++)
3991         fprintf (dump_file, "  %d\t%d\n", j,
3992                  ivopts_global_cost_for_size (data, j));
3993       fprintf (dump_file, "\n");
3994     }
3995 }
3996
3997 /* Returns true if A is a cheaper cost pair than B.  */
3998
3999 static bool
4000 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4001 {
4002   if (!a)
4003     return false;
4004
4005   if (!b)
4006     return true;
4007
4008   if (a->cost < b->cost)
4009     return true;
4010
4011   if (a->cost > b->cost)
4012     return false;
4013
4014   /* In case the costs are the same, prefer the cheaper candidate.  */
4015   if (a->cand->cost < b->cand->cost)
4016     return true;
4017
4018   return false;
4019 }
4020
4021 /* Computes the cost field of IVS structure.  */
4022
4023 static void
4024 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4025 {
4026   unsigned cost = 0;
4027
4028   cost += ivs->cand_use_cost;
4029   cost += ivs->cand_cost;
4030   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4031
4032   ivs->cost = cost;
4033 }
4034
4035 /* Remove invariants in set INVS to set IVS.  */
4036
4037 static void
4038 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4039 {
4040   bitmap_iterator bi;
4041   unsigned iid;
4042
4043   if (!invs)
4044     return;
4045
4046   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4047     {
4048       ivs->n_invariant_uses[iid]--;
4049       if (ivs->n_invariant_uses[iid] == 0)
4050         ivs->n_regs--;
4051     }
4052 }
4053
4054 /* Set USE not to be expressed by any candidate in IVS.  */
4055
4056 static void
4057 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4058                  struct iv_use *use)
4059 {
4060   unsigned uid = use->id, cid;
4061   struct cost_pair *cp;
4062
4063   cp = ivs->cand_for_use[uid];
4064   if (!cp)
4065     return;
4066   cid = cp->cand->id;
4067
4068   ivs->bad_uses++;
4069   ivs->cand_for_use[uid] = NULL;
4070   ivs->n_cand_uses[cid]--;
4071
4072   if (ivs->n_cand_uses[cid] == 0)
4073     {
4074       bitmap_clear_bit (ivs->cands, cid);
4075       /* Do not count the pseudocandidates.  */
4076       if (cp->cand->iv)
4077         ivs->n_regs--;
4078       ivs->n_cands--;
4079       ivs->cand_cost -= cp->cand->cost;
4080
4081       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4082     }
4083
4084   ivs->cand_use_cost -= cp->cost;
4085
4086   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4087   iv_ca_recount_cost (data, ivs);
4088 }
4089
4090 /* Add invariants in set INVS to set IVS.  */
4091
4092 static void
4093 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4094 {
4095   bitmap_iterator bi;
4096   unsigned iid;
4097
4098   if (!invs)
4099     return;
4100
4101   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4102     {
4103       ivs->n_invariant_uses[iid]++;
4104       if (ivs->n_invariant_uses[iid] == 1)
4105         ivs->n_regs++;
4106     }
4107 }
4108
4109 /* Set cost pair for USE in set IVS to CP.  */
4110
4111 static void
4112 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4113               struct iv_use *use, struct cost_pair *cp)
4114 {
4115   unsigned uid = use->id, cid;
4116
4117   if (ivs->cand_for_use[uid] == cp)
4118     return;
4119
4120   if (ivs->cand_for_use[uid])
4121     iv_ca_set_no_cp (data, ivs, use);
4122
4123   if (cp)
4124     {
4125       cid = cp->cand->id;
4126
4127       ivs->bad_uses--;
4128       ivs->cand_for_use[uid] = cp;
4129       ivs->n_cand_uses[cid]++;
4130       if (ivs->n_cand_uses[cid] == 1)
4131         {
4132           bitmap_set_bit (ivs->cands, cid);
4133           /* Do not count the pseudocandidates.  */
4134           if (cp->cand->iv)
4135             ivs->n_regs++;
4136           ivs->n_cands++;
4137           ivs->cand_cost += cp->cand->cost;
4138
4139           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4140         }
4141
4142       ivs->cand_use_cost += cp->cost;
4143       iv_ca_set_add_invariants (ivs, cp->depends_on);
4144       iv_ca_recount_cost (data, ivs);
4145     }
4146 }
4147
4148 /* Extend set IVS by expressing USE by some of the candidates in it
4149    if possible.  */
4150
4151 static void
4152 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4153                struct iv_use *use)
4154 {
4155   struct cost_pair *best_cp = NULL, *cp;
4156   bitmap_iterator bi;
4157   unsigned i;
4158
4159   gcc_assert (ivs->upto >= use->id);
4160
4161   if (ivs->upto == use->id)
4162     {
4163       ivs->upto++;
4164       ivs->bad_uses++;
4165     }
4166
4167   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4168     {
4169       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4170
4171       if (cheaper_cost_pair (cp, best_cp))
4172         best_cp = cp;
4173     }
4174
4175   iv_ca_set_cp (data, ivs, use, best_cp);
4176 }
4177
4178 /* Get cost for assignment IVS.  */
4179
4180 static unsigned
4181 iv_ca_cost (struct iv_ca *ivs)
4182 {
4183   return (ivs->bad_uses ? INFTY : ivs->cost);
4184 }
4185
4186 /* Returns true if all dependences of CP are among invariants in IVS.  */
4187
4188 static bool
4189 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4190 {
4191   unsigned i;
4192   bitmap_iterator bi;
4193
4194   if (!cp->depends_on)
4195     return true;
4196
4197   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4198     {
4199       if (ivs->n_invariant_uses[i] == 0)
4200         return false;
4201     }
4202
4203   return true;
4204 }
4205
4206 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4207    it before NEXT_CHANGE.  */
4208
4209 static struct iv_ca_delta *
4210 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4211                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4212 {
4213   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4214
4215   change->use = use;
4216   change->old_cp = old_cp;
4217   change->new_cp = new_cp;
4218   change->next_change = next_change;
4219
4220   return change;
4221 }
4222
4223 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4224    are rewritten.  */
4225
4226 static struct iv_ca_delta *
4227 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4228 {
4229   struct iv_ca_delta *last;
4230
4231   if (!l2)
4232     return l1;
4233
4234   if (!l1)
4235     return l2;
4236
4237   for (last = l1; last->next_change; last = last->next_change)
4238     continue;
4239   last->next_change = l2;
4240
4241   return l1;
4242 }
4243
4244 /* Returns candidate by that USE is expressed in IVS.  */
4245
4246 static struct cost_pair *
4247 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4248 {
4249   return ivs->cand_for_use[use->id];
4250 }
4251
4252 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4253
4254 static struct iv_ca_delta *
4255 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4256 {
4257   struct iv_ca_delta *act, *next, *prev = NULL;
4258   struct cost_pair *tmp;
4259
4260   for (act = delta; act; act = next)
4261     {
4262       next = act->next_change;
4263       act->next_change = prev;
4264       prev = act;
4265
4266       tmp = act->old_cp;
4267       act->old_cp = act->new_cp;
4268       act->new_cp = tmp;
4269     }
4270
4271   return prev;
4272 }
4273
4274 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4275    reverted instead.  */
4276
4277 static void
4278 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4279                     struct iv_ca_delta *delta, bool forward)
4280 {
4281   struct cost_pair *from, *to;
4282   struct iv_ca_delta *act;
4283
4284   if (!forward)
4285     delta = iv_ca_delta_reverse (delta);
4286
4287   for (act = delta; act; act = act->next_change)
4288     {
4289       from = act->old_cp;
4290       to = act->new_cp;
4291       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4292       iv_ca_set_cp (data, ivs, act->use, to);
4293     }
4294
4295   if (!forward)
4296     iv_ca_delta_reverse (delta);
4297 }
4298
4299 /* Returns true if CAND is used in IVS.  */
4300
4301 static bool
4302 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4303 {
4304   return ivs->n_cand_uses[cand->id] > 0;
4305 }
4306
4307 /* Returns number of induction variable candidates in the set IVS.  */
4308
4309 static unsigned
4310 iv_ca_n_cands (struct iv_ca *ivs)
4311 {
4312   return ivs->n_cands;
4313 }
4314
4315 /* Free the list of changes DELTA.  */
4316
4317 static void
4318 iv_ca_delta_free (struct iv_ca_delta **delta)
4319 {
4320   struct iv_ca_delta *act, *next;
4321
4322   for (act = *delta; act; act = next)
4323     {
4324       next = act->next_change;
4325       free (act);
4326     }
4327
4328   *delta = NULL;
4329 }
4330
4331 /* Allocates new iv candidates assignment.  */
4332
4333 static struct iv_ca *
4334 iv_ca_new (struct ivopts_data *data)
4335 {
4336   struct iv_ca *nw = XNEW (struct iv_ca);
4337
4338   nw->upto = 0;
4339   nw->bad_uses = 0;
4340   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4341   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4342   nw->cands = BITMAP_ALLOC (NULL);
4343   nw->n_cands = 0;
4344   nw->n_regs = 0;
4345   nw->cand_use_cost = 0;
4346   nw->cand_cost = 0;
4347   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4348   nw->cost = 0;
4349
4350   return nw;
4351 }
4352
4353 /* Free memory occupied by the set IVS.  */
4354
4355 static void
4356 iv_ca_free (struct iv_ca **ivs)
4357 {
4358   free ((*ivs)->cand_for_use);
4359   free ((*ivs)->n_cand_uses);
4360   BITMAP_FREE ((*ivs)->cands);
4361   free ((*ivs)->n_invariant_uses);
4362   free (*ivs);
4363   *ivs = NULL;
4364 }
4365
4366 /* Dumps IVS to FILE.  */
4367
4368 static void
4369 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4370 {
4371   const char *pref = "  invariants ";
4372   unsigned i;
4373
4374   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4375   bitmap_print (file, ivs->cands, "  candidates ","\n");
4376
4377   for (i = 1; i <= data->max_inv_id; i++)
4378     if (ivs->n_invariant_uses[i])
4379       {
4380         fprintf (file, "%s%d", pref, i);
4381         pref = ", ";
4382       }
4383   fprintf (file, "\n");
4384 }
4385
4386 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4387    new set, and store differences in DELTA.  Number of induction variables
4388    in the new set is stored to N_IVS.  */
4389
4390 static unsigned
4391 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4392               struct iv_cand *cand, struct iv_ca_delta **delta,
4393               unsigned *n_ivs)
4394 {
4395   unsigned i, cost;
4396   struct iv_use *use;
4397   struct cost_pair *old_cp, *new_cp;
4398
4399   *delta = NULL;
4400   for (i = 0; i < ivs->upto; i++)
4401     {
4402       use = iv_use (data, i);
4403       old_cp = iv_ca_cand_for_use (ivs, use);
4404
4405       if (old_cp
4406           && old_cp->cand == cand)
4407         continue;
4408
4409       new_cp = get_use_iv_cost (data, use, cand);
4410       if (!new_cp)
4411         continue;
4412
4413       if (!iv_ca_has_deps (ivs, new_cp))
4414         continue;
4415
4416       if (!cheaper_cost_pair (new_cp, old_cp))
4417         continue;
4418
4419       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4420     }
4421
4422   iv_ca_delta_commit (data, ivs, *delta, true);
4423   cost = iv_ca_cost (ivs);
4424   if (n_ivs)
4425     *n_ivs = iv_ca_n_cands (ivs);
4426   iv_ca_delta_commit (data, ivs, *delta, false);
4427
4428   return cost;
4429 }
4430
4431 /* Try narrowing set IVS by removing CAND.  Return the cost of
4432    the new set and store the differences in DELTA.  */
4433
4434 static unsigned
4435 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4436               struct iv_cand *cand, struct iv_ca_delta **delta)
4437 {
4438   unsigned i, ci;
4439   struct iv_use *use;
4440   struct cost_pair *old_cp, *new_cp, *cp;
4441   bitmap_iterator bi;
4442   struct iv_cand *cnd;
4443   unsigned cost;
4444
4445   *delta = NULL;
4446   for (i = 0; i < n_iv_uses (data); i++)
4447     {
4448       use = iv_use (data, i);
4449
4450       old_cp = iv_ca_cand_for_use (ivs, use);
4451       if (old_cp->cand != cand)
4452         continue;
4453
4454       new_cp = NULL;
4455
4456       if (data->consider_all_candidates)
4457         {
4458           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4459             {
4460               if (ci == cand->id)
4461                 continue;
4462
4463               cnd = iv_cand (data, ci);
4464
4465               cp = get_use_iv_cost (data, use, cnd);
4466               if (!cp)
4467                 continue;
4468               if (!iv_ca_has_deps (ivs, cp))
4469                 continue;
4470
4471               if (!cheaper_cost_pair (cp, new_cp))
4472                 continue;
4473
4474               new_cp = cp;
4475             }
4476         }
4477       else
4478         {
4479           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4480             {
4481               if (ci == cand->id)
4482                 continue;
4483
4484               cnd = iv_cand (data, ci);
4485
4486               cp = get_use_iv_cost (data, use, cnd);
4487               if (!cp)
4488                 continue;
4489               if (!iv_ca_has_deps (ivs, cp))
4490                 continue;
4491
4492               if (!cheaper_cost_pair (cp, new_cp))
4493                 continue;
4494
4495               new_cp = cp;
4496             }
4497         }
4498
4499       if (!new_cp)
4500         {
4501           iv_ca_delta_free (delta);
4502           return INFTY;
4503         }
4504
4505       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4506     }
4507
4508   iv_ca_delta_commit (data, ivs, *delta, true);
4509   cost = iv_ca_cost (ivs);
4510   iv_ca_delta_commit (data, ivs, *delta, false);
4511
4512   return cost;
4513 }
4514
4515 /* Try optimizing the set of candidates IVS by removing candidates different
4516    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4517    differences in DELTA.  */
4518
4519 static unsigned
4520 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4521              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4522 {
4523   bitmap_iterator bi;
4524   struct iv_ca_delta *act_delta, *best_delta;
4525   unsigned i, best_cost, acost;
4526   struct iv_cand *cand;
4527
4528   best_delta = NULL;
4529   best_cost = iv_ca_cost (ivs);
4530
4531   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4532     {
4533       cand = iv_cand (data, i);
4534
4535       if (cand == except_cand)
4536         continue;
4537
4538       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4539
4540       if (acost < best_cost)
4541         {
4542           best_cost = acost;
4543           iv_ca_delta_free (&best_delta);
4544           best_delta = act_delta;
4545         }
4546       else
4547         iv_ca_delta_free (&act_delta);
4548     }
4549
4550   if (!best_delta)
4551     {
4552       *delta = NULL;
4553       return best_cost;
4554     }
4555
4556   /* Recurse to possibly remove other unnecessary ivs.  */
4557   iv_ca_delta_commit (data, ivs, best_delta, true);
4558   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4559   iv_ca_delta_commit (data, ivs, best_delta, false);
4560   *delta = iv_ca_delta_join (best_delta, *delta);
4561   return best_cost;
4562 }
4563
4564 /* Tries to extend the sets IVS in the best possible way in order
4565    to express the USE.  */
4566
4567 static bool
4568 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4569                   struct iv_use *use)
4570 {
4571   unsigned best_cost, act_cost;
4572   unsigned i;
4573   bitmap_iterator bi;
4574   struct iv_cand *cand;
4575   struct iv_ca_delta *best_delta = NULL, *act_delta;
4576   struct cost_pair *cp;
4577
4578   iv_ca_add_use (data, ivs, use);
4579   best_cost = iv_ca_cost (ivs);
4580
4581   cp = iv_ca_cand_for_use (ivs, use);
4582   if (cp)
4583     {
4584       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4585       iv_ca_set_no_cp (data, ivs, use);
4586     }
4587
4588   /* First try important candidates.  Only if it fails, try the specific ones.
4589      Rationale -- in loops with many variables the best choice often is to use
4590      just one generic biv.  If we added here many ivs specific to the uses,
4591      the optimization algorithm later would be likely to get stuck in a local
4592      minimum, thus causing us to create too many ivs.  The approach from
4593      few ivs to more seems more likely to be successful -- starting from few
4594      ivs, replacing an expensive use by a specific iv should always be a
4595      win.  */
4596   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4597     {
4598       cand = iv_cand (data, i);
4599
4600       if (iv_ca_cand_used_p (ivs, cand))
4601         continue;
4602
4603       cp = get_use_iv_cost (data, use, cand);
4604       if (!cp)
4605         continue;
4606
4607       iv_ca_set_cp (data, ivs, use, cp);
4608       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4609       iv_ca_set_no_cp (data, ivs, use);
4610       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4611
4612       if (act_cost < best_cost)
4613         {
4614           best_cost = act_cost;
4615
4616           iv_ca_delta_free (&best_delta);
4617           best_delta = act_delta;
4618         }
4619       else
4620         iv_ca_delta_free (&act_delta);
4621     }
4622
4623   if (best_cost == INFTY)
4624     {
4625       for (i = 0; i < use->n_map_members; i++)
4626         {
4627           cp = use->cost_map + i;
4628           cand = cp->cand;
4629           if (!cand)
4630             continue;
4631
4632           /* Already tried this.  */
4633           if (cand->important)
4634             continue;
4635
4636           if (iv_ca_cand_used_p (ivs, cand))
4637             continue;
4638
4639           act_delta = NULL;
4640           iv_ca_set_cp (data, ivs, use, cp);
4641           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4642           iv_ca_set_no_cp (data, ivs, use);
4643           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4644                                        cp, act_delta);
4645
4646           if (act_cost < best_cost)
4647             {
4648               best_cost = act_cost;
4649
4650               if (best_delta)
4651                 iv_ca_delta_free (&best_delta);
4652               best_delta = act_delta;
4653             }
4654           else
4655             iv_ca_delta_free (&act_delta);
4656         }
4657     }
4658
4659   iv_ca_delta_commit (data, ivs, best_delta, true);
4660   iv_ca_delta_free (&best_delta);
4661
4662   return (best_cost != INFTY);
4663 }
4664
4665 /* Finds an initial assignment of candidates to uses.  */
4666
4667 static struct iv_ca *
4668 get_initial_solution (struct ivopts_data *data)
4669 {
4670   struct iv_ca *ivs = iv_ca_new (data);
4671   unsigned i;
4672
4673   for (i = 0; i < n_iv_uses (data); i++)
4674     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4675       {
4676         iv_ca_free (&ivs);
4677         return NULL;
4678       }
4679
4680   return ivs;
4681 }
4682
4683 /* Tries to improve set of induction variables IVS.  */
4684
4685 static bool
4686 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4687 {
4688   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4689   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4690   struct iv_cand *cand;
4691
4692   /* Try extending the set of induction variables by one.  */
4693   for (i = 0; i < n_iv_cands (data); i++)
4694     {
4695       cand = iv_cand (data, i);
4696
4697       if (iv_ca_cand_used_p (ivs, cand))
4698         continue;
4699
4700       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4701       if (!act_delta)
4702         continue;
4703
4704       /* If we successfully added the candidate and the set is small enough,
4705          try optimizing it by removing other candidates.  */
4706       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4707         {
4708           iv_ca_delta_commit (data, ivs, act_delta, true);
4709           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4710           iv_ca_delta_commit (data, ivs, act_delta, false);
4711           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4712         }
4713
4714       if (acost < best_cost)
4715         {
4716           best_cost = acost;
4717           iv_ca_delta_free (&best_delta);
4718           best_delta = act_delta;
4719         }
4720       else
4721         iv_ca_delta_free (&act_delta);
4722     }
4723
4724   if (!best_delta)
4725     {
4726       /* Try removing the candidates from the set instead.  */
4727       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4728
4729       /* Nothing more we can do.  */
4730       if (!best_delta)
4731         return false;
4732     }
4733
4734   iv_ca_delta_commit (data, ivs, best_delta, true);
4735   gcc_assert (best_cost == iv_ca_cost (ivs));
4736   iv_ca_delta_free (&best_delta);
4737   return true;
4738 }
4739
4740 /* Attempts to find the optimal set of induction variables.  We do simple
4741    greedy heuristic -- we try to replace at most one candidate in the selected
4742    solution and remove the unused ivs while this improves the cost.  */
4743
4744 static struct iv_ca *
4745 find_optimal_iv_set (struct ivopts_data *data)
4746 {
4747   unsigned i;
4748   struct iv_ca *set;
4749   struct iv_use *use;
4750
4751   /* Get the initial solution.  */
4752   set = get_initial_solution (data);
4753   if (!set)
4754     {
4755       if (dump_file && (dump_flags & TDF_DETAILS))
4756         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4757       return NULL;
4758     }
4759
4760   if (dump_file && (dump_flags & TDF_DETAILS))
4761     {
4762       fprintf (dump_file, "Initial set of candidates:\n");
4763       iv_ca_dump (data, dump_file, set);
4764     }
4765
4766   while (try_improve_iv_set (data, set))
4767     {
4768       if (dump_file && (dump_flags & TDF_DETAILS))
4769         {
4770           fprintf (dump_file, "Improved to:\n");
4771           iv_ca_dump (data, dump_file, set);
4772         }
4773     }
4774
4775   if (dump_file && (dump_flags & TDF_DETAILS))
4776     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4777
4778   for (i = 0; i < n_iv_uses (data); i++)
4779     {
4780       use = iv_use (data, i);
4781       use->selected = iv_ca_cand_for_use (set, use)->cand;
4782     }
4783
4784   return set;
4785 }
4786
4787 /* Creates a new induction variable corresponding to CAND.  */
4788
4789 static void
4790 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4791 {
4792   block_stmt_iterator incr_pos;
4793   tree base;
4794   bool after = false;
4795
4796   if (!cand->iv)
4797     return;
4798
4799   switch (cand->pos)
4800     {
4801     case IP_NORMAL:
4802       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4803       break;
4804
4805     case IP_END:
4806       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4807       after = true;
4808       break;
4809
4810     case IP_ORIGINAL:
4811       /* Mark that the iv is preserved.  */
4812       name_info (data, cand->var_before)->preserve_biv = true;
4813       name_info (data, cand->var_after)->preserve_biv = true;
4814
4815       /* Rewrite the increment so that it uses var_before directly.  */
4816       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4817
4818       return;
4819     }
4820
4821   gimple_add_tmp_var (cand->var_before);
4822   add_referenced_var (cand->var_before);
4823
4824   base = unshare_expr (cand->iv->base);
4825
4826   create_iv (base, unshare_expr (cand->iv->step),
4827              cand->var_before, data->current_loop,
4828              &incr_pos, after, &cand->var_before, &cand->var_after);
4829 }
4830
4831 /* Creates new induction variables described in SET.  */
4832
4833 static void
4834 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4835 {
4836   unsigned i;
4837   struct iv_cand *cand;
4838   bitmap_iterator bi;
4839
4840   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4841     {
4842       cand = iv_cand (data, i);
4843       create_new_iv (data, cand);
4844     }
4845 }
4846
4847 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4848    is true, remove also the ssa name defined by the statement.  */
4849
4850 static void
4851 remove_statement (tree stmt, bool including_defined_name)
4852 {
4853   if (TREE_CODE (stmt) == PHI_NODE)
4854     {
4855       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4856     }
4857   else
4858     {
4859       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4860
4861       bsi_remove (&bsi, true);
4862     }
4863 }
4864
4865 /* Rewrites USE (definition of iv used in a nonlinear expression)
4866    using candidate CAND.  */
4867
4868 static void
4869 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4870                             struct iv_use *use, struct iv_cand *cand)
4871 {
4872   tree comp;
4873   tree op, stmts, tgt, ass;
4874   block_stmt_iterator bsi, pbsi;
4875
4876   /* An important special case -- if we are asked to express value of
4877      the original iv by itself, just exit; there is no need to
4878      introduce a new computation (that might also need casting the
4879      variable to unsigned and back).  */
4880   if (cand->pos == IP_ORIGINAL
4881       && cand->incremented_at == use->stmt)
4882     {
4883       tree step, ctype, utype;
4884       enum tree_code incr_code = PLUS_EXPR;
4885
4886       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4887       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4888
4889       step = cand->iv->step;
4890       ctype = TREE_TYPE (step);
4891       utype = TREE_TYPE (cand->var_after);
4892       if (TREE_CODE (step) == NEGATE_EXPR)
4893         {
4894           incr_code = MINUS_EXPR;
4895           step = TREE_OPERAND (step, 0);
4896         }
4897
4898       /* Check whether we may leave the computation unchanged.
4899          This is the case only if it does not rely on other
4900          computations in the loop -- otherwise, the computation
4901          we rely upon may be removed in remove_unused_ivs,
4902          thus leading to ICE.  */
4903       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4904       if (TREE_CODE (op) == PLUS_EXPR
4905           || TREE_CODE (op) == MINUS_EXPR)
4906         {
4907           if (TREE_OPERAND (op, 0) == cand->var_before)
4908             op = TREE_OPERAND (op, 1);
4909           else if (TREE_CODE (op) == PLUS_EXPR
4910                    && TREE_OPERAND (op, 1) == cand->var_before)
4911             op = TREE_OPERAND (op, 0);
4912           else
4913             op = NULL_TREE;
4914         }
4915       else
4916         op = NULL_TREE;
4917
4918       if (op
4919           && (TREE_CODE (op) == INTEGER_CST
4920               || operand_equal_p (op, step, 0)))
4921         return;
4922
4923       /* Otherwise, add the necessary computations to express
4924          the iv.  */
4925       op = fold_convert (ctype, cand->var_before);
4926       comp = fold_convert (utype,
4927                            build2 (incr_code, ctype, op,
4928                                    unshare_expr (step)));
4929     }
4930   else
4931     {
4932       comp = get_computation (data->current_loop, use, cand);
4933       gcc_assert (comp != NULL_TREE);
4934     }
4935
4936   switch (TREE_CODE (use->stmt))
4937     {
4938     case PHI_NODE:
4939       tgt = PHI_RESULT (use->stmt);
4940
4941       /* If we should keep the biv, do not replace it.  */
4942       if (name_info (data, tgt)->preserve_biv)
4943         return;
4944
4945       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4946       while (!bsi_end_p (pbsi)
4947              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4948         {
4949           bsi = pbsi;
4950           bsi_next (&pbsi);
4951         }
4952       break;
4953
4954     case GIMPLE_MODIFY_STMT:
4955       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4956       bsi = bsi_for_stmt (use->stmt);
4957       break;
4958
4959     default:
4960       gcc_unreachable ();
4961     }
4962
4963   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4964
4965   if (TREE_CODE (use->stmt) == PHI_NODE)
4966     {
4967       if (stmts)
4968         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4969       ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4970       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4971       remove_statement (use->stmt, false);
4972       SSA_NAME_DEF_STMT (tgt) = ass;
4973     }
4974   else
4975     {
4976       if (stmts)
4977         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4978       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4979     }
4980 }
4981
4982 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4983    for_each_index.  */
4984
4985 static bool
4986 idx_remove_ssa_names (tree base, tree *idx,
4987                       void *data ATTRIBUTE_UNUSED)
4988 {
4989   tree *op;
4990
4991   if (TREE_CODE (*idx) == SSA_NAME)
4992     *idx = SSA_NAME_VAR (*idx);
4993
4994   if (TREE_CODE (base) == ARRAY_REF)
4995     {
4996       op = &TREE_OPERAND (base, 2);
4997       if (*op
4998           && TREE_CODE (*op) == SSA_NAME)
4999         *op = SSA_NAME_VAR (*op);
5000       op = &TREE_OPERAND (base, 3);
5001       if (*op
5002           && TREE_CODE (*op) == SSA_NAME)
5003         *op = SSA_NAME_VAR (*op);
5004     }
5005
5006   return true;
5007 }
5008
5009 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5010
5011 static tree
5012 unshare_and_remove_ssa_names (tree ref)
5013 {
5014   ref = unshare_expr (ref);
5015   for_each_index (&ref, idx_remove_ssa_names, NULL);
5016
5017   return ref;
5018 }
5019
5020 /* Extract the alias analysis info for the memory reference REF.  There are
5021    several ways how this information may be stored and what precisely is
5022    its semantics depending on the type of the reference, but there always is
5023    somewhere hidden one _DECL node that is used to determine the set of
5024    virtual operands for the reference.  The code below deciphers this jungle
5025    and extracts this single useful piece of information.  */
5026
5027 static tree
5028 get_ref_tag (tree ref, tree orig)
5029 {
5030   tree var = get_base_address (ref);
5031   tree aref = NULL_TREE, tag, sv;
5032   HOST_WIDE_INT offset, size, maxsize;
5033
5034   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5035     {
5036       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5037       if (ref)
5038         break;
5039     }
5040
5041   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5042     return unshare_expr (sv);
5043
5044   if (!var)
5045     return NULL_TREE;
5046
5047   if (TREE_CODE (var) == INDIRECT_REF)
5048     {
5049       /* If the base is a dereference of a pointer, first check its name memory
5050          tag.  If it does not have one, use its symbol memory tag.  */
5051       var = TREE_OPERAND (var, 0);
5052       if (TREE_CODE (var) != SSA_NAME)
5053         return NULL_TREE;
5054
5055       if (SSA_NAME_PTR_INFO (var))
5056         {
5057           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5058           if (tag)
5059             return tag;
5060         }
5061
5062       var = SSA_NAME_VAR (var);
5063       tag = symbol_mem_tag (var);
5064       gcc_assert (tag != NULL_TREE);
5065       return tag;
5066     }
5067   else
5068     {
5069       if (!DECL_P (var))
5070         return NULL_TREE;
5071
5072       tag = symbol_mem_tag (var);
5073       if (tag)
5074         return tag;
5075
5076       return var;
5077     }
5078 }
5079
5080 /* Copies the reference information from OLD_REF to NEW_REF.  */
5081
5082 static void
5083 copy_ref_info (tree new_ref, tree old_ref)
5084 {
5085   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5086     copy_mem_ref_info (new_ref, old_ref);
5087   else
5088     {
5089       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5090       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5091     }
5092 }
5093
5094 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5095
5096 static void
5097 rewrite_use_address (struct ivopts_data *data,
5098                      struct iv_use *use, struct iv_cand *cand)
5099 {
5100   aff_tree aff;
5101   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5102   tree ref;
5103   bool ok;
5104
5105   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5106   gcc_assert (ok);
5107   unshare_aff_combination (&aff);
5108
5109   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5110   copy_ref_info (ref, *use->op_p);
5111   *use->op_p = ref;
5112 }
5113
5114 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5115    candidate CAND.  */
5116
5117 static void
5118 rewrite_use_compare (struct ivopts_data *data,
5119                      struct iv_use *use, struct iv_cand *cand)
5120 {
5121   tree comp;
5122   tree *op_p, cond, op, stmts, bound;
5123   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5124   enum tree_code compare;
5125   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5126
5127   bound = cp->value;
5128   if (bound)
5129     {
5130       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5131       tree var_type = TREE_TYPE (var);
5132
5133       compare = iv_elimination_compare (data, use);
5134       bound = fold_convert (var_type, bound);
5135       op = force_gimple_operand (unshare_expr (bound), &stmts,
5136                                  true, NULL_TREE);
5137
5138       if (stmts)
5139         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5140
5141       *use->op_p = build2 (compare, boolean_type_node, var, op);
5142       update_stmt (use->stmt);
5143       return;
5144     }
5145
5146   /* The induction variable elimination failed; just express the original
5147      giv.  */
5148   comp = get_computation (data->current_loop, use, cand);
5149   gcc_assert (comp != NULL_TREE);
5150
5151   cond = *use->op_p;
5152   op_p = &TREE_OPERAND (cond, 0);
5153   if (TREE_CODE (*op_p) != SSA_NAME
5154       || null_or_integer_zerop (get_iv (data, *op_p)->step))
5155     op_p = &TREE_OPERAND (cond, 1);
5156
5157   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5158   if (stmts)
5159     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5160
5161   *op_p = op;
5162 }
5163
5164 /* Rewrites USE using candidate CAND.  */
5165
5166 static void
5167 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5168 {
5169   push_stmt_changes (&use->stmt);
5170
5171   switch (use->type)
5172     {
5173       case USE_NONLINEAR_EXPR:
5174         rewrite_use_nonlinear_expr (data, use, cand);
5175         break;
5176
5177       case USE_ADDRESS:
5178         rewrite_use_address (data, use, cand);
5179         break;
5180
5181       case USE_COMPARE:
5182         rewrite_use_compare (data, use, cand);
5183         break;
5184
5185       default:
5186         gcc_unreachable ();
5187     }
5188
5189   pop_stmt_changes (&use->stmt);
5190 }
5191
5192 /* Rewrite the uses using the selected induction variables.  */
5193
5194 static void
5195 rewrite_uses (struct ivopts_data *data)
5196 {
5197   unsigned i;
5198   struct iv_cand *cand;
5199   struct iv_use *use;
5200
5201   for (i = 0; i < n_iv_uses (data); i++)
5202     {
5203       use = iv_use (data, i);
5204       cand = use->selected;
5205       gcc_assert (cand);
5206
5207       rewrite_use (data, use, cand);
5208     }
5209 }
5210
5211 /* Removes the ivs that are not used after rewriting.  */
5212
5213 static void
5214 remove_unused_ivs (struct ivopts_data *data)
5215 {
5216   unsigned j;
5217   bitmap_iterator bi;
5218
5219   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5220     {
5221       struct version_info *info;
5222
5223       info = ver_info (data, j);
5224       if (info->iv
5225           && !null_or_integer_zerop (info->iv->step)
5226           && !info->inv_id
5227           && !info->iv->have_use_for
5228           && !info->preserve_biv)
5229         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5230     }
5231 }
5232
5233 /* Frees data allocated by the optimization of a single loop.  */
5234
5235 static void
5236 free_loop_data (struct ivopts_data *data)
5237 {
5238   unsigned i, j;
5239   bitmap_iterator bi;
5240   tree obj;
5241
5242   htab_empty (data->niters);
5243
5244   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5245     {
5246       struct version_info *info;
5247
5248       info = ver_info (data, i);
5249       if (info->iv)
5250         free (info->iv);
5251       info->iv = NULL;
5252       info->has_nonlin_use = false;
5253       info->preserve_biv = false;
5254       info->inv_id = 0;
5255     }
5256   bitmap_clear (data->relevant);
5257   bitmap_clear (data->important_candidates);
5258
5259   for (i = 0; i < n_iv_uses (data); i++)
5260     {
5261       struct iv_use *use = iv_use (data, i);
5262
5263       free (use->iv);
5264       BITMAP_FREE (use->related_cands);
5265       for (j = 0; j < use->n_map_members; j++)
5266         if (use->cost_map[j].depends_on)
5267           BITMAP_FREE (use->cost_map[j].depends_on);
5268       free (use->cost_map);
5269       free (use);
5270     }
5271   VEC_truncate (iv_use_p, data->iv_uses, 0);
5272
5273   for (i = 0; i < n_iv_cands (data); i++)
5274     {
5275       struct iv_cand *cand = iv_cand (data, i);
5276
5277       if (cand->iv)
5278         free (cand->iv);
5279       if (cand->depends_on)
5280         BITMAP_FREE (cand->depends_on);
5281       free (cand);
5282     }
5283   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5284
5285   if (data->version_info_size < num_ssa_names)
5286     {
5287       data->version_info_size = 2 * num_ssa_names;
5288       free (data->version_info);
5289       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5290     }
5291
5292   data->max_inv_id = 0;
5293
5294   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5295     SET_DECL_RTL (obj, NULL_RTX);
5296
5297   VEC_truncate (tree, decl_rtl_to_reset, 0);
5298 }
5299
5300 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5301    loop tree.  */
5302
5303 static void
5304 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5305 {
5306   free_loop_data (data);
5307   free (data->version_info);
5308   BITMAP_FREE (data->relevant);
5309   BITMAP_FREE (data->important_candidates);
5310   htab_delete (data->niters);
5311
5312   VEC_free (tree, heap, decl_rtl_to_reset);
5313   VEC_free (iv_use_p, heap, data->iv_uses);
5314   VEC_free (iv_cand_p, heap, data->iv_candidates);
5315 }
5316
5317 /* Optimizes the LOOP.  Returns true if anything changed.  */
5318
5319 static bool
5320 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5321 {
5322   bool changed = false;
5323   struct iv_ca *iv_ca;
5324   edge exit;
5325
5326   data->current_loop = loop;
5327
5328   if (dump_file && (dump_flags & TDF_DETAILS))
5329     {
5330       fprintf (dump_file, "Processing loop %d\n", loop->num);
5331
5332       exit = single_dom_exit (loop);
5333       if (exit)
5334         {
5335           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5336                    exit->src->index, exit->dest->index);
5337           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5338           fprintf (dump_file, "\n");
5339         }
5340
5341       fprintf (dump_file, "\n");
5342     }
5343
5344   /* For each ssa name determines whether it behaves as an induction variable
5345      in some loop.  */
5346   if (!find_induction_variables (data))
5347     goto finish;
5348
5349   /* Finds interesting uses (item 1).  */
5350   find_interesting_uses (data);
5351   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5352     goto finish;
5353
5354   /* Finds candidates for the induction variables (item 2).  */
5355   find_iv_candidates (data);
5356
5357   /* Calculates the costs (item 3, part 1).  */
5358   determine_use_iv_costs (data);
5359   determine_iv_costs (data);
5360   determine_set_costs (data);
5361
5362   /* Find the optimal set of induction variables (item 3, part 2).  */
5363   iv_ca = find_optimal_iv_set (data);
5364   if (!iv_ca)
5365     goto finish;
5366   changed = true;
5367
5368   /* Create the new induction variables (item 4, part 1).  */
5369   create_new_ivs (data, iv_ca);
5370   iv_ca_free (&iv_ca);
5371
5372   /* Rewrite the uses (item 4, part 2).  */
5373   rewrite_uses (data);
5374
5375   /* Remove the ivs that are unused after rewriting.  */
5376   remove_unused_ivs (data);
5377
5378   /* We have changed the structure of induction variables; it might happen
5379      that definitions in the scev database refer to some of them that were
5380      eliminated.  */
5381   scev_reset ();
5382
5383 finish:
5384   free_loop_data (data);
5385
5386   return changed;
5387 }
5388
5389 /* Main entry point.  Optimizes induction variables in loops.  */
5390
5391 void
5392 tree_ssa_iv_optimize (void)
5393 {
5394   struct loop *loop;
5395   struct ivopts_data data;
5396   loop_iterator li;
5397
5398   tree_ssa_iv_optimize_init (&data);
5399
5400   /* Optimize the loops starting with the innermost ones.  */
5401   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5402     {
5403       if (dump_file && (dump_flags & TDF_DETAILS))
5404         flow_loop_dump (loop, dump_file, NULL, 1);
5405
5406       tree_ssa_iv_optimize_loop (&data, loop);
5407     }
5408
5409   tree_ssa_iv_optimize_finalize (&data);
5410 }