gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93
  94 /* The infinite cost.  */
  95 #define INFTY 10000000
  96
  97 /* The expected number of loop iterations.  TODO -- use profiling instead of
  98    this.  */
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101
 102 /* Representation of the induction variable.  */
 103 struct iv
 104 {
 105   tree base;            /* Initial value of the iv.  */
 106   tree base_object;     /* A memory object to that the induction variable points.  */
 107   tree step;            /* Step of the iv (constant only).  */
 108   tree ssa_name;        /* The ssa name with the value.  */
 109   bool biv_p;           /* Is it a biv?  */
 110   bool have_use_for;    /* Do we already have a use for it?  */
 111   unsigned use_id;      /* The identifier in the use if it is the case.  */
 112 };
 113
 114 /* Per-ssa version information (induction variable descriptions, etc.).  */
 115 struct version_info
 116 {
 117   tree name;            /* The ssa name.  */
 118   struct iv *iv;        /* Induction variable description.  */
 119   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 120                            an expression that is not an induction variable.  */
 121   unsigned inv_id;      /* Id of an invariant.  */
 122   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 123 };
 124
 125 /* Types of uses.  */
 126 enum use_type
 127 {
 128   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 129   USE_ADDRESS,          /* Use in an address.  */
 130   USE_COMPARE           /* Use is a compare.  */
 131 };
 132
 133 /* The candidate - cost pair.  */
 134 struct cost_pair
 135 {
 136   struct iv_cand *cand; /* The candidate.  */
 137   unsigned cost;        /* The cost.  */
 138   bitmap depends_on;    /* The list of invariants that have to be
 139                            preserved.  */
 140   tree value;           /* For final value elimination, the expression for
 141                            the final value of the iv.  For iv elimination,
 142                            the new bound to compare with.  */
 143 };
 144
 145 /* Use.  */
 146 struct iv_use
 147 {
 148   unsigned id;          /* The id of the use.  */
 149   enum use_type type;   /* Type of the use.  */
 150   struct iv *iv;        /* The induction variable it is based on.  */
 151   tree stmt;            /* Statement in that it occurs.  */
 152   tree *op_p;           /* The place where it occurs.  */
 153   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 154                            important ones.  */
 155
 156   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 157   struct cost_pair *cost_map;
 158                         /* The costs wrto the iv candidates.  */
 159
 160   struct iv_cand *selected;
 161                         /* The selected candidate.  */
 162 };
 163
 164 /* The position where the iv is computed.  */
 165 enum iv_position
 166 {
 167   IP_NORMAL,            /* At the end, just before the exit condition.  */
 168   IP_END,               /* At the end of the latch block.  */
 169   IP_ORIGINAL           /* The original biv.  */
 170 };
 171
 172 /* The induction variable candidate.  */
 173 struct iv_cand
 174 {
 175   unsigned id;          /* The number of the candidate.  */
 176   bool important;       /* Whether this is an "important" candidate, i.e. such
 177                            that it should be considered by all uses.  */
 178   enum iv_position pos; /* Where it is computed.  */
 179   tree incremented_at;  /* For original biv, the statement where it is
 180                            incremented.  */
 181   tree var_before;      /* The variable used for it before increment.  */
 182   tree var_after;       /* The variable used for it after increment.  */
 183   struct iv *iv;        /* The value of the candidate.  NULL for
 184                            "pseudocandidate" used to indicate the possibility
 185                            to replace the final value of an iv by direct
 186                            computation of the value.  */
 187   unsigned cost;        /* Cost of the candidate.  */
 188   bitmap depends_on;    /* The list of invariants that are used in step of the
 189                            biv.  */
 190 };
 191
 192 /* The data used by the induction variable optimizations.  */
 193
 194 typedef struct iv_use *iv_use_p;
 195 DEF_VEC_P(iv_use_p);
 196 DEF_VEC_ALLOC_P(iv_use_p,heap);
 197
 198 typedef struct iv_cand *iv_cand_p;
 199 DEF_VEC_P(iv_cand_p);
 200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 201
 202 struct ivopts_data
 203 {
 204   /* The currently optimized loop.  */
 205   struct loop *current_loop;
 206
 207   /* Number of registers used in it.  */
 208   unsigned regs_used;
 209
 210   /* Numbers of iterations for all exits of the current loop.  */
 211   htab_t niters;
 212
 213   /* The size of version_info array allocated.  */
 214   unsigned version_info_size;
 215
 216   /* The array of information for the ssa names.  */
 217   struct version_info *version_info;
 218
 219   /* The bitmap of indices in version_info whose value was changed.  */
 220   bitmap relevant;
 221
 222   /* The maximum invariant id.  */
 223   unsigned max_inv_id;
 224
 225   /* The uses of induction variables.  */
 226   VEC(iv_use_p,heap) *iv_uses;
 227
 228   /* The candidates.  */
 229   VEC(iv_cand_p,heap) *iv_candidates;
 230
 231   /* A bitmap of important candidates.  */
 232   bitmap important_candidates;
 233
 234   /* Whether to consider just related and important candidates when replacing a
 235      use.  */
 236   bool consider_all_candidates;
 237 };
 238
 239 /* An assignment of iv candidates to uses.  */
 240
 241 struct iv_ca
 242 {
 243   /* The number of uses covered by the assignment.  */
 244   unsigned upto;
 245
 246   /* Number of uses that cannot be expressed by the candidates in the set.  */
 247   unsigned bad_uses;
 248
 249   /* Candidate assigned to a use, together with the related costs.  */
 250   struct cost_pair **cand_for_use;
 251
 252   /* Number of times each candidate is used.  */
 253   unsigned *n_cand_uses;
 254
 255   /* The candidates used.  */
 256   bitmap cands;
 257
 258   /* The number of candidates in the set.  */
 259   unsigned n_cands;
 260
 261   /* Total number of registers needed.  */
 262   unsigned n_regs;
 263
 264   /* Total cost of expressing uses.  */
 265   unsigned cand_use_cost;
 266
 267   /* Total cost of candidates.  */
 268   unsigned cand_cost;
 269
 270   /* Number of times each invariant is used.  */
 271   unsigned *n_invariant_uses;
 272
 273   /* Total cost of the assignment.  */
 274   unsigned cost;
 275 };
 276
 277 /* Difference of two iv candidate assignments.  */
 278
 279 struct iv_ca_delta
 280 {
 281   /* Changed use.  */
 282   struct iv_use *use;
 283
 284   /* An old assignment (for rollback purposes).  */
 285   struct cost_pair *old_cp;
 286
 287   /* A new assignment.  */
 288   struct cost_pair *new_cp;
 289
 290   /* Next change in the list.  */
 291   struct iv_ca_delta *next_change;
 292 };
 293
 294 /* Bound on number of candidates below that all candidates are considered.  */
 295
 296 #define CONSIDER_ALL_CANDIDATES_BOUND \
 297   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 298
 299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 300    optimizing such a loop would help, and it would take ages).  */
 301
 302 #define MAX_CONSIDERED_USES \
 303   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 304
 305 /* If there are at most this number of ivs in the set, try removing unnecessary
 306    ivs from the set always.  */
 307
 308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 309   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 310
 311 /* The list of trees for that the decl_rtl field must be reset is stored
 312    here.  */
 313
 314 static VEC(tree,heap) *decl_rtl_to_reset;
 315
 316 /* Number of uses recorded in DATA.  */
 317
 318 static inline unsigned
 319 n_iv_uses (struct ivopts_data *data)
 320 {
 321   return VEC_length (iv_use_p, data->iv_uses);
 322 }
 323
 324 /* Ith use recorded in DATA.  */
 325
 326 static inline struct iv_use *
 327 iv_use (struct ivopts_data *data, unsigned i)
 328 {
 329   return VEC_index (iv_use_p, data->iv_uses, i);
 330 }
 331
 332 /* Number of candidates recorded in DATA.  */
 333
 334 static inline unsigned
 335 n_iv_cands (struct ivopts_data *data)
 336 {
 337   return VEC_length (iv_cand_p, data->iv_candidates);
 338 }
 339
 340 /* Ith candidate recorded in DATA.  */
 341
 342 static inline struct iv_cand *
 343 iv_cand (struct ivopts_data *data, unsigned i)
 344 {
 345   return VEC_index (iv_cand_p, data->iv_candidates, i);
 346 }
 347
 348 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 349
 350 edge
 351 single_dom_exit (struct loop *loop)
 352 {
 353   edge exit = single_exit (loop);
 354
 355   if (!exit)
 356     return NULL;
 357
 358   if (!just_once_each_iteration_p (loop, exit->src))
 359     return NULL;
 360
 361   return exit;
 362 }
 363
 364 /* Dumps information about the induction variable IV to FILE.  */
 365
 366 extern void dump_iv (FILE *, struct iv *);
 367 void
 368 dump_iv (FILE *file, struct iv *iv)
 369 {
 370   if (iv->ssa_name)
 371     {
 372       fprintf (file, "ssa name ");
 373       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 374       fprintf (file, "\n");
 375     }
 376
 377   fprintf (file, "  type ");
 378   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 379   fprintf (file, "\n");
 380
 381   if (iv->step)
 382     {
 383       fprintf (file, "  base ");
 384       print_generic_expr (file, iv->base, TDF_SLIM);
 385       fprintf (file, "\n");
 386
 387       fprintf (file, "  step ");
 388       print_generic_expr (file, iv->step, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391   else
 392     {
 393       fprintf (file, "  invariant ");
 394       print_generic_expr (file, iv->base, TDF_SLIM);
 395       fprintf (file, "\n");
 396     }
 397
 398   if (iv->base_object)
 399     {
 400       fprintf (file, "  base object ");
 401       print_generic_expr (file, iv->base_object, TDF_SLIM);
 402       fprintf (file, "\n");
 403     }
 404
 405   if (iv->biv_p)
 406     fprintf (file, "  is a biv\n");
 407 }
 408
 409 /* Dumps information about the USE to FILE.  */
 410
 411 extern void dump_use (FILE *, struct iv_use *);
 412 void
 413 dump_use (FILE *file, struct iv_use *use)
 414 {
 415   fprintf (file, "use %d\n", use->id);
 416
 417   switch (use->type)
 418     {
 419     case USE_NONLINEAR_EXPR:
 420       fprintf (file, "  generic\n");
 421       break;
 422
 423     case USE_ADDRESS:
 424       fprintf (file, "  address\n");
 425       break;
 426
 427     case USE_COMPARE:
 428       fprintf (file, "  compare\n");
 429       break;
 430
 431     default:
 432       gcc_unreachable ();
 433     }
 434
 435   fprintf (file, "  in statement ");
 436   print_generic_expr (file, use->stmt, TDF_SLIM);
 437   fprintf (file, "\n");
 438
 439   fprintf (file, "  at position ");
 440   if (use->op_p)
 441     print_generic_expr (file, *use->op_p, TDF_SLIM);
 442   fprintf (file, "\n");
 443
 444   dump_iv (file, use->iv);
 445
 446   if (use->related_cands)
 447     {
 448       fprintf (file, "  related candidates ");
 449       dump_bitmap (file, use->related_cands);
 450     }
 451 }
 452
 453 /* Dumps information about the uses to FILE.  */
 454
 455 extern void dump_uses (FILE *, struct ivopts_data *);
 456 void
 457 dump_uses (FILE *file, struct ivopts_data *data)
 458 {
 459   unsigned i;
 460   struct iv_use *use;
 461
 462   for (i = 0; i < n_iv_uses (data); i++)
 463     {
 464       use = iv_use (data, i);
 465
 466       dump_use (file, use);
 467       fprintf (file, "\n");
 468     }
 469 }
 470
 471 /* Dumps information about induction variable candidate CAND to FILE.  */
 472
 473 extern void dump_cand (FILE *, struct iv_cand *);
 474 void
 475 dump_cand (FILE *file, struct iv_cand *cand)
 476 {
 477   struct iv *iv = cand->iv;
 478
 479   fprintf (file, "candidate %d%s\n",
 480            cand->id, cand->important ? " (important)" : "");
 481
 482   if (cand->depends_on)
 483     {
 484       fprintf (file, "  depends on ");
 485       dump_bitmap (file, cand->depends_on);
 486     }
 487
 488   if (!iv)
 489     {
 490       fprintf (file, "  final value replacement\n");
 491       return;
 492     }
 493
 494   switch (cand->pos)
 495     {
 496     case IP_NORMAL:
 497       fprintf (file, "  incremented before exit test\n");
 498       break;
 499
 500     case IP_END:
 501       fprintf (file, "  incremented at end\n");
 502       break;
 503
 504     case IP_ORIGINAL:
 505       fprintf (file, "  original biv\n");
 506       break;
 507     }
 508
 509   dump_iv (file, iv);
 510 }
 511
 512 /* Returns the info for ssa version VER.  */
 513
 514 static inline struct version_info *
 515 ver_info (struct ivopts_data *data, unsigned ver)
 516 {
 517   return data->version_info + ver;
 518 }
 519
 520 /* Returns the info for ssa name NAME.  */
 521
 522 static inline struct version_info *
 523 name_info (struct ivopts_data *data, tree name)
 524 {
 525   return ver_info (data, SSA_NAME_VERSION (name));
 526 }
 527
 528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 529    emitted in LOOP.  */
 530
 531 static bool
 532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 533 {
 534   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 535
 536   gcc_assert (bb);
 537
 538   if (sbb == loop->latch)
 539     return true;
 540
 541   if (sbb != bb)
 542     return false;
 543
 544   return stmt == last_stmt (bb);
 545 }
 546
 547 /* Returns true if STMT if after the place where the original induction
 548    variable CAND is incremented.  */
 549
 550 static bool
 551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 552 {
 553   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 554   basic_block stmt_bb = bb_for_stmt (stmt);
 555   block_stmt_iterator bsi;
 556
 557   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 558     return false;
 559
 560   if (stmt_bb != cand_bb)
 561     return true;
 562
 563   /* Scan the block from the end, since the original ivs are usually
 564      incremented at the end of the loop body.  */
 565   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 566     {
 567       if (bsi_stmt (bsi) == cand->incremented_at)
 568         return false;
 569       if (bsi_stmt (bsi) == stmt)
 570         return true;
 571     }
 572 }
 573
 574 /* Returns true if STMT if after the place where the induction variable
 575    CAND is incremented in LOOP.  */
 576
 577 static bool
 578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 579 {
 580   switch (cand->pos)
 581     {
 582     case IP_END:
 583       return false;
 584
 585     case IP_NORMAL:
 586       return stmt_after_ip_normal_pos (loop, stmt);
 587
 588     case IP_ORIGINAL:
 589       return stmt_after_ip_original_pos (cand, stmt);
 590
 591     default:
 592       gcc_unreachable ();
 593     }
 594 }
 595
 596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 597
 598 static bool
 599 abnormal_ssa_name_p (tree exp)
 600 {
 601   if (!exp)
 602     return false;
 603
 604   if (TREE_CODE (exp) != SSA_NAME)
 605     return false;
 606
 607   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 608 }
 609
 610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 611    abnormal phi node.  Callback for for_each_index.  */
 612
 613 static bool
 614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 615                                   void *data ATTRIBUTE_UNUSED)
 616 {
 617   if (TREE_CODE (base) == ARRAY_REF)
 618     {
 619       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 620         return false;
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 622         return false;
 623     }
 624
 625   return !abnormal_ssa_name_p (*index);
 626 }
 627
 628 /* Returns true if EXPR contains a ssa name that occurs in an
 629    abnormal phi node.  */
 630
 631 bool
 632 contains_abnormal_ssa_name_p (tree expr)
 633 {
 634   enum tree_code code;
 635   enum tree_code_class class;
 636
 637   if (!expr)
 638     return false;
 639
 640   code = TREE_CODE (expr);
 641   class = TREE_CODE_CLASS (code);
 642
 643   if (code == SSA_NAME)
 644     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 645
 646   if (code == INTEGER_CST
 647       || is_gimple_min_invariant (expr))
 648     return false;
 649
 650   if (code == ADDR_EXPR)
 651     return !for_each_index (&TREE_OPERAND (expr, 0),
 652                             idx_contains_abnormal_ssa_name_p,
 653                             NULL);
 654
 655   switch (class)
 656     {
 657     case tcc_binary:
 658     case tcc_comparison:
 659       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 660         return true;
 661
 662       /* Fallthru.  */
 663     case tcc_unary:
 664       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 665         return true;
 666
 667       break;
 668
 669     default:
 670       gcc_unreachable ();
 671     }
 672
 673   return false;
 674 }
 675
 676 /* Element of the table in that we cache the numbers of iterations obtained
 677    from exits of the loop.  */
 678
 679 struct nfe_cache_elt
 680 {
 681   /* The edge for that the number of iterations is cached.  */
 682   edge exit;
 683
 684   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 685      determined.  */
 686   tree niter;
 687 };
 688
 689 /* Hash function for nfe_cache_elt E.  */
 690
 691 static hashval_t
 692 nfe_hash (const void *e)
 693 {
 694   const struct nfe_cache_elt *elt = e;
 695
 696   return htab_hash_pointer (elt->exit);
 697 }
 698
 699 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 700
 701 static int
 702 nfe_eq (const void *e1, const void *e2)
 703 {
 704   const struct nfe_cache_elt *elt1 = e1;
 705
 706   return elt1->exit == e2;
 707 }
 708
 709 /*  Returns tree describing number of iterations determined from
 710     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 711
 712 static tree
 713 niter_for_exit (struct ivopts_data *data, edge exit)
 714 {
 715   struct nfe_cache_elt *nfe_desc;
 716   struct tree_niter_desc desc;
 717   PTR *slot;
 718
 719   slot = htab_find_slot_with_hash (data->niters, exit,
 720                                    htab_hash_pointer (exit),
 721                                    INSERT);
 722
 723   if (!*slot)
 724     {
 725       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 726       nfe_desc->exit = exit;
 727
 728       /* Try to determine number of iterations.  We must know it
 729          unconditionally (i.e., without possibility of # of iterations
 730          being zero).  Also, we cannot safely work with ssa names that
 731          appear in phi nodes on abnormal edges, so that we do not create
 732          overlapping life ranges for them (PR 27283).  */
 733       if (number_of_iterations_exit (data->current_loop,
 734                                      exit, &desc, true)
 735           && zero_p (desc.may_be_zero)
 736           && !contains_abnormal_ssa_name_p (desc.niter))
 737         nfe_desc->niter = desc.niter;
 738       else
 739         nfe_desc->niter = NULL_TREE;
 740     }
 741   else
 742     nfe_desc = *slot;
 743
 744   return nfe_desc->niter;
 745 }
 746
 747 /* Returns tree describing number of iterations determined from
 748    single dominating exit of DATA->current_loop, or NULL if something
 749    goes wrong.  */
 750
 751 static tree
 752 niter_for_single_dom_exit (struct ivopts_data *data)
 753 {
 754   edge exit = single_dom_exit (data->current_loop);
 755
 756   if (!exit)
 757     return NULL;
 758
 759   return niter_for_exit (data, exit);
 760 }
 761
 762 /* Initializes data structures used by the iv optimization pass, stored
 763    in DATA.  */
 764
 765 static void
 766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 767 {
 768   data->version_info_size = 2 * num_ssa_names;
 769   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 770   data->relevant = BITMAP_ALLOC (NULL);
 771   data->important_candidates = BITMAP_ALLOC (NULL);
 772   data->max_inv_id = 0;
 773   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 774   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 775   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 776   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 777 }
 778
 779 /* Returns a memory object to that EXPR points.  In case we are able to
 780    determine that it does not point to any such object, NULL is returned.  */
 781
 782 static tree
 783 determine_base_object (tree expr)
 784 {
 785   enum tree_code code = TREE_CODE (expr);
 786   tree base, obj, op0, op1;
 787
 788   /* If this is a pointer casted to any type, we need to determine
 789      the base object for the pointer; so handle conversions before
 790      throwing away non-pointer expressions.  */
 791   if (TREE_CODE (expr) == NOP_EXPR
 792       || TREE_CODE (expr) == CONVERT_EXPR)
 793     return determine_base_object (TREE_OPERAND (expr, 0));
 794
 795   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 796     return NULL_TREE;
 797
 798   switch (code)
 799     {
 800     case INTEGER_CST:
 801       return NULL_TREE;
 802
 803     case ADDR_EXPR:
 804       obj = TREE_OPERAND (expr, 0);
 805       base = get_base_address (obj);
 806
 807       if (!base)
 808         return expr;
 809
 810       if (TREE_CODE (base) == INDIRECT_REF)
 811         return determine_base_object (TREE_OPERAND (base, 0));
 812
 813       return fold_convert (ptr_type_node,
 814                            build_fold_addr_expr (base));
 815
 816     case PLUS_EXPR:
 817     case MINUS_EXPR:
 818       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 819       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 820
 821       if (!op1)
 822         return op0;
 823
 824       if (!op0)
 825         return (code == PLUS_EXPR
 826                 ? op1
 827                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 828
 829       return fold_build2 (code, ptr_type_node, op0, op1);
 830
 831     default:
 832       return fold_convert (ptr_type_node, expr);
 833     }
 834 }
 835
 836 /* Allocates an induction variable with given initial value BASE and step STEP
 837    for loop LOOP.  */
 838
 839 static struct iv *
 840 alloc_iv (tree base, tree step)
 841 {
 842   struct iv *iv = XCNEW (struct iv);
 843
 844   if (step && integer_zerop (step))
 845     step = NULL_TREE;
 846
 847   iv->base = base;
 848   iv->base_object = determine_base_object (base);
 849   iv->step = step;
 850   iv->biv_p = false;
 851   iv->have_use_for = false;
 852   iv->use_id = 0;
 853   iv->ssa_name = NULL_TREE;
 854
 855   return iv;
 856 }
 857
 858 /* Sets STEP and BASE for induction variable IV.  */
 859
 860 static void
 861 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 862 {
 863   struct version_info *info = name_info (data, iv);
 864
 865   gcc_assert (!info->iv);
 866
 867   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 868   info->iv = alloc_iv (base, step);
 869   info->iv->ssa_name = iv;
 870 }
 871
 872 /* Finds induction variable declaration for VAR.  */
 873
 874 static struct iv *
 875 get_iv (struct ivopts_data *data, tree var)
 876 {
 877   basic_block bb;
 878
 879   if (!name_info (data, var)->iv)
 880     {
 881       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 882
 883       if (!bb
 884           || !flow_bb_inside_loop_p (data->current_loop, bb))
 885         set_iv (data, var, var, NULL_TREE);
 886     }
 887
 888   return name_info (data, var)->iv;
 889 }
 890
 891 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 892    not define a simple affine biv with nonzero step.  */
 893
 894 static tree
 895 determine_biv_step (tree phi)
 896 {
 897   struct loop *loop = bb_for_stmt (phi)->loop_father;
 898   tree name = PHI_RESULT (phi);
 899   affine_iv iv;
 900
 901   if (!is_gimple_reg (name))
 902     return NULL_TREE;
 903
 904   if (!simple_iv (loop, phi, name, &iv, true))
 905     return NULL_TREE;
 906
 907   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 908 }
 909
 910 /* Finds basic ivs.  */
 911
 912 static bool
 913 find_bivs (struct ivopts_data *data)
 914 {
 915   tree phi, step, type, base;
 916   bool found = false;
 917   struct loop *loop = data->current_loop;
 918
 919   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 920     {
 921       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 922         continue;
 923
 924       step = determine_biv_step (phi);
 925       if (!step)
 926         continue;
 927
 928       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 929       base = expand_simple_operations (base);
 930       if (contains_abnormal_ssa_name_p (base)
 931           || contains_abnormal_ssa_name_p (step))
 932         continue;
 933
 934       type = TREE_TYPE (PHI_RESULT (phi));
 935       base = fold_convert (type, base);
 936       if (step)
 937         step = fold_convert (type, step);
 938
 939       set_iv (data, PHI_RESULT (phi), base, step);
 940       found = true;
 941     }
 942
 943   return found;
 944 }
 945
 946 /* Marks basic ivs.  */
 947
 948 static void
 949 mark_bivs (struct ivopts_data *data)
 950 {
 951   tree phi, var;
 952   struct iv *iv, *incr_iv;
 953   struct loop *loop = data->current_loop;
 954   basic_block incr_bb;
 955
 956   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 957     {
 958       iv = get_iv (data, PHI_RESULT (phi));
 959       if (!iv)
 960         continue;
 961
 962       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 963       incr_iv = get_iv (data, var);
 964       if (!incr_iv)
 965         continue;
 966
 967       /* If the increment is in the subloop, ignore it.  */
 968       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 969       if (incr_bb->loop_father != data->current_loop
 970           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 971         continue;
 972
 973       iv->biv_p = true;
 974       incr_iv->biv_p = true;
 975     }
 976 }
 977
 978 /* Checks whether STMT defines a linear induction variable and stores its
 979    parameters to IV.  */
 980
 981 static bool
 982 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 983 {
 984   tree lhs;
 985   struct loop *loop = data->current_loop;
 986
 987   iv->base = NULL_TREE;
 988   iv->step = NULL_TREE;
 989
 990   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 991     return false;
 992
 993   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 994   if (TREE_CODE (lhs) != SSA_NAME)
 995     return false;
 996
 997   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 998     return false;
 999   iv->base = expand_simple_operations (iv->base);
1000
1001   if (contains_abnormal_ssa_name_p (iv->base)
1002       || contains_abnormal_ssa_name_p (iv->step))
1003     return false;
1004
1005   return true;
1006 }
1007
1008 /* Finds general ivs in statement STMT.  */
1009
1010 static void
1011 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1012 {
1013   affine_iv iv;
1014
1015   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1016     return;
1017
1018   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1019 }
1020
1021 /* Finds general ivs in basic block BB.  */
1022
1023 static void
1024 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1025 {
1026   block_stmt_iterator bsi;
1027
1028   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1029     find_givs_in_stmt (data, bsi_stmt (bsi));
1030 }
1031
1032 /* Finds general ivs.  */
1033
1034 static void
1035 find_givs (struct ivopts_data *data)
1036 {
1037   struct loop *loop = data->current_loop;
1038   basic_block *body = get_loop_body_in_dom_order (loop);
1039   unsigned i;
1040
1041   for (i = 0; i < loop->num_nodes; i++)
1042     find_givs_in_bb (data, body[i]);
1043   free (body);
1044 }
1045
1046 /* For each ssa name defined in LOOP determines whether it is an induction
1047    variable and if so, its initial value and step.  */
1048
1049 static bool
1050 find_induction_variables (struct ivopts_data *data)
1051 {
1052   unsigned i;
1053   bitmap_iterator bi;
1054
1055   if (!find_bivs (data))
1056     return false;
1057
1058   find_givs (data);
1059   mark_bivs (data);
1060
1061   if (dump_file && (dump_flags & TDF_DETAILS))
1062     {
1063       tree niter = niter_for_single_dom_exit (data);
1064
1065       if (niter)
1066         {
1067           fprintf (dump_file, "  number of iterations ");
1068           print_generic_expr (dump_file, niter, TDF_SLIM);
1069           fprintf (dump_file, "\n\n");
1070         };
1071
1072       fprintf (dump_file, "Induction variables:\n\n");
1073
1074       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1075         {
1076           if (ver_info (data, i)->iv)
1077             dump_iv (dump_file, ver_info (data, i)->iv);
1078         }
1079     }
1080
1081   return true;
1082 }
1083
1084 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1085
1086 static struct iv_use *
1087 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1088             tree stmt, enum use_type use_type)
1089 {
1090   struct iv_use *use = XCNEW (struct iv_use);
1091
1092   use->id = n_iv_uses (data);
1093   use->type = use_type;
1094   use->iv = iv;
1095   use->stmt = stmt;
1096   use->op_p = use_p;
1097   use->related_cands = BITMAP_ALLOC (NULL);
1098
1099   /* To avoid showing ssa name in the dumps, if it was not reset by the
1100      caller.  */
1101   iv->ssa_name = NULL_TREE;
1102
1103   if (dump_file && (dump_flags & TDF_DETAILS))
1104     dump_use (dump_file, use);
1105
1106   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1107
1108   return use;
1109 }
1110
1111 /* Checks whether OP is a loop-level invariant and if so, records it.
1112    NONLINEAR_USE is true if the invariant is used in a way we do not
1113    handle specially.  */
1114
1115 static void
1116 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1117 {
1118   basic_block bb;
1119   struct version_info *info;
1120
1121   if (TREE_CODE (op) != SSA_NAME
1122       || !is_gimple_reg (op))
1123     return;
1124
1125   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1126   if (bb
1127       && flow_bb_inside_loop_p (data->current_loop, bb))
1128     return;
1129
1130   info = name_info (data, op);
1131   info->name = op;
1132   info->has_nonlin_use |= nonlinear_use;
1133   if (!info->inv_id)
1134     info->inv_id = ++data->max_inv_id;
1135   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1136 }
1137
1138 /* Checks whether the use OP is interesting and if so, records it.  */
1139
1140 static struct iv_use *
1141 find_interesting_uses_op (struct ivopts_data *data, tree op)
1142 {
1143   struct iv *iv;
1144   struct iv *civ;
1145   tree stmt;
1146   struct iv_use *use;
1147
1148   if (TREE_CODE (op) != SSA_NAME)
1149     return NULL;
1150
1151   iv = get_iv (data, op);
1152   if (!iv)
1153     return NULL;
1154
1155   if (iv->have_use_for)
1156     {
1157       use = iv_use (data, iv->use_id);
1158
1159       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1160       return use;
1161     }
1162
1163   if (zero_p (iv->step))
1164     {
1165       record_invariant (data, op, true);
1166       return NULL;
1167     }
1168   iv->have_use_for = true;
1169
1170   civ = XNEW (struct iv);
1171   *civ = *iv;
1172
1173   stmt = SSA_NAME_DEF_STMT (op);
1174   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1175               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1176
1177   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1178   iv->use_id = use->id;
1179
1180   return use;
1181 }
1182
1183 /* Checks whether the condition *COND_P in STMT is interesting
1184    and if so, records it.  */
1185
1186 static void
1187 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1188 {
1189   tree *op0_p;
1190   tree *op1_p;
1191   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1192   struct iv const_iv;
1193   tree zero = integer_zero_node;
1194
1195   const_iv.step = NULL_TREE;
1196
1197   if (TREE_CODE (*cond_p) != SSA_NAME
1198       && !COMPARISON_CLASS_P (*cond_p))
1199     return;
1200
1201   if (TREE_CODE (*cond_p) == SSA_NAME)
1202     {
1203       op0_p = cond_p;
1204       op1_p = &zero;
1205     }
1206   else
1207     {
1208       op0_p = &TREE_OPERAND (*cond_p, 0);
1209       op1_p = &TREE_OPERAND (*cond_p, 1);
1210     }
1211
1212   if (TREE_CODE (*op0_p) == SSA_NAME)
1213     iv0 = get_iv (data, *op0_p);
1214   else
1215     iv0 = &const_iv;
1216
1217   if (TREE_CODE (*op1_p) == SSA_NAME)
1218     iv1 = get_iv (data, *op1_p);
1219   else
1220     iv1 = &const_iv;
1221
1222   if (/* When comparing with non-invariant value, we may not do any senseful
1223          induction variable elimination.  */
1224       (!iv0 || !iv1)
1225       /* Eliminating condition based on two ivs would be nontrivial.
1226          ??? TODO -- it is not really important to handle this case.  */
1227       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1228     {
1229       find_interesting_uses_op (data, *op0_p);
1230       find_interesting_uses_op (data, *op1_p);
1231       return;
1232     }
1233
1234   if (zero_p (iv0->step) && zero_p (iv1->step))
1235     {
1236       /* If both are invariants, this is a work for unswitching.  */
1237       return;
1238     }
1239
1240   civ = XNEW (struct iv);
1241   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1242   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1243 }
1244
1245 /* Returns true if expression EXPR is obviously invariant in LOOP,
1246    i.e. if all its operands are defined outside of the LOOP.  */
1247
1248 bool
1249 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1250 {
1251   basic_block def_bb;
1252   unsigned i, len;
1253
1254   if (is_gimple_min_invariant (expr))
1255     return true;
1256
1257   if (TREE_CODE (expr) == SSA_NAME)
1258     {
1259       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1260       if (def_bb
1261           && flow_bb_inside_loop_p (loop, def_bb))
1262         return false;
1263
1264       return true;
1265     }
1266
1267   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1268     return false;
1269
1270   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1271   for (i = 0; i < len; i++)
1272     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1273       return false;
1274
1275   return true;
1276 }
1277
1278 /* Cumulates the steps of indices into DATA and replaces their values with the
1279    initial ones.  Returns false when the value of the index cannot be determined.
1280    Callback for for_each_index.  */
1281
1282 struct ifs_ivopts_data
1283 {
1284   struct ivopts_data *ivopts_data;
1285   tree stmt;
1286   tree *step_p;
1287 };
1288
1289 static bool
1290 idx_find_step (tree base, tree *idx, void *data)
1291 {
1292   struct ifs_ivopts_data *dta = data;
1293   struct iv *iv;
1294   tree step, iv_base, iv_step, lbound, off;
1295   struct loop *loop = dta->ivopts_data->current_loop;
1296
1297   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1298       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1299     return false;
1300
1301   /* If base is a component ref, require that the offset of the reference
1302      be invariant.  */
1303   if (TREE_CODE (base) == COMPONENT_REF)
1304     {
1305       off = component_ref_field_offset (base);
1306       return expr_invariant_in_loop_p (loop, off);
1307     }
1308
1309   /* If base is array, first check whether we will be able to move the
1310      reference out of the loop (in order to take its address in strength
1311      reduction).  In order for this to work we need both lower bound
1312      and step to be loop invariants.  */
1313   if (TREE_CODE (base) == ARRAY_REF)
1314     {
1315       step = array_ref_element_size (base);
1316       lbound = array_ref_low_bound (base);
1317
1318       if (!expr_invariant_in_loop_p (loop, step)
1319           || !expr_invariant_in_loop_p (loop, lbound))
1320         return false;
1321     }
1322
1323   if (TREE_CODE (*idx) != SSA_NAME)
1324     return true;
1325
1326   iv = get_iv (dta->ivopts_data, *idx);
1327   if (!iv)
1328     return false;
1329
1330   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1331           *&x[0], which is not folded and does not trigger the
1332           ARRAY_REF path below.  */
1333   *idx = iv->base;
1334
1335   if (!iv->step)
1336     return true;
1337
1338   if (TREE_CODE (base) == ARRAY_REF)
1339     {
1340       step = array_ref_element_size (base);
1341
1342       /* We only handle addresses whose step is an integer constant.  */
1343       if (TREE_CODE (step) != INTEGER_CST)
1344         return false;
1345     }
1346   else
1347     /* The step for pointer arithmetics already is 1 byte.  */
1348     step = build_int_cst (sizetype, 1);
1349
1350   iv_base = iv->base;
1351   iv_step = iv->step;
1352   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1353                             sizetype, &iv_base, &iv_step, dta->stmt,
1354                             false))
1355     {
1356       /* The index might wrap.  */
1357       return false;
1358     }
1359
1360   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1361
1362   if (!*dta->step_p)
1363     *dta->step_p = step;
1364   else
1365     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1366
1367   return true;
1368 }
1369
1370 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1371    object is passed to it in DATA.  */
1372
1373 static bool
1374 idx_record_use (tree base, tree *idx,
1375                 void *data)
1376 {
1377   find_interesting_uses_op (data, *idx);
1378   if (TREE_CODE (base) == ARRAY_REF)
1379     {
1380       find_interesting_uses_op (data, array_ref_element_size (base));
1381       find_interesting_uses_op (data, array_ref_low_bound (base));
1382     }
1383   return true;
1384 }
1385
1386 /* Returns true if memory reference REF may be unaligned.  */
1387
1388 static bool
1389 may_be_unaligned_p (tree ref)
1390 {
1391   tree base;
1392   tree base_type;
1393   HOST_WIDE_INT bitsize;
1394   HOST_WIDE_INT bitpos;
1395   tree toffset;
1396   enum machine_mode mode;
1397   int unsignedp, volatilep;
1398   unsigned base_align;
1399
1400   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1401      thus they are not misaligned.  */
1402   if (TREE_CODE (ref) == TARGET_MEM_REF)
1403     return false;
1404
1405   /* The test below is basically copy of what expr.c:normal_inner_ref
1406      does to check whether the object must be loaded by parts when
1407      STRICT_ALIGNMENT is true.  */
1408   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1409                               &unsignedp, &volatilep, true);
1410   base_type = TREE_TYPE (base);
1411   base_align = TYPE_ALIGN (base_type);
1412
1413   if (mode != BLKmode
1414       && (base_align < GET_MODE_ALIGNMENT (mode)
1415           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1416           || bitpos % BITS_PER_UNIT != 0))
1417     return true;
1418
1419   return false;
1420 }
1421
1422 /* Return true if EXPR may be non-addressable.   */
1423
1424 static bool
1425 may_be_nonaddressable_p (tree expr)
1426 {
1427   switch (TREE_CODE (expr))
1428     {
1429     case COMPONENT_REF:
1430       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1431              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1432
1433     case ARRAY_REF:
1434     case ARRAY_RANGE_REF:
1435       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1436
1437     case VIEW_CONVERT_EXPR:
1438       /* This kind of view-conversions may wrap non-addressable objects
1439          and make them look addressable.  After some processing the
1440          non-addressability may be uncovered again, causing ADDR_EXPRs
1441          of inappropriate objects to be built.  */
1442       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1443              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1444
1445     default:
1446       break;
1447     }
1448
1449   return false;
1450 }
1451
1452 /* Finds addresses in *OP_P inside STMT.  */
1453
1454 static void
1455 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1456 {
1457   tree base = *op_p, step = NULL;
1458   struct iv *civ;
1459   struct ifs_ivopts_data ifs_ivopts_data;
1460
1461   /* Do not play with volatile memory references.  A bit too conservative,
1462      perhaps, but safe.  */
1463   if (stmt_ann (stmt)->has_volatile_ops)
1464     goto fail;
1465
1466   /* Ignore bitfields for now.  Not really something terribly complicated
1467      to handle.  TODO.  */
1468   if (TREE_CODE (base) == BIT_FIELD_REF)
1469     goto fail;
1470
1471   if (may_be_nonaddressable_p (base))
1472     goto fail;
1473
1474   if (STRICT_ALIGNMENT
1475       && may_be_unaligned_p (base))
1476     goto fail;
1477
1478   base = unshare_expr (base);
1479
1480   if (TREE_CODE (base) == TARGET_MEM_REF)
1481     {
1482       tree type = build_pointer_type (TREE_TYPE (base));
1483       tree astep;
1484
1485       if (TMR_BASE (base)
1486           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1487         {
1488           civ = get_iv (data, TMR_BASE (base));
1489           if (!civ)
1490             goto fail;
1491
1492           TMR_BASE (base) = civ->base;
1493           step = civ->step;
1494         }
1495       if (TMR_INDEX (base)
1496           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1497         {
1498           civ = get_iv (data, TMR_INDEX (base));
1499           if (!civ)
1500             goto fail;
1501
1502           TMR_INDEX (base) = civ->base;
1503           astep = civ->step;
1504
1505           if (astep)
1506             {
1507               if (TMR_STEP (base))
1508                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1509
1510               if (step)
1511                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1512               else
1513                 step = astep;
1514             }
1515         }
1516
1517       if (zero_p (step))
1518         goto fail;
1519       base = tree_mem_ref_addr (type, base);
1520     }
1521   else
1522     {
1523       ifs_ivopts_data.ivopts_data = data;
1524       ifs_ivopts_data.stmt = stmt;
1525       ifs_ivopts_data.step_p = &step;
1526       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1527           || zero_p (step))
1528         goto fail;
1529
1530       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1531       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1532
1533       base = build_fold_addr_expr (base);
1534
1535       /* Substituting bases of IVs into the base expression might
1536          have caused folding opportunities.  */
1537       if (TREE_CODE (base) == ADDR_EXPR)
1538         {
1539           tree *ref = &TREE_OPERAND (base, 0);
1540           while (handled_component_p (*ref))
1541             ref = &TREE_OPERAND (*ref, 0);
1542           if (TREE_CODE (*ref) == INDIRECT_REF)
1543             *ref = fold_indirect_ref (*ref);
1544         }
1545     }
1546
1547   civ = alloc_iv (base, step);
1548   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1549   return;
1550
1551 fail:
1552   for_each_index (op_p, idx_record_use, data);
1553 }
1554
1555 /* Finds and records invariants used in STMT.  */
1556
1557 static void
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1559 {
1560   ssa_op_iter iter;
1561   use_operand_p use_p;
1562   tree op;
1563
1564   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1565     {
1566       op = USE_FROM_PTR (use_p);
1567       record_invariant (data, op, false);
1568     }
1569 }
1570
1571 /* Finds interesting uses of induction variables in the statement STMT.  */
1572
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1575 {
1576   struct iv *iv;
1577   tree op, lhs, rhs;
1578   ssa_op_iter iter;
1579   use_operand_p use_p;
1580
1581   find_invariants_stmt (data, stmt);
1582
1583   if (TREE_CODE (stmt) == COND_EXPR)
1584     {
1585       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586       return;
1587     }
1588
1589   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1590     {
1591       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1592       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1593
1594       if (TREE_CODE (lhs) == SSA_NAME)
1595         {
1596           /* If the statement defines an induction variable, the uses are not
1597              interesting by themselves.  */
1598
1599           iv = get_iv (data, lhs);
1600
1601           if (iv && !zero_p (iv->step))
1602             return;
1603         }
1604
1605       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606         {
1607         case tcc_comparison:
1608           find_interesting_uses_cond (data, stmt,
1609                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1610           return;
1611
1612         case tcc_reference:
1613           find_interesting_uses_address (data, stmt,
1614                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1615           if (REFERENCE_CLASS_P (lhs))
1616             find_interesting_uses_address (data, stmt,
1617                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1618           return;
1619
1620         default: ;
1621         }
1622
1623       if (REFERENCE_CLASS_P (lhs)
1624           && is_gimple_val (rhs))
1625         {
1626           find_interesting_uses_address (data, stmt,
1627                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1628           find_interesting_uses_op (data, rhs);
1629           return;
1630         }
1631
1632       /* TODO -- we should also handle address uses of type
1633
1634          memory = call (whatever);
1635
1636          and
1637
1638          call (memory).  */
1639     }
1640
1641   if (TREE_CODE (stmt) == PHI_NODE
1642       && bb_for_stmt (stmt) == data->current_loop->header)
1643     {
1644       lhs = PHI_RESULT (stmt);
1645       iv = get_iv (data, lhs);
1646
1647       if (iv && !zero_p (iv->step))
1648         return;
1649     }
1650
1651   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1652     {
1653       op = USE_FROM_PTR (use_p);
1654
1655       if (TREE_CODE (op) != SSA_NAME)
1656         continue;
1657
1658       iv = get_iv (data, op);
1659       if (!iv)
1660         continue;
1661
1662       find_interesting_uses_op (data, op);
1663     }
1664 }
1665
1666 /* Finds interesting uses of induction variables outside of loops
1667    on loop exit edge EXIT.  */
1668
1669 static void
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1671 {
1672   tree phi, def;
1673
1674   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1675     {
1676       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677       find_interesting_uses_op (data, def);
1678     }
1679 }
1680
1681 /* Finds uses of the induction variables that are interesting.  */
1682
1683 static void
1684 find_interesting_uses (struct ivopts_data *data)
1685 {
1686   basic_block bb;
1687   block_stmt_iterator bsi;
1688   tree phi;
1689   basic_block *body = get_loop_body (data->current_loop);
1690   unsigned i;
1691   struct version_info *info;
1692   edge e;
1693
1694   if (dump_file && (dump_flags & TDF_DETAILS))
1695     fprintf (dump_file, "Uses:\n\n");
1696
1697   for (i = 0; i < data->current_loop->num_nodes; i++)
1698     {
1699       edge_iterator ei;
1700       bb = body[i];
1701
1702       FOR_EACH_EDGE (e, ei, bb->succs)
1703         if (e->dest != EXIT_BLOCK_PTR
1704             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705           find_interesting_uses_outside (data, e);
1706
1707       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708         find_interesting_uses_stmt (data, phi);
1709       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1711     }
1712
1713   if (dump_file && (dump_flags & TDF_DETAILS))
1714     {
1715       bitmap_iterator bi;
1716
1717       fprintf (dump_file, "\n");
1718
1719       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720         {
1721           info = ver_info (data, i);
1722           if (info->inv_id)
1723             {
1724               fprintf (dump_file, "  ");
1725               print_generic_expr (dump_file, info->name, TDF_SLIM);
1726               fprintf (dump_file, " is invariant (%d)%s\n",
1727                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1728             }
1729         }
1730
1731       fprintf (dump_file, "\n");
1732     }
1733
1734   free (body);
1735 }
1736
1737 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1738    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1739    we are at the top-level of the processed address.  */
1740
1741 static tree
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743                 unsigned HOST_WIDE_INT *offset)
1744 {
1745   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746   enum tree_code code;
1747   tree type, orig_type = TREE_TYPE (expr);
1748   unsigned HOST_WIDE_INT off0, off1, st;
1749   tree orig_expr = expr;
1750
1751   STRIP_NOPS (expr);
1752
1753   type = TREE_TYPE (expr);
1754   code = TREE_CODE (expr);
1755   *offset = 0;
1756
1757   switch (code)
1758     {
1759     case INTEGER_CST:
1760       if (!cst_and_fits_in_hwi (expr)
1761           || zero_p (expr))
1762         return orig_expr;
1763
1764       *offset = int_cst_value (expr);
1765       return build_int_cst (orig_type, 0);
1766
1767     case PLUS_EXPR:
1768     case MINUS_EXPR:
1769       op0 = TREE_OPERAND (expr, 0);
1770       op1 = TREE_OPERAND (expr, 1);
1771
1772       op0 = strip_offset_1 (op0, false, false, &off0);
1773       op1 = strip_offset_1 (op1, false, false, &off1);
1774
1775       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776       if (op0 == TREE_OPERAND (expr, 0)
1777           && op1 == TREE_OPERAND (expr, 1))
1778         return orig_expr;
1779
1780       if (zero_p (op1))
1781         expr = op0;
1782       else if (zero_p (op0))
1783         {
1784           if (code == PLUS_EXPR)
1785             expr = op1;
1786           else
1787             expr = fold_build1 (NEGATE_EXPR, type, op1);
1788         }
1789       else
1790         expr = fold_build2 (code, type, op0, op1);
1791
1792       return fold_convert (orig_type, expr);
1793
1794     case ARRAY_REF:
1795       if (!inside_addr)
1796         return orig_expr;
1797
1798       step = array_ref_element_size (expr);
1799       if (!cst_and_fits_in_hwi (step))
1800         break;
1801
1802       st = int_cst_value (step);
1803       op1 = TREE_OPERAND (expr, 1);
1804       op1 = strip_offset_1 (op1, false, false, &off1);
1805       *offset = off1 * st;
1806
1807       if (top_compref
1808           && zero_p (op1))
1809         {
1810           /* Strip the component reference completely.  */
1811           op0 = TREE_OPERAND (expr, 0);
1812           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1813           *offset += off0;
1814           return op0;
1815         }
1816       break;
1817
1818     case COMPONENT_REF:
1819       if (!inside_addr)
1820         return orig_expr;
1821
1822       tmp = component_ref_field_offset (expr);
1823       if (top_compref
1824           && cst_and_fits_in_hwi (tmp))
1825         {
1826           /* Strip the component reference completely.  */
1827           op0 = TREE_OPERAND (expr, 0);
1828           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829           *offset = off0 + int_cst_value (tmp);
1830           return op0;
1831         }
1832       break;
1833
1834     case ADDR_EXPR:
1835       op0 = TREE_OPERAND (expr, 0);
1836       op0 = strip_offset_1 (op0, true, true, &off0);
1837       *offset += off0;
1838
1839       if (op0 == TREE_OPERAND (expr, 0))
1840         return orig_expr;
1841
1842       expr = build_fold_addr_expr (op0);
1843       return fold_convert (orig_type, expr);
1844
1845     case INDIRECT_REF:
1846       inside_addr = false;
1847       break;
1848
1849     default:
1850       return orig_expr;
1851     }
1852
1853   /* Default handling of expressions for that we want to recurse into
1854      the first operand.  */
1855   op0 = TREE_OPERAND (expr, 0);
1856   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1857   *offset += off0;
1858
1859   if (op0 == TREE_OPERAND (expr, 0)
1860       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1861     return orig_expr;
1862
1863   expr = copy_node (expr);
1864   TREE_OPERAND (expr, 0) = op0;
1865   if (op1)
1866     TREE_OPERAND (expr, 1) = op1;
1867
1868   /* Inside address, we might strip the top level component references,
1869      thus changing type of the expression.  Handling of ADDR_EXPR
1870      will fix that.  */
1871   expr = fold_convert (orig_type, expr);
1872
1873   return expr;
1874 }
1875
1876 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1877
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 {
1881   return strip_offset_1 (expr, false, false, offset);
1882 }
1883
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885    We return unsigned type with the same precision, which avoids problems
1886    with overflows.  */
1887
1888 static tree
1889 generic_type_for (tree type)
1890 {
1891   if (POINTER_TYPE_P (type))
1892     return unsigned_type_for (type);
1893
1894   if (TYPE_UNSIGNED (type))
1895     return type;
1896
1897   return unsigned_type_for (type);
1898 }
1899
1900 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1901    the bitmap to that we should store it.  */
1902
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 {
1907   bitmap *depends_on = data;
1908   struct version_info *info;
1909
1910   if (TREE_CODE (*expr_p) != SSA_NAME)
1911     return NULL_TREE;
1912   info = name_info (fd_ivopts_data, *expr_p);
1913
1914   if (!info->inv_id || info->has_nonlin_use)
1915     return NULL_TREE;
1916
1917   if (!*depends_on)
1918     *depends_on = BITMAP_ALLOC (NULL);
1919   bitmap_set_bit (*depends_on, info->inv_id);
1920
1921   return NULL_TREE;
1922 }
1923
1924 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1925    position to POS.  If USE is not NULL, the candidate is set as related to
1926    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1927    replacement of the final value of the iv by a direct computation.  */
1928
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931                  tree base, tree step, bool important, enum iv_position pos,
1932                  struct iv_use *use, tree incremented_at)
1933 {
1934   unsigned i;
1935   struct iv_cand *cand = NULL;
1936   tree type, orig_type;
1937
1938   if (base)
1939     {
1940       orig_type = TREE_TYPE (base);
1941       type = generic_type_for (orig_type);
1942       if (type != orig_type)
1943         {
1944           base = fold_convert (type, base);
1945           if (step)
1946             step = fold_convert (type, step);
1947         }
1948     }
1949
1950   for (i = 0; i < n_iv_cands (data); i++)
1951     {
1952       cand = iv_cand (data, i);
1953
1954       if (cand->pos != pos)
1955         continue;
1956
1957       if (cand->incremented_at != incremented_at)
1958         continue;
1959
1960       if (!cand->iv)
1961         {
1962           if (!base && !step)
1963             break;
1964
1965           continue;
1966         }
1967
1968       if (!base && !step)
1969         continue;
1970
1971       if (!operand_equal_p (base, cand->iv->base, 0))
1972         continue;
1973
1974       if (zero_p (cand->iv->step))
1975         {
1976           if (zero_p (step))
1977             break;
1978         }
1979       else
1980         {
1981           if (step && operand_equal_p (step, cand->iv->step, 0))
1982             break;
1983         }
1984     }
1985
1986   if (i == n_iv_cands (data))
1987     {
1988       cand = XCNEW (struct iv_cand);
1989       cand->id = i;
1990
1991       if (!base && !step)
1992         cand->iv = NULL;
1993       else
1994         cand->iv = alloc_iv (base, step);
1995
1996       cand->pos = pos;
1997       if (pos != IP_ORIGINAL && cand->iv)
1998         {
1999           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2000           cand->var_after = cand->var_before;
2001         }
2002       cand->important = important;
2003       cand->incremented_at = incremented_at;
2004       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2005
2006       if (step
2007           && TREE_CODE (step) != INTEGER_CST)
2008         {
2009           fd_ivopts_data = data;
2010           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2011         }
2012
2013       if (dump_file && (dump_flags & TDF_DETAILS))
2014         dump_cand (dump_file, cand);
2015     }
2016
2017   if (important && !cand->important)
2018     {
2019       cand->important = true;
2020       if (dump_file && (dump_flags & TDF_DETAILS))
2021         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2022     }
2023
2024   if (use)
2025     {
2026       bitmap_set_bit (use->related_cands, i);
2027       if (dump_file && (dump_flags & TDF_DETAILS))
2028         fprintf (dump_file, "Candidate %d is related to use %d\n",
2029                  cand->id, use->id);
2030     }
2031
2032   return cand;
2033 }
2034
2035 /* Returns true if incrementing the induction variable at the end of the LOOP
2036    is allowed.
2037
2038    The purpose is to avoid splitting latch edge with a biv increment, thus
2039    creating a jump, possibly confusing other optimization passes and leaving
2040    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2041    is not available (so we do not have a better alternative), or if the latch
2042    edge is already nonempty.  */
2043
2044 static bool
2045 allow_ip_end_pos_p (struct loop *loop)
2046 {
2047   if (!ip_normal_pos (loop))
2048     return true;
2049
2050   if (!empty_block_p (ip_end_pos (loop)))
2051     return true;
2052
2053   return false;
2054 }
2055
2056 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2057    position to POS.  If USE is not NULL, the candidate is set as related to
2058    it.  The candidate computation is scheduled on all available positions.  */
2059
2060 static void
2061 add_candidate (struct ivopts_data *data,
2062                tree base, tree step, bool important, struct iv_use *use)
2063 {
2064   if (ip_normal_pos (data->current_loop))
2065     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2066   if (ip_end_pos (data->current_loop)
2067       && allow_ip_end_pos_p (data->current_loop))
2068     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2069 }
2070
2071 /* Add a standard "0 + 1 * iteration" iv candidate for a
2072    type with SIZE bits.  */
2073
2074 static void
2075 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2076                                      unsigned int size)
2077 {
2078   tree type = lang_hooks.types.type_for_size (size, true);
2079   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2080                  true, NULL);
2081 }
2082
2083 /* Adds standard iv candidates.  */
2084
2085 static void
2086 add_standard_iv_candidates (struct ivopts_data *data)
2087 {
2088   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2089
2090   /* The same for a double-integer type if it is still fast enough.  */
2091   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2092     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2093 }
2094
2095
2096 /* Adds candidates bases on the old induction variable IV.  */
2097
2098 static void
2099 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2100 {
2101   tree phi, def;
2102   struct iv_cand *cand;
2103
2104   add_candidate (data, iv->base, iv->step, true, NULL);
2105
2106   /* The same, but with initial value zero.  */
2107   add_candidate (data,
2108                  build_int_cst (TREE_TYPE (iv->base), 0),
2109                  iv->step, true, NULL);
2110
2111   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2112   if (TREE_CODE (phi) == PHI_NODE)
2113     {
2114       /* Additionally record the possibility of leaving the original iv
2115          untouched.  */
2116       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2117       cand = add_candidate_1 (data,
2118                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2119                               SSA_NAME_DEF_STMT (def));
2120       cand->var_before = iv->ssa_name;
2121       cand->var_after = def;
2122     }
2123 }
2124
2125 /* Adds candidates based on the old induction variables.  */
2126
2127 static void
2128 add_old_ivs_candidates (struct ivopts_data *data)
2129 {
2130   unsigned i;
2131   struct iv *iv;
2132   bitmap_iterator bi;
2133
2134   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2135     {
2136       iv = ver_info (data, i)->iv;
2137       if (iv && iv->biv_p && !zero_p (iv->step))
2138         add_old_iv_candidates (data, iv);
2139     }
2140 }
2141
2142 /* Adds candidates based on the value of the induction variable IV and USE.  */
2143
2144 static void
2145 add_iv_value_candidates (struct ivopts_data *data,
2146                          struct iv *iv, struct iv_use *use)
2147 {
2148   unsigned HOST_WIDE_INT offset;
2149   tree base;
2150
2151   add_candidate (data, iv->base, iv->step, false, use);
2152
2153   /* The same, but with initial value zero.  Make such variable important,
2154      since it is generic enough so that possibly many uses may be based
2155      on it.  */
2156   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2157                  iv->step, true, use);
2158
2159   /* Third, try removing the constant offset.  */
2160   base = strip_offset (iv->base, &offset);
2161   if (offset)
2162     add_candidate (data, base, iv->step, false, use);
2163 }
2164
2165 /* Adds candidates based on the uses.  */
2166
2167 static void
2168 add_derived_ivs_candidates (struct ivopts_data *data)
2169 {
2170   unsigned i;
2171
2172   for (i = 0; i < n_iv_uses (data); i++)
2173     {
2174       struct iv_use *use = iv_use (data, i);
2175
2176       if (!use)
2177         continue;
2178
2179       switch (use->type)
2180         {
2181         case USE_NONLINEAR_EXPR:
2182         case USE_COMPARE:
2183         case USE_ADDRESS:
2184           /* Just add the ivs based on the value of the iv used here.  */
2185           add_iv_value_candidates (data, use->iv, use);
2186           break;
2187
2188         default:
2189           gcc_unreachable ();
2190         }
2191     }
2192 }
2193
2194 /* Record important candidates and add them to related_cands bitmaps
2195    if needed.  */
2196
2197 static void
2198 record_important_candidates (struct ivopts_data *data)
2199 {
2200   unsigned i;
2201   struct iv_use *use;
2202
2203   for (i = 0; i < n_iv_cands (data); i++)
2204     {
2205       struct iv_cand *cand = iv_cand (data, i);
2206
2207       if (cand->important)
2208         bitmap_set_bit (data->important_candidates, i);
2209     }
2210
2211   data->consider_all_candidates = (n_iv_cands (data)
2212                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2213
2214   if (data->consider_all_candidates)
2215     {
2216       /* We will not need "related_cands" bitmaps in this case,
2217          so release them to decrease peak memory consumption.  */
2218       for (i = 0; i < n_iv_uses (data); i++)
2219         {
2220           use = iv_use (data, i);
2221           BITMAP_FREE (use->related_cands);
2222         }
2223     }
2224   else
2225     {
2226       /* Add important candidates to the related_cands bitmaps.  */
2227       for (i = 0; i < n_iv_uses (data); i++)
2228         bitmap_ior_into (iv_use (data, i)->related_cands,
2229                          data->important_candidates);
2230     }
2231 }
2232
2233 /* Finds the candidates for the induction variables.  */
2234
2235 static void
2236 find_iv_candidates (struct ivopts_data *data)
2237 {
2238   /* Add commonly used ivs.  */
2239   add_standard_iv_candidates (data);
2240
2241   /* Add old induction variables.  */
2242   add_old_ivs_candidates (data);
2243
2244   /* Add induction variables derived from uses.  */
2245   add_derived_ivs_candidates (data);
2246
2247   /* Record the important candidates.  */
2248   record_important_candidates (data);
2249 }
2250
2251 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2252    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2253    we allocate a simple list to every use.  */
2254
2255 static void
2256 alloc_use_cost_map (struct ivopts_data *data)
2257 {
2258   unsigned i, size, s, j;
2259
2260   for (i = 0; i < n_iv_uses (data); i++)
2261     {
2262       struct iv_use *use = iv_use (data, i);
2263       bitmap_iterator bi;
2264
2265       if (data->consider_all_candidates)
2266         size = n_iv_cands (data);
2267       else
2268         {
2269           s = 0;
2270           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2271             {
2272               s++;
2273             }
2274
2275           /* Round up to the power of two, so that moduling by it is fast.  */
2276           for (size = 1; size < s; size <<= 1)
2277             continue;
2278         }
2279
2280       use->n_map_members = size;
2281       use->cost_map = XCNEWVEC (struct cost_pair, size);
2282     }
2283 }
2284
2285 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2286    on invariants DEPENDS_ON and that the value used in expressing it
2287    is VALUE.*/
2288
2289 static void
2290 set_use_iv_cost (struct ivopts_data *data,
2291                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2292                  bitmap depends_on, tree value)
2293 {
2294   unsigned i, s;
2295
2296   if (cost == INFTY)
2297     {
2298       BITMAP_FREE (depends_on);
2299       return;
2300     }
2301
2302   if (data->consider_all_candidates)
2303     {
2304       use->cost_map[cand->id].cand = cand;
2305       use->cost_map[cand->id].cost = cost;
2306       use->cost_map[cand->id].depends_on = depends_on;
2307       use->cost_map[cand->id].value = value;
2308       return;
2309     }
2310
2311   /* n_map_members is a power of two, so this computes modulo.  */
2312   s = cand->id & (use->n_map_members - 1);
2313   for (i = s; i < use->n_map_members; i++)
2314     if (!use->cost_map[i].cand)
2315       goto found;
2316   for (i = 0; i < s; i++)
2317     if (!use->cost_map[i].cand)
2318       goto found;
2319
2320   gcc_unreachable ();
2321
2322 found:
2323   use->cost_map[i].cand = cand;
2324   use->cost_map[i].cost = cost;
2325   use->cost_map[i].depends_on = depends_on;
2326   use->cost_map[i].value = value;
2327 }
2328
2329 /* Gets cost of (USE, CANDIDATE) pair.  */
2330
2331 static struct cost_pair *
2332 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2333                  struct iv_cand *cand)
2334 {
2335   unsigned i, s;
2336   struct cost_pair *ret;
2337
2338   if (!cand)
2339     return NULL;
2340
2341   if (data->consider_all_candidates)
2342     {
2343       ret = use->cost_map + cand->id;
2344       if (!ret->cand)
2345         return NULL;
2346
2347       return ret;
2348     }
2349
2350   /* n_map_members is a power of two, so this computes modulo.  */
2351   s = cand->id & (use->n_map_members - 1);
2352   for (i = s; i < use->n_map_members; i++)
2353     if (use->cost_map[i].cand == cand)
2354       return use->cost_map + i;
2355
2356   for (i = 0; i < s; i++)
2357     if (use->cost_map[i].cand == cand)
2358       return use->cost_map + i;
2359
2360   return NULL;
2361 }
2362
2363 /* Returns estimate on cost of computing SEQ.  */
2364
2365 static unsigned
2366 seq_cost (rtx seq)
2367 {
2368   unsigned cost = 0;
2369   rtx set;
2370
2371   for (; seq; seq = NEXT_INSN (seq))
2372     {
2373       set = single_set (seq);
2374       if (set)
2375         cost += rtx_cost (set, SET);
2376       else
2377         cost++;
2378     }
2379
2380   return cost;
2381 }
2382
2383 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2384 static rtx
2385 produce_memory_decl_rtl (tree obj, int *regno)
2386 {
2387   rtx x;
2388
2389   gcc_assert (obj);
2390   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2391     {
2392       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2393       x = gen_rtx_SYMBOL_REF (Pmode, name);
2394     }
2395   else
2396     x = gen_raw_REG (Pmode, (*regno)++);
2397
2398   return gen_rtx_MEM (DECL_MODE (obj), x);
2399 }
2400
2401 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2402    walk_tree.  DATA contains the actual fake register number.  */
2403
2404 static tree
2405 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2406 {
2407   tree obj = NULL_TREE;
2408   rtx x = NULL_RTX;
2409   int *regno = data;
2410
2411   switch (TREE_CODE (*expr_p))
2412     {
2413     case ADDR_EXPR:
2414       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2415            handled_component_p (*expr_p);
2416            expr_p = &TREE_OPERAND (*expr_p, 0))
2417         continue;
2418       obj = *expr_p;
2419       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2420         x = produce_memory_decl_rtl (obj, regno);
2421       break;
2422
2423     case SSA_NAME:
2424       *ws = 0;
2425       obj = SSA_NAME_VAR (*expr_p);
2426       if (!DECL_RTL_SET_P (obj))
2427         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2428       break;
2429
2430     case VAR_DECL:
2431     case PARM_DECL:
2432     case RESULT_DECL:
2433       *ws = 0;
2434       obj = *expr_p;
2435
2436       if (DECL_RTL_SET_P (obj))
2437         break;
2438
2439       if (DECL_MODE (obj) == BLKmode)
2440         x = produce_memory_decl_rtl (obj, regno);
2441       else
2442         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2443
2444       break;
2445
2446     default:
2447       break;
2448     }
2449
2450   if (x)
2451     {
2452       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2453       SET_DECL_RTL (obj, x);
2454     }
2455
2456   return NULL_TREE;
2457 }
2458
2459 /* Determines cost of the computation of EXPR.  */
2460
2461 static unsigned
2462 computation_cost (tree expr)
2463 {
2464   rtx seq, rslt;
2465   tree type = TREE_TYPE (expr);
2466   unsigned cost;
2467   /* Avoid using hard regs in ways which may be unsupported.  */
2468   int regno = LAST_VIRTUAL_REGISTER + 1;
2469
2470   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2471   start_sequence ();
2472   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2473   seq = get_insns ();
2474   end_sequence ();
2475
2476   cost = seq_cost (seq);
2477   if (MEM_P (rslt))
2478     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2479
2480   return cost;
2481 }
2482
2483 /* Returns variable containing the value of candidate CAND at statement AT.  */
2484
2485 static tree
2486 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2487 {
2488   if (stmt_after_increment (loop, cand, stmt))
2489     return cand->var_after;
2490   else
2491     return cand->var_before;
2492 }
2493
2494 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2495    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2496
2497 int
2498 tree_int_cst_sign_bit (tree t)
2499 {
2500   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2501   unsigned HOST_WIDE_INT w;
2502
2503   if (bitno < HOST_BITS_PER_WIDE_INT)
2504     w = TREE_INT_CST_LOW (t);
2505   else
2506     {
2507       w = TREE_INT_CST_HIGH (t);
2508       bitno -= HOST_BITS_PER_WIDE_INT;
2509     }
2510
2511   return (w >> bitno) & 1;
2512 }
2513
2514 /* If we can prove that TOP = cst * BOT for some constant cst,
2515    store cst to MUL and return true.  Otherwise return false.
2516    The returned value is always sign-extended, regardless of the
2517    signedness of TOP and BOT.  */
2518
2519 static bool
2520 constant_multiple_of (tree top, tree bot, double_int *mul)
2521 {
2522   tree mby;
2523   enum tree_code code;
2524   double_int res, p0, p1;
2525   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2526
2527   STRIP_NOPS (top);
2528   STRIP_NOPS (bot);
2529
2530   if (operand_equal_p (top, bot, 0))
2531     {
2532       *mul = double_int_one;
2533       return true;
2534     }
2535
2536   code = TREE_CODE (top);
2537   switch (code)
2538     {
2539     case MULT_EXPR:
2540       mby = TREE_OPERAND (top, 1);
2541       if (TREE_CODE (mby) != INTEGER_CST)
2542         return false;
2543
2544       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2545         return false;
2546
2547       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2548                               precision);
2549       return true;
2550
2551     case PLUS_EXPR:
2552     case MINUS_EXPR:
2553       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2554           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2555         return false;
2556
2557       if (code == MINUS_EXPR)
2558         p1 = double_int_neg (p1);
2559       *mul = double_int_sext (double_int_add (p0, p1), precision);
2560       return true;
2561
2562     case INTEGER_CST:
2563       if (TREE_CODE (bot) != INTEGER_CST)
2564         return false;
2565
2566       p0 = double_int_sext (tree_to_double_int (top), precision);
2567       p1 = double_int_sext (tree_to_double_int (bot), precision);
2568       if (double_int_zero_p (p1))
2569         return false;
2570       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2571                               precision);
2572       return double_int_zero_p (res);
2573
2574     default:
2575       return false;
2576     }
2577 }
2578
2579 /* Folds EXPR using the affine expressions framework.  */
2580
2581 static tree
2582 fold_affine_expr (tree expr)
2583 {
2584   tree type = TREE_TYPE (expr);
2585   struct affine_tree_combination comb;
2586
2587   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2588     return expr;
2589
2590   tree_to_aff_combination (expr, type, &comb);
2591   return aff_combination_to_tree (&comb);
2592 }
2593
2594 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2595    same precision that is at least as wide as the precision of TYPE, stores
2596    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2597    type of A and B.  */
2598
2599 static tree
2600 determine_common_wider_type (tree *a, tree *b)
2601 {
2602   tree wider_type = NULL;
2603   tree suba, subb;
2604   tree atype = TREE_TYPE (*a);
2605
2606   if ((TREE_CODE (*a) == NOP_EXPR
2607        || TREE_CODE (*a) == CONVERT_EXPR))
2608     {
2609       suba = TREE_OPERAND (*a, 0);
2610       wider_type = TREE_TYPE (suba);
2611       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2612         return atype;
2613     }
2614   else
2615     return atype;
2616
2617   if ((TREE_CODE (*b) == NOP_EXPR
2618        || TREE_CODE (*b) == CONVERT_EXPR))
2619     {
2620       subb = TREE_OPERAND (*b, 0);
2621       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2622         return atype;
2623     }
2624   else
2625     return atype;
2626
2627   *a = suba;
2628   *b = subb;
2629   return wider_type;
2630 }
2631
2632 /* Determines the expression by that USE is expressed from induction variable
2633    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2634    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2635
2636 static bool
2637 get_computation_aff (struct loop *loop,
2638                      struct iv_use *use, struct iv_cand *cand, tree at,
2639                      struct affine_tree_combination *aff)
2640 {
2641   tree ubase = use->iv->base;
2642   tree ustep = use->iv->step;
2643   tree cbase = cand->iv->base;
2644   tree cstep = cand->iv->step, cstep_common;
2645   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2646   tree common_type, var;
2647   tree uutype;
2648   aff_tree cbase_aff, var_aff;
2649   double_int rat;
2650
2651   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2652     {
2653       /* We do not have a precision to express the values of use.  */
2654       return false;
2655     }
2656
2657   var = var_at_stmt (loop, cand, at);
2658   uutype = unsigned_type_for (utype);
2659
2660   /* If the conversion is not noop, perform it.  */
2661   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2662     {
2663       cstep = fold_convert (uutype, cstep);
2664       cbase = fold_convert (uutype, cbase);
2665       var = fold_convert (uutype, var);
2666     }
2667
2668   if (!constant_multiple_of (ustep, cstep, &rat))
2669     return false;
2670
2671   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2672      type, we achieve better folding by computing their difference in this
2673      wider type, and cast the result to UUTYPE.  We do not need to worry about
2674      overflows, as all the arithmetics will in the end be performed in UUTYPE
2675      anyway.  */
2676   common_type = determine_common_wider_type (&ubase, &cbase);
2677
2678   /* use = ubase - ratio * cbase + ratio * var.  */
2679   tree_to_aff_combination (ubase, common_type, aff);
2680   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2681   tree_to_aff_combination (var, uutype, &var_aff);
2682
2683   /* We need to shift the value if we are after the increment.  */
2684   if (stmt_after_increment (loop, cand, at))
2685     {
2686       aff_tree cstep_aff;
2687
2688       if (common_type != uutype)
2689         cstep_common = fold_convert (common_type, cstep);
2690       else
2691         cstep_common = cstep;
2692
2693       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2694       aff_combination_add (&cbase_aff, &cstep_aff);
2695     }
2696
2697   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2698   aff_combination_add (aff, &cbase_aff);
2699   if (common_type != uutype)
2700     aff_combination_convert (aff, uutype);
2701
2702   aff_combination_scale (&var_aff, rat);
2703   aff_combination_add (aff, &var_aff);
2704
2705   return true;
2706 }
2707
2708 /* Determines the expression by that USE is expressed from induction variable
2709    CAND at statement AT in LOOP.  The computation is unshared.  */
2710
2711 static tree
2712 get_computation_at (struct loop *loop,
2713                     struct iv_use *use, struct iv_cand *cand, tree at)
2714 {
2715   aff_tree aff;
2716   tree type = TREE_TYPE (use->iv->base);
2717
2718   if (!get_computation_aff (loop, use, cand, at, &aff))
2719     return NULL_TREE;
2720   unshare_aff_combination (&aff);
2721   return fold_convert (type, aff_combination_to_tree (&aff));
2722 }
2723
2724 /* Determines the expression by that USE is expressed from induction variable
2725    CAND in LOOP.  The computation is unshared.  */
2726
2727 static tree
2728 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2729 {
2730   return get_computation_at (loop, use, cand, use->stmt);
2731 }
2732
2733 /* Returns cost of addition in MODE.  */
2734
2735 static unsigned
2736 add_cost (enum machine_mode mode)
2737 {
2738   static unsigned costs[NUM_MACHINE_MODES];
2739   rtx seq;
2740   unsigned cost;
2741
2742   if (costs[mode])
2743     return costs[mode];
2744
2745   start_sequence ();
2746   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2747                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2748                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2749                  NULL_RTX);
2750   seq = get_insns ();
2751   end_sequence ();
2752
2753   cost = seq_cost (seq);
2754   if (!cost)
2755     cost = 1;
2756
2757   costs[mode] = cost;
2758
2759   if (dump_file && (dump_flags & TDF_DETAILS))
2760     fprintf (dump_file, "Addition in %s costs %d\n",
2761              GET_MODE_NAME (mode), cost);
2762   return cost;
2763 }
2764
2765 /* Entry in a hashtable of already known costs for multiplication.  */
2766 struct mbc_entry
2767 {
2768   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2769   enum machine_mode mode;       /* In mode.  */
2770   unsigned cost;                /* The cost.  */
2771 };
2772
2773 /* Counts hash value for the ENTRY.  */
2774
2775 static hashval_t
2776 mbc_entry_hash (const void *entry)
2777 {
2778   const struct mbc_entry *e = entry;
2779
2780   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2781 }
2782
2783 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2784
2785 static int
2786 mbc_entry_eq (const void *entry1, const void *entry2)
2787 {
2788   const struct mbc_entry *e1 = entry1;
2789   const struct mbc_entry *e2 = entry2;
2790
2791   return (e1->mode == e2->mode
2792           && e1->cst == e2->cst);
2793 }
2794
2795 /* Returns cost of multiplication by constant CST in MODE.  */
2796
2797 unsigned
2798 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2799 {
2800   static htab_t costs;
2801   struct mbc_entry **cached, act;
2802   rtx seq;
2803   unsigned cost;
2804
2805   if (!costs)
2806     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2807
2808   act.mode = mode;
2809   act.cst = cst;
2810   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2811   if (*cached)
2812     return (*cached)->cost;
2813
2814   *cached = XNEW (struct mbc_entry);
2815   (*cached)->mode = mode;
2816   (*cached)->cst = cst;
2817
2818   start_sequence ();
2819   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2820                gen_int_mode (cst, mode), NULL_RTX, 0);
2821   seq = get_insns ();
2822   end_sequence ();
2823
2824   cost = seq_cost (seq);
2825
2826   if (dump_file && (dump_flags & TDF_DETAILS))
2827     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2828              (int) cst, GET_MODE_NAME (mode), cost);
2829
2830   (*cached)->cost = cost;
2831
2832   return cost;
2833 }
2834
2835 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2836    validity for a memory reference accessing memory of mode MODE.  */
2837
2838 bool
2839 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2840 {
2841 #define MAX_RATIO 128
2842   static sbitmap valid_mult[MAX_MACHINE_MODE];
2843
2844   if (!valid_mult[mode])
2845     {
2846       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2847       rtx addr;
2848       HOST_WIDE_INT i;
2849
2850       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2851       sbitmap_zero (valid_mult[mode]);
2852       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2853       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2854         {
2855           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2856           if (memory_address_p (mode, addr))
2857             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2858         }
2859
2860       if (dump_file && (dump_flags & TDF_DETAILS))
2861         {
2862           fprintf (dump_file, "  allowed multipliers:");
2863           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2864             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2865               fprintf (dump_file, " %d", (int) i);
2866           fprintf (dump_file, "\n");
2867           fprintf (dump_file, "\n");
2868         }
2869     }
2870
2871   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2872     return false;
2873
2874   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2875 }
2876
2877 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2878    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2879    variable is omitted.  Compute the cost for a memory reference that accesses
2880    a memory location of mode MEM_MODE.
2881
2882    TODO -- there must be some better way.  This all is quite crude.  */
2883
2884 static unsigned
2885 get_address_cost (bool symbol_present, bool var_present,
2886                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2887                   enum machine_mode mem_mode)
2888 {
2889   static bool initialized[MAX_MACHINE_MODE];
2890   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2891   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2892   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2893   unsigned cost, acost;
2894   bool offset_p, ratio_p;
2895   HOST_WIDE_INT s_offset;
2896   unsigned HOST_WIDE_INT mask;
2897   unsigned bits;
2898
2899   if (!initialized[mem_mode])
2900     {
2901       HOST_WIDE_INT i;
2902       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2903       int old_cse_not_expected;
2904       unsigned sym_p, var_p, off_p, rat_p, add_c;
2905       rtx seq, addr, base;
2906       rtx reg0, reg1;
2907
2908       initialized[mem_mode] = true;
2909
2910       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2911
2912       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2913       for (i = start; i <= 1 << 20; i <<= 1)
2914         {
2915           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2916           if (!memory_address_p (mem_mode, addr))
2917             break;
2918         }
2919       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2920       off[mem_mode] = max_offset[mem_mode];
2921
2922       for (i = start; i <= 1 << 20; i <<= 1)
2923         {
2924           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2925           if (!memory_address_p (mem_mode, addr))
2926             break;
2927         }
2928       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2929
2930       if (dump_file && (dump_flags & TDF_DETAILS))
2931         {
2932           fprintf (dump_file, "get_address_cost:\n");
2933           fprintf (dump_file, "  min offset %s %d\n",
2934                    GET_MODE_NAME (mem_mode),
2935                    (int) min_offset[mem_mode]);
2936           fprintf (dump_file, "  max offset %s %d\n",
2937                    GET_MODE_NAME (mem_mode),
2938                    (int) max_offset[mem_mode]);
2939         }
2940
2941       rat[mem_mode] = 1;
2942       for (i = 2; i <= MAX_RATIO; i++)
2943         if (multiplier_allowed_in_address_p (i, mem_mode))
2944           {
2945             rat[mem_mode] = i;
2946             break;
2947           }
2948
2949       /* Compute the cost of various addressing modes.  */
2950       acost = 0;
2951       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2952       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2953
2954       for (i = 0; i < 16; i++)
2955         {
2956           sym_p = i & 1;
2957           var_p = (i >> 1) & 1;
2958           off_p = (i >> 2) & 1;
2959           rat_p = (i >> 3) & 1;
2960
2961           addr = reg0;
2962           if (rat_p)
2963             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2964                                    gen_int_mode (rat[mem_mode], Pmode));
2965
2966           if (var_p)
2967             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2968
2969           if (sym_p)
2970             {
2971               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2972               if (off_p)
2973                 base = gen_rtx_fmt_e (CONST, Pmode,
2974                                       gen_rtx_fmt_ee (PLUS, Pmode,
2975                                                       base,
2976                                                       gen_int_mode (off[mem_mode],
2977                                                                     Pmode)));
2978             }
2979           else if (off_p)
2980             base = gen_int_mode (off[mem_mode], Pmode);
2981           else
2982             base = NULL_RTX;
2983
2984           if (base)
2985             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2986
2987           start_sequence ();
2988           /* To avoid splitting addressing modes, pretend that no cse will
2989              follow.  */
2990           old_cse_not_expected = cse_not_expected;
2991           cse_not_expected = true;
2992           addr = memory_address (mem_mode, addr);
2993           cse_not_expected = old_cse_not_expected;
2994           seq = get_insns ();
2995           end_sequence ();
2996
2997           acost = seq_cost (seq);
2998           acost += address_cost (addr, mem_mode);
2999
3000           if (!acost)
3001             acost = 1;
3002           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3003         }
3004
3005       /* On some targets, it is quite expensive to load symbol to a register,
3006          which makes addresses that contain symbols look much more expensive.
3007          However, the symbol will have to be loaded in any case before the
3008          loop (and quite likely we have it in register already), so it does not
3009          make much sense to penalize them too heavily.  So make some final
3010          tweaks for the SYMBOL_PRESENT modes:
3011
3012          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3013          var is cheaper, use this mode with small penalty.
3014          If VAR_PRESENT is true, try whether the mode with
3015          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3016          if this is the case, use it.  */
3017       add_c = add_cost (Pmode);
3018       for (i = 0; i < 8; i++)
3019         {
3020           var_p = i & 1;
3021           off_p = (i >> 1) & 1;
3022           rat_p = (i >> 2) & 1;
3023
3024           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3025           if (var_p)
3026             acost += add_c;
3027
3028           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3029             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3030         }
3031
3032       if (dump_file && (dump_flags & TDF_DETAILS))
3033         {
3034           fprintf (dump_file, "Address costs:\n");
3035
3036           for (i = 0; i < 16; i++)
3037             {
3038               sym_p = i & 1;
3039               var_p = (i >> 1) & 1;
3040               off_p = (i >> 2) & 1;
3041               rat_p = (i >> 3) & 1;
3042
3043               fprintf (dump_file, "  ");
3044               if (sym_p)
3045                 fprintf (dump_file, "sym + ");
3046               if (var_p)
3047                 fprintf (dump_file, "var + ");
3048               if (off_p)
3049                 fprintf (dump_file, "cst + ");
3050               if (rat_p)
3051                 fprintf (dump_file, "rat * ");
3052
3053               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3054               fprintf (dump_file, "index costs %d\n", acost);
3055             }
3056           fprintf (dump_file, "\n");
3057         }
3058     }
3059
3060   bits = GET_MODE_BITSIZE (Pmode);
3061   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3062   offset &= mask;
3063   if ((offset >> (bits - 1) & 1))
3064     offset |= ~mask;
3065   s_offset = offset;
3066
3067   cost = 0;
3068   offset_p = (s_offset != 0
3069               && min_offset[mem_mode] <= s_offset
3070               && s_offset <= max_offset[mem_mode]);
3071   ratio_p = (ratio != 1
3072              && multiplier_allowed_in_address_p (ratio, mem_mode));
3073
3074   if (ratio != 1 && !ratio_p)
3075     cost += multiply_by_cost (ratio, Pmode);
3076
3077   if (s_offset && !offset_p && !symbol_present)
3078     cost += add_cost (Pmode);
3079
3080   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3081   return cost + acost;
3082 }
3083
3084 /* Estimates cost of forcing expression EXPR into a variable.  */
3085
3086 unsigned
3087 force_expr_to_var_cost (tree expr)
3088 {
3089   static bool costs_initialized = false;
3090   static unsigned integer_cost;
3091   static unsigned symbol_cost;
3092   static unsigned address_cost;
3093   tree op0, op1;
3094   unsigned cost0, cost1, cost;
3095   enum machine_mode mode;
3096
3097   if (!costs_initialized)
3098     {
3099       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3100       rtx x = gen_rtx_MEM (DECL_MODE (var),
3101                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3102       tree addr;
3103       tree type = build_pointer_type (integer_type_node);
3104
3105       integer_cost = computation_cost (build_int_cst (integer_type_node,
3106                                                       2000));
3107
3108       SET_DECL_RTL (var, x);
3109       TREE_STATIC (var) = 1;
3110       addr = build1 (ADDR_EXPR, type, var);
3111       symbol_cost = computation_cost (addr) + 1;
3112
3113       address_cost
3114         = computation_cost (build2 (PLUS_EXPR, type,
3115                                     addr,
3116                                     build_int_cst (type, 2000))) + 1;
3117       if (dump_file && (dump_flags & TDF_DETAILS))
3118         {
3119           fprintf (dump_file, "force_expr_to_var_cost:\n");
3120           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3121           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3122           fprintf (dump_file, "  address %d\n", (int) address_cost);
3123           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3124           fprintf (dump_file, "\n");
3125         }
3126
3127       costs_initialized = true;
3128     }
3129
3130   STRIP_NOPS (expr);
3131
3132   if (SSA_VAR_P (expr))
3133     return 0;
3134
3135   if (TREE_INVARIANT (expr))
3136     {
3137       if (TREE_CODE (expr) == INTEGER_CST)
3138         return integer_cost;
3139
3140       if (TREE_CODE (expr) == ADDR_EXPR)
3141         {
3142           tree obj = TREE_OPERAND (expr, 0);
3143
3144           if (TREE_CODE (obj) == VAR_DECL
3145               || TREE_CODE (obj) == PARM_DECL
3146               || TREE_CODE (obj) == RESULT_DECL)
3147             return symbol_cost;
3148         }
3149
3150       return address_cost;
3151     }
3152
3153   switch (TREE_CODE (expr))
3154     {
3155     case PLUS_EXPR:
3156     case MINUS_EXPR:
3157     case MULT_EXPR:
3158       op0 = TREE_OPERAND (expr, 0);
3159       op1 = TREE_OPERAND (expr, 1);
3160       STRIP_NOPS (op0);
3161       STRIP_NOPS (op1);
3162
3163       if (is_gimple_val (op0))
3164         cost0 = 0;
3165       else
3166         cost0 = force_expr_to_var_cost (op0);
3167
3168       if (is_gimple_val (op1))
3169         cost1 = 0;
3170       else
3171         cost1 = force_expr_to_var_cost (op1);
3172
3173       break;
3174
3175     default:
3176       /* Just an arbitrary value, FIXME.  */
3177       return target_spill_cost;
3178     }
3179
3180   mode = TYPE_MODE (TREE_TYPE (expr));
3181   switch (TREE_CODE (expr))
3182     {
3183     case PLUS_EXPR:
3184     case MINUS_EXPR:
3185       cost = add_cost (mode);
3186       break;
3187
3188     case MULT_EXPR:
3189       if (cst_and_fits_in_hwi (op0))
3190         cost = multiply_by_cost (int_cst_value (op0), mode);
3191       else if (cst_and_fits_in_hwi (op1))
3192         cost = multiply_by_cost (int_cst_value (op1), mode);
3193       else
3194         return target_spill_cost;
3195       break;
3196
3197     default:
3198       gcc_unreachable ();
3199     }
3200
3201   cost += cost0;
3202   cost += cost1;
3203
3204   /* Bound the cost by target_spill_cost.  The parts of complicated
3205      computations often are either loop invariant or at least can
3206      be shared between several iv uses, so letting this grow without
3207      limits would not give reasonable results.  */
3208   return cost < target_spill_cost ? cost : target_spill_cost;
3209 }
3210
3211 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3212    invariants the computation depends on.  */
3213
3214 static unsigned
3215 force_var_cost (struct ivopts_data *data,
3216                 tree expr, bitmap *depends_on)
3217 {
3218   if (depends_on)
3219     {
3220       fd_ivopts_data = data;
3221       walk_tree (&expr, find_depends, depends_on, NULL);
3222     }
3223
3224   return force_expr_to_var_cost (expr);
3225 }
3226
3227 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3228    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3229    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3230    invariants the computation depends on.  */
3231
3232 static unsigned
3233 split_address_cost (struct ivopts_data *data,
3234                     tree addr, bool *symbol_present, bool *var_present,
3235                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3236 {
3237   tree core;
3238   HOST_WIDE_INT bitsize;
3239   HOST_WIDE_INT bitpos;
3240   tree toffset;
3241   enum machine_mode mode;
3242   int unsignedp, volatilep;
3243
3244   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3245                               &unsignedp, &volatilep, false);
3246
3247   if (toffset != 0
3248       || bitpos % BITS_PER_UNIT != 0
3249       || TREE_CODE (core) != VAR_DECL)
3250     {
3251       *symbol_present = false;
3252       *var_present = true;
3253       fd_ivopts_data = data;
3254       walk_tree (&addr, find_depends, depends_on, NULL);
3255       return target_spill_cost;
3256     }
3257
3258   *offset += bitpos / BITS_PER_UNIT;
3259   if (TREE_STATIC (core)
3260       || DECL_EXTERNAL (core))
3261     {
3262       *symbol_present = true;
3263       *var_present = false;
3264       return 0;
3265     }
3266
3267   *symbol_present = false;
3268   *var_present = true;
3269   return 0;
3270 }
3271
3272 /* Estimates cost of expressing difference of addresses E1 - E2 as
3273    var + symbol + offset.  The value of offset is added to OFFSET,
3274    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3275    part is missing.  DEPENDS_ON is a set of the invariants the computation
3276    depends on.  */
3277
3278 static unsigned
3279 ptr_difference_cost (struct ivopts_data *data,
3280                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3281                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3282 {
3283   HOST_WIDE_INT diff = 0;
3284   unsigned cost;
3285
3286   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3287
3288   if (ptr_difference_const (e1, e2, &diff))
3289     {
3290       *offset += diff;
3291       *symbol_present = false;
3292       *var_present = false;
3293       return 0;
3294     }
3295
3296   if (e2 == integer_zero_node)
3297     return split_address_cost (data, TREE_OPERAND (e1, 0),
3298                                symbol_present, var_present, offset, depends_on);
3299
3300   *symbol_present = false;
3301   *var_present = true;
3302
3303   cost = force_var_cost (data, e1, depends_on);
3304   cost += force_var_cost (data, e2, depends_on);
3305   cost += add_cost (Pmode);
3306
3307   return cost;
3308 }
3309
3310 /* Estimates cost of expressing difference E1 - E2 as
3311    var + symbol + offset.  The value of offset is added to OFFSET,
3312    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3313    part is missing.  DEPENDS_ON is a set of the invariants the computation
3314    depends on.  */
3315
3316 static unsigned
3317 difference_cost (struct ivopts_data *data,
3318                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3319                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3320 {
3321   unsigned cost;
3322   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3323   unsigned HOST_WIDE_INT off1, off2;
3324
3325   e1 = strip_offset (e1, &off1);
3326   e2 = strip_offset (e2, &off2);
3327   *offset += off1 - off2;
3328
3329   STRIP_NOPS (e1);
3330   STRIP_NOPS (e2);
3331
3332   if (TREE_CODE (e1) == ADDR_EXPR)
3333     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3334                                 depends_on);
3335   *symbol_present = false;
3336
3337   if (operand_equal_p (e1, e2, 0))
3338     {
3339       *var_present = false;
3340       return 0;
3341     }
3342   *var_present = true;
3343   if (zero_p (e2))
3344     return force_var_cost (data, e1, depends_on);
3345
3346   if (zero_p (e1))
3347     {
3348       cost = force_var_cost (data, e2, depends_on);
3349       cost += multiply_by_cost (-1, mode);
3350
3351       return cost;
3352     }
3353
3354   cost = force_var_cost (data, e1, depends_on);
3355   cost += force_var_cost (data, e2, depends_on);
3356   cost += add_cost (mode);
3357
3358   return cost;
3359 }
3360
3361 /* Determines the cost of the computation by that USE is expressed
3362    from induction variable CAND.  If ADDRESS_P is true, we just need
3363    to create an address from it, otherwise we want to get it into
3364    register.  A set of invariants we depend on is stored in
3365    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3366
3367 static unsigned
3368 get_computation_cost_at (struct ivopts_data *data,
3369                          struct iv_use *use, struct iv_cand *cand,
3370                          bool address_p, bitmap *depends_on, tree at)
3371 {
3372   tree ubase = use->iv->base, ustep = use->iv->step;
3373   tree cbase, cstep;
3374   tree utype = TREE_TYPE (ubase), ctype;
3375   unsigned HOST_WIDE_INT cstepi, offset = 0;
3376   HOST_WIDE_INT ratio, aratio;
3377   bool var_present, symbol_present;
3378   unsigned cost = 0, n_sums;
3379   double_int rat;
3380
3381   *depends_on = NULL;
3382
3383   /* Only consider real candidates.  */
3384   if (!cand->iv)
3385     return INFTY;
3386
3387   cbase = cand->iv->base;
3388   cstep = cand->iv->step;
3389   ctype = TREE_TYPE (cbase);
3390
3391   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3392     {
3393       /* We do not have a precision to express the values of use.  */
3394       return INFTY;
3395     }
3396
3397   if (address_p)
3398     {
3399       /* Do not try to express address of an object with computation based
3400          on address of a different object.  This may cause problems in rtl
3401          level alias analysis (that does not expect this to be happening,
3402          as this is illegal in C), and would be unlikely to be useful
3403          anyway.  */
3404       if (use->iv->base_object
3405           && cand->iv->base_object
3406           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3407         return INFTY;
3408     }
3409
3410   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3411     {
3412       /* TODO -- add direct handling of this case.  */
3413       goto fallback;
3414     }
3415
3416   /* CSTEPI is removed from the offset in case statement is after the
3417      increment.  If the step is not constant, we use zero instead.
3418      This is a bit imprecise (there is the extra addition), but
3419      redundancy elimination is likely to transform the code so that
3420      it uses value of the variable before increment anyway,
3421      so it is not that much unrealistic.  */
3422   if (cst_and_fits_in_hwi (cstep))
3423     cstepi = int_cst_value (cstep);
3424   else
3425     cstepi = 0;
3426
3427   if (!constant_multiple_of (ustep, cstep, &rat))
3428     return INFTY;
3429
3430   if (double_int_fits_in_shwi_p (rat))
3431     ratio = double_int_to_shwi (rat);
3432   else
3433     return INFTY;
3434
3435   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3436      or ratio == 1, it is better to handle this like
3437
3438      ubase - ratio * cbase + ratio * var
3439
3440      (also holds in the case ratio == -1, TODO.  */
3441
3442   if (cst_and_fits_in_hwi (cbase))
3443     {
3444       offset = - ratio * int_cst_value (cbase);
3445       cost += difference_cost (data,
3446                                ubase, integer_zero_node,
3447                                &symbol_present, &var_present, &offset,
3448                                depends_on);
3449     }
3450   else if (ratio == 1)
3451     {
3452       cost += difference_cost (data,
3453                                ubase, cbase,
3454                                &symbol_present, &var_present, &offset,
3455                                depends_on);
3456     }
3457   else
3458     {
3459       cost += force_var_cost (data, cbase, depends_on);
3460       cost += add_cost (TYPE_MODE (ctype));
3461       cost += difference_cost (data,
3462                                ubase, integer_zero_node,
3463                                &symbol_present, &var_present, &offset,
3464                                depends_on);
3465     }
3466
3467   /* If we are after the increment, the value of the candidate is higher by
3468      one iteration.  */
3469   if (stmt_after_increment (data->current_loop, cand, at))
3470     offset -= ratio * cstepi;
3471
3472   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3473      (symbol/var/const parts may be omitted).  If we are looking for an address,
3474      find the cost of addressing this.  */
3475   if (address_p)
3476     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3477                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3478
3479   /* Otherwise estimate the costs for computing the expression.  */
3480   aratio = ratio > 0 ? ratio : -ratio;
3481   if (!symbol_present && !var_present && !offset)
3482     {
3483       if (ratio != 1)
3484         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3485
3486       return cost;
3487     }
3488
3489   if (aratio != 1)
3490     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3491
3492   n_sums = 1;
3493   if (var_present
3494       /* Symbol + offset should be compile-time computable.  */
3495       && (symbol_present || offset))
3496     n_sums++;
3497
3498   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3499
3500 fallback:
3501   {
3502     /* Just get the expression, expand it and measure the cost.  */
3503     tree comp = get_computation_at (data->current_loop, use, cand, at);
3504
3505     if (!comp)
3506       return INFTY;
3507
3508     if (address_p)
3509       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3510
3511     return computation_cost (comp);
3512   }
3513 }
3514
3515 /* Determines the cost of the computation by that USE is expressed
3516    from induction variable CAND.  If ADDRESS_P is true, we just need
3517    to create an address from it, otherwise we want to get it into
3518    register.  A set of invariants we depend on is stored in
3519    DEPENDS_ON.  */
3520
3521 static unsigned
3522 get_computation_cost (struct ivopts_data *data,
3523                       struct iv_use *use, struct iv_cand *cand,
3524                       bool address_p, bitmap *depends_on)
3525 {
3526   return get_computation_cost_at (data,
3527                                   use, cand, address_p, depends_on, use->stmt);
3528 }
3529
3530 /* Determines cost of basing replacement of USE on CAND in a generic
3531    expression.  */
3532
3533 static bool
3534 determine_use_iv_cost_generic (struct ivopts_data *data,
3535                                struct iv_use *use, struct iv_cand *cand)
3536 {
3537   bitmap depends_on;
3538   unsigned cost;
3539
3540   /* The simple case first -- if we need to express value of the preserved
3541      original biv, the cost is 0.  This also prevents us from counting the
3542      cost of increment twice -- once at this use and once in the cost of
3543      the candidate.  */
3544   if (cand->pos == IP_ORIGINAL
3545       && cand->incremented_at == use->stmt)
3546     {
3547       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3548       return true;
3549     }
3550
3551   cost = get_computation_cost (data, use, cand, false, &depends_on);
3552   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3553
3554   return cost != INFTY;
3555 }
3556
3557 /* Determines cost of basing replacement of USE on CAND in an address.  */
3558
3559 static bool
3560 determine_use_iv_cost_address (struct ivopts_data *data,
3561                                struct iv_use *use, struct iv_cand *cand)
3562 {
3563   bitmap depends_on;
3564   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3565
3566   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3567
3568   return cost != INFTY;
3569 }
3570
3571 /* Computes value of induction variable IV in iteration NITER.  */
3572
3573 static tree
3574 iv_value (struct iv *iv, tree niter)
3575 {
3576   tree val;
3577   tree type = TREE_TYPE (iv->base);
3578
3579   niter = fold_convert (type, niter);
3580   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3581
3582   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3583 }
3584
3585 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3586
3587 static tree
3588 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3589 {
3590   tree val = iv_value (cand->iv, niter);
3591   tree type = TREE_TYPE (cand->iv->base);
3592
3593   if (stmt_after_increment (loop, cand, at))
3594     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3595
3596   return val;
3597 }
3598
3599 /* Returns period of induction variable iv.  */
3600
3601 static tree
3602 iv_period (struct iv *iv)
3603 {
3604   tree step = iv->step, period, type;
3605   tree pow2div;
3606
3607   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3608
3609   /* Period of the iv is gcd (step, type range).  Since type range is power
3610      of two, it suffices to determine the maximum power of two that divides
3611      step.  */
3612   pow2div = num_ending_zeros (step);
3613   type = unsigned_type_for (TREE_TYPE (step));
3614
3615   period = build_low_bits_mask (type,
3616                                 (TYPE_PRECISION (type)
3617                                  - tree_low_cst (pow2div, 1)));
3618
3619   return period;
3620 }
3621
3622 /* Returns the comparison operator used when eliminating the iv USE.  */
3623
3624 static enum tree_code
3625 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3626 {
3627   struct loop *loop = data->current_loop;
3628   basic_block ex_bb;
3629   edge exit;
3630
3631   ex_bb = bb_for_stmt (use->stmt);
3632   exit = EDGE_SUCC (ex_bb, 0);
3633   if (flow_bb_inside_loop_p (loop, exit->dest))
3634     exit = EDGE_SUCC (ex_bb, 1);
3635
3636   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3637 }
3638
3639 /* Check whether it is possible to express the condition in USE by comparison
3640    of candidate CAND.  If so, store the value compared with to BOUND.  */
3641
3642 static bool
3643 may_eliminate_iv (struct ivopts_data *data,
3644                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3645 {
3646   basic_block ex_bb;
3647   edge exit;
3648   tree nit, nit_type;
3649   tree wider_type, period, per_type;
3650   struct loop *loop = data->current_loop;
3651
3652   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3653     return false;
3654
3655   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3656      for other conditions inside loop body.  */
3657   ex_bb = bb_for_stmt (use->stmt);
3658   if (use->stmt != last_stmt (ex_bb)
3659       || TREE_CODE (use->stmt) != COND_EXPR)
3660     return false;
3661   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3662     return false;
3663
3664   exit = EDGE_SUCC (ex_bb, 0);
3665   if (flow_bb_inside_loop_p (loop, exit->dest))
3666     exit = EDGE_SUCC (ex_bb, 1);
3667   if (flow_bb_inside_loop_p (loop, exit->dest))
3668     return false;
3669
3670   nit = niter_for_exit (data, exit);
3671   if (!nit)
3672     return false;
3673
3674   nit_type = TREE_TYPE (nit);
3675
3676   /* Determine whether we may use the variable to test whether niter iterations
3677      elapsed.  This is the case iff the period of the induction variable is
3678      greater than the number of iterations.  */
3679   period = iv_period (cand->iv);
3680   if (!period)
3681     return false;
3682   per_type = TREE_TYPE (period);
3683
3684   wider_type = TREE_TYPE (period);
3685   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3686     wider_type = per_type;
3687   else
3688     wider_type = nit_type;
3689
3690   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3691                                       fold_convert (wider_type, period),
3692                                       fold_convert (wider_type, nit))))
3693     return false;
3694
3695   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
3696   return true;
3697 }
3698
3699 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3700
3701 static bool
3702 determine_use_iv_cost_condition (struct ivopts_data *data,
3703                                  struct iv_use *use, struct iv_cand *cand)
3704 {
3705   tree bound = NULL_TREE, op, cond;
3706   bitmap depends_on = NULL;
3707   unsigned cost;
3708
3709   /* Only consider real candidates.  */
3710   if (!cand->iv)
3711     {
3712       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3713       return false;
3714     }
3715
3716   if (may_eliminate_iv (data, use, cand, &bound))
3717     {
3718       cost = force_var_cost (data, bound, &depends_on);
3719
3720       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3721       return cost != INFTY;
3722     }
3723
3724   /* The induction variable elimination failed; just express the original
3725      giv.  If it is compared with an invariant, note that we cannot get
3726      rid of it.  */
3727   cost = get_computation_cost (data, use, cand, false, &depends_on);
3728
3729   cond = *use->op_p;
3730   if (TREE_CODE (cond) != SSA_NAME)
3731     {
3732       op = TREE_OPERAND (cond, 0);
3733       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
3734         op = TREE_OPERAND (cond, 1);
3735       if (TREE_CODE (op) == SSA_NAME)
3736         {
3737           op = get_iv (data, op)->base;
3738           fd_ivopts_data = data;
3739           walk_tree (&op, find_depends, &depends_on, NULL);
3740         }
3741     }
3742
3743   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3744   return cost != INFTY;
3745 }
3746
3747 /* Determines cost of basing replacement of USE on CAND.  Returns false
3748    if USE cannot be based on CAND.  */
3749
3750 static bool
3751 determine_use_iv_cost (struct ivopts_data *data,
3752                        struct iv_use *use, struct iv_cand *cand)
3753 {
3754   switch (use->type)
3755     {
3756     case USE_NONLINEAR_EXPR:
3757       return determine_use_iv_cost_generic (data, use, cand);
3758
3759     case USE_ADDRESS:
3760       return determine_use_iv_cost_address (data, use, cand);
3761
3762     case USE_COMPARE:
3763       return determine_use_iv_cost_condition (data, use, cand);
3764
3765     default:
3766       gcc_unreachable ();
3767     }
3768 }
3769
3770 /* Determines costs of basing the use of the iv on an iv candidate.  */
3771
3772 static void
3773 determine_use_iv_costs (struct ivopts_data *data)
3774 {
3775   unsigned i, j;
3776   struct iv_use *use;
3777   struct iv_cand *cand;
3778   bitmap to_clear = BITMAP_ALLOC (NULL);
3779
3780   alloc_use_cost_map (data);
3781
3782   for (i = 0; i < n_iv_uses (data); i++)
3783     {
3784       use = iv_use (data, i);
3785
3786       if (data->consider_all_candidates)
3787         {
3788           for (j = 0; j < n_iv_cands (data); j++)
3789             {
3790               cand = iv_cand (data, j);
3791               determine_use_iv_cost (data, use, cand);
3792             }
3793         }
3794       else
3795         {
3796           bitmap_iterator bi;
3797
3798           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3799             {
3800               cand = iv_cand (data, j);
3801               if (!determine_use_iv_cost (data, use, cand))
3802                 bitmap_set_bit (to_clear, j);
3803             }
3804
3805           /* Remove the candidates for that the cost is infinite from
3806              the list of related candidates.  */
3807           bitmap_and_compl_into (use->related_cands, to_clear);
3808           bitmap_clear (to_clear);
3809         }
3810     }
3811
3812   BITMAP_FREE (to_clear);
3813
3814   if (dump_file && (dump_flags & TDF_DETAILS))
3815     {
3816       fprintf (dump_file, "Use-candidate costs:\n");
3817
3818       for (i = 0; i < n_iv_uses (data); i++)
3819         {
3820           use = iv_use (data, i);
3821
3822           fprintf (dump_file, "Use %d:\n", i);
3823           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3824           for (j = 0; j < use->n_map_members; j++)
3825             {
3826               if (!use->cost_map[j].cand
3827                   || use->cost_map[j].cost == INFTY)
3828                 continue;
3829
3830               fprintf (dump_file, "  %d\t%d\t",
3831                        use->cost_map[j].cand->id,
3832                        use->cost_map[j].cost);
3833               if (use->cost_map[j].depends_on)
3834                 bitmap_print (dump_file,
3835                               use->cost_map[j].depends_on, "","");
3836               fprintf (dump_file, "\n");
3837             }
3838
3839           fprintf (dump_file, "\n");
3840         }
3841       fprintf (dump_file, "\n");
3842     }
3843 }
3844
3845 /* Determines cost of the candidate CAND.  */
3846
3847 static void
3848 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3849 {
3850   unsigned cost_base, cost_step;
3851   tree base;
3852
3853   if (!cand->iv)
3854     {
3855       cand->cost = 0;
3856       return;
3857     }
3858
3859   /* There are two costs associated with the candidate -- its increment
3860      and its initialization.  The second is almost negligible for any loop
3861      that rolls enough, so we take it just very little into account.  */
3862
3863   base = cand->iv->base;
3864   cost_base = force_var_cost (data, base, NULL);
3865   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3866
3867   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3868
3869   /* Prefer the original iv unless we may gain something by replacing it;
3870      this is not really relevant for artificial ivs created by other
3871      passes.  */
3872   if (cand->pos == IP_ORIGINAL
3873       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3874     cand->cost--;
3875
3876   /* Prefer not to insert statements into latch unless there are some
3877      already (so that we do not create unnecessary jumps).  */
3878   if (cand->pos == IP_END
3879       && empty_block_p (ip_end_pos (data->current_loop)))
3880     cand->cost++;
3881 }
3882
3883 /* Determines costs of computation of the candidates.  */
3884
3885 static void
3886 determine_iv_costs (struct ivopts_data *data)
3887 {
3888   unsigned i;
3889
3890   if (dump_file && (dump_flags & TDF_DETAILS))
3891     {
3892       fprintf (dump_file, "Candidate costs:\n");
3893       fprintf (dump_file, "  cand\tcost\n");
3894     }
3895
3896   for (i = 0; i < n_iv_cands (data); i++)
3897     {
3898       struct iv_cand *cand = iv_cand (data, i);
3899
3900       determine_iv_cost (data, cand);
3901
3902       if (dump_file && (dump_flags & TDF_DETAILS))
3903         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3904     }
3905
3906   if (dump_file && (dump_flags & TDF_DETAILS))
3907     fprintf (dump_file, "\n");
3908 }
3909
3910 /* Calculates cost for having SIZE induction variables.  */
3911
3912 static unsigned
3913 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3914 {
3915   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3916 }
3917
3918 /* For each size of the induction variable set determine the penalty.  */
3919
3920 static void
3921 determine_set_costs (struct ivopts_data *data)
3922 {
3923   unsigned j, n;
3924   tree phi, op;
3925   struct loop *loop = data->current_loop;
3926   bitmap_iterator bi;
3927
3928   /* We use the following model (definitely improvable, especially the
3929      cost function -- TODO):
3930
3931      We estimate the number of registers available (using MD data), name it A.
3932
3933      We estimate the number of registers used by the loop, name it U.  This
3934      number is obtained as the number of loop phi nodes (not counting virtual
3935      registers and bivs) + the number of variables from outside of the loop.
3936
3937      We set a reserve R (free regs that are used for temporary computations,
3938      etc.).  For now the reserve is a constant 3.
3939
3940      Let I be the number of induction variables.
3941
3942      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3943         make a lot of ivs without a reason).
3944      -- if A - R < U + I <= A, the cost is I * PRES_COST
3945      -- if U + I > A, the cost is I * PRES_COST and
3946         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3947
3948   if (dump_file && (dump_flags & TDF_DETAILS))
3949     {
3950       fprintf (dump_file, "Global costs:\n");
3951       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3952       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3953       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3954       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3955     }
3956
3957   n = 0;
3958   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3959     {
3960       op = PHI_RESULT (phi);
3961
3962       if (!is_gimple_reg (op))
3963         continue;
3964
3965       if (get_iv (data, op))
3966         continue;
3967
3968       n++;
3969     }
3970
3971   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3972     {
3973       struct version_info *info = ver_info (data, j);
3974
3975       if (info->inv_id && info->has_nonlin_use)
3976         n++;
3977     }
3978
3979   data->regs_used = n;
3980   if (dump_file && (dump_flags & TDF_DETAILS))
3981     fprintf (dump_file, "  regs_used %d\n", n);
3982
3983   if (dump_file && (dump_flags & TDF_DETAILS))
3984     {
3985       fprintf (dump_file, "  cost for size:\n");
3986       fprintf (dump_file, "  ivs\tcost\n");
3987       for (j = 0; j <= 2 * target_avail_regs; j++)
3988         fprintf (dump_file, "  %d\t%d\n", j,
3989                  ivopts_global_cost_for_size (data, j));
3990       fprintf (dump_file, "\n");
3991     }
3992 }
3993
3994 /* Returns true if A is a cheaper cost pair than B.  */
3995
3996 static bool
3997 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3998 {
3999   if (!a)
4000     return false;
4001
4002   if (!b)
4003     return true;
4004
4005   if (a->cost < b->cost)
4006     return true;
4007
4008   if (a->cost > b->cost)
4009     return false;
4010
4011   /* In case the costs are the same, prefer the cheaper candidate.  */
4012   if (a->cand->cost < b->cand->cost)
4013     return true;
4014
4015   return false;
4016 }
4017
4018 /* Computes the cost field of IVS structure.  */
4019
4020 static void
4021 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4022 {
4023   unsigned cost = 0;
4024
4025   cost += ivs->cand_use_cost;
4026   cost += ivs->cand_cost;
4027   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4028
4029   ivs->cost = cost;
4030 }
4031
4032 /* Remove invariants in set INVS to set IVS.  */
4033
4034 static void
4035 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4036 {
4037   bitmap_iterator bi;
4038   unsigned iid;
4039
4040   if (!invs)
4041     return;
4042
4043   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4044     {
4045       ivs->n_invariant_uses[iid]--;
4046       if (ivs->n_invariant_uses[iid] == 0)
4047         ivs->n_regs--;
4048     }
4049 }
4050
4051 /* Set USE not to be expressed by any candidate in IVS.  */
4052
4053 static void
4054 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4055                  struct iv_use *use)
4056 {
4057   unsigned uid = use->id, cid;
4058   struct cost_pair *cp;
4059
4060   cp = ivs->cand_for_use[uid];
4061   if (!cp)
4062     return;
4063   cid = cp->cand->id;
4064
4065   ivs->bad_uses++;
4066   ivs->cand_for_use[uid] = NULL;
4067   ivs->n_cand_uses[cid]--;
4068
4069   if (ivs->n_cand_uses[cid] == 0)
4070     {
4071       bitmap_clear_bit (ivs->cands, cid);
4072       /* Do not count the pseudocandidates.  */
4073       if (cp->cand->iv)
4074         ivs->n_regs--;
4075       ivs->n_cands--;
4076       ivs->cand_cost -= cp->cand->cost;
4077
4078       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4079     }
4080
4081   ivs->cand_use_cost -= cp->cost;
4082
4083   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4084   iv_ca_recount_cost (data, ivs);
4085 }
4086
4087 /* Add invariants in set INVS to set IVS.  */
4088
4089 static void
4090 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4091 {
4092   bitmap_iterator bi;
4093   unsigned iid;
4094
4095   if (!invs)
4096     return;
4097
4098   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4099     {
4100       ivs->n_invariant_uses[iid]++;
4101       if (ivs->n_invariant_uses[iid] == 1)
4102         ivs->n_regs++;
4103     }
4104 }
4105
4106 /* Set cost pair for USE in set IVS to CP.  */
4107
4108 static void
4109 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4110               struct iv_use *use, struct cost_pair *cp)
4111 {
4112   unsigned uid = use->id, cid;
4113
4114   if (ivs->cand_for_use[uid] == cp)
4115     return;
4116
4117   if (ivs->cand_for_use[uid])
4118     iv_ca_set_no_cp (data, ivs, use);
4119
4120   if (cp)
4121     {
4122       cid = cp->cand->id;
4123
4124       ivs->bad_uses--;
4125       ivs->cand_for_use[uid] = cp;
4126       ivs->n_cand_uses[cid]++;
4127       if (ivs->n_cand_uses[cid] == 1)
4128         {
4129           bitmap_set_bit (ivs->cands, cid);
4130           /* Do not count the pseudocandidates.  */
4131           if (cp->cand->iv)
4132             ivs->n_regs++;
4133           ivs->n_cands++;
4134           ivs->cand_cost += cp->cand->cost;
4135
4136           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4137         }
4138
4139       ivs->cand_use_cost += cp->cost;
4140       iv_ca_set_add_invariants (ivs, cp->depends_on);
4141       iv_ca_recount_cost (data, ivs);
4142     }
4143 }
4144
4145 /* Extend set IVS by expressing USE by some of the candidates in it
4146    if possible.  */
4147
4148 static void
4149 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4150                struct iv_use *use)
4151 {
4152   struct cost_pair *best_cp = NULL, *cp;
4153   bitmap_iterator bi;
4154   unsigned i;
4155
4156   gcc_assert (ivs->upto >= use->id);
4157
4158   if (ivs->upto == use->id)
4159     {
4160       ivs->upto++;
4161       ivs->bad_uses++;
4162     }
4163
4164   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4165     {
4166       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4167
4168       if (cheaper_cost_pair (cp, best_cp))
4169         best_cp = cp;
4170     }
4171
4172   iv_ca_set_cp (data, ivs, use, best_cp);
4173 }
4174
4175 /* Get cost for assignment IVS.  */
4176
4177 static unsigned
4178 iv_ca_cost (struct iv_ca *ivs)
4179 {
4180   return (ivs->bad_uses ? INFTY : ivs->cost);
4181 }
4182
4183 /* Returns true if all dependences of CP are among invariants in IVS.  */
4184
4185 static bool
4186 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4187 {
4188   unsigned i;
4189   bitmap_iterator bi;
4190
4191   if (!cp->depends_on)
4192     return true;
4193
4194   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4195     {
4196       if (ivs->n_invariant_uses[i] == 0)
4197         return false;
4198     }
4199
4200   return true;
4201 }
4202
4203 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4204    it before NEXT_CHANGE.  */
4205
4206 static struct iv_ca_delta *
4207 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4208                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4209 {
4210   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4211
4212   change->use = use;
4213   change->old_cp = old_cp;
4214   change->new_cp = new_cp;
4215   change->next_change = next_change;
4216
4217   return change;
4218 }
4219
4220 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4221    are rewritten.  */
4222
4223 static struct iv_ca_delta *
4224 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4225 {
4226   struct iv_ca_delta *last;
4227
4228   if (!l2)
4229     return l1;
4230
4231   if (!l1)
4232     return l2;
4233
4234   for (last = l1; last->next_change; last = last->next_change)
4235     continue;
4236   last->next_change = l2;
4237
4238   return l1;
4239 }
4240
4241 /* Returns candidate by that USE is expressed in IVS.  */
4242
4243 static struct cost_pair *
4244 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4245 {
4246   return ivs->cand_for_use[use->id];
4247 }
4248
4249 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4250
4251 static struct iv_ca_delta *
4252 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4253 {
4254   struct iv_ca_delta *act, *next, *prev = NULL;
4255   struct cost_pair *tmp;
4256
4257   for (act = delta; act; act = next)
4258     {
4259       next = act->next_change;
4260       act->next_change = prev;
4261       prev = act;
4262
4263       tmp = act->old_cp;
4264       act->old_cp = act->new_cp;
4265       act->new_cp = tmp;
4266     }
4267
4268   return prev;
4269 }
4270
4271 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4272    reverted instead.  */
4273
4274 static void
4275 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4276                     struct iv_ca_delta *delta, bool forward)
4277 {
4278   struct cost_pair *from, *to;
4279   struct iv_ca_delta *act;
4280
4281   if (!forward)
4282     delta = iv_ca_delta_reverse (delta);
4283
4284   for (act = delta; act; act = act->next_change)
4285     {
4286       from = act->old_cp;
4287       to = act->new_cp;
4288       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4289       iv_ca_set_cp (data, ivs, act->use, to);
4290     }
4291
4292   if (!forward)
4293     iv_ca_delta_reverse (delta);
4294 }
4295
4296 /* Returns true if CAND is used in IVS.  */
4297
4298 static bool
4299 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4300 {
4301   return ivs->n_cand_uses[cand->id] > 0;
4302 }
4303
4304 /* Returns number of induction variable candidates in the set IVS.  */
4305
4306 static unsigned
4307 iv_ca_n_cands (struct iv_ca *ivs)
4308 {
4309   return ivs->n_cands;
4310 }
4311
4312 /* Free the list of changes DELTA.  */
4313
4314 static void
4315 iv_ca_delta_free (struct iv_ca_delta **delta)
4316 {
4317   struct iv_ca_delta *act, *next;
4318
4319   for (act = *delta; act; act = next)
4320     {
4321       next = act->next_change;
4322       free (act);
4323     }
4324
4325   *delta = NULL;
4326 }
4327
4328 /* Allocates new iv candidates assignment.  */
4329
4330 static struct iv_ca *
4331 iv_ca_new (struct ivopts_data *data)
4332 {
4333   struct iv_ca *nw = XNEW (struct iv_ca);
4334
4335   nw->upto = 0;
4336   nw->bad_uses = 0;
4337   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4338   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4339   nw->cands = BITMAP_ALLOC (NULL);
4340   nw->n_cands = 0;
4341   nw->n_regs = 0;
4342   nw->cand_use_cost = 0;
4343   nw->cand_cost = 0;
4344   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4345   nw->cost = 0;
4346
4347   return nw;
4348 }
4349
4350 /* Free memory occupied by the set IVS.  */
4351
4352 static void
4353 iv_ca_free (struct iv_ca **ivs)
4354 {
4355   free ((*ivs)->cand_for_use);
4356   free ((*ivs)->n_cand_uses);
4357   BITMAP_FREE ((*ivs)->cands);
4358   free ((*ivs)->n_invariant_uses);
4359   free (*ivs);
4360   *ivs = NULL;
4361 }
4362
4363 /* Dumps IVS to FILE.  */
4364
4365 static void
4366 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4367 {
4368   const char *pref = "  invariants ";
4369   unsigned i;
4370
4371   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4372   bitmap_print (file, ivs->cands, "  candidates ","\n");
4373
4374   for (i = 1; i <= data->max_inv_id; i++)
4375     if (ivs->n_invariant_uses[i])
4376       {
4377         fprintf (file, "%s%d", pref, i);
4378         pref = ", ";
4379       }
4380   fprintf (file, "\n");
4381 }
4382
4383 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4384    new set, and store differences in DELTA.  Number of induction variables
4385    in the new set is stored to N_IVS.  */
4386
4387 static unsigned
4388 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4389               struct iv_cand *cand, struct iv_ca_delta **delta,
4390               unsigned *n_ivs)
4391 {
4392   unsigned i, cost;
4393   struct iv_use *use;
4394   struct cost_pair *old_cp, *new_cp;
4395
4396   *delta = NULL;
4397   for (i = 0; i < ivs->upto; i++)
4398     {
4399       use = iv_use (data, i);
4400       old_cp = iv_ca_cand_for_use (ivs, use);
4401
4402       if (old_cp
4403           && old_cp->cand == cand)
4404         continue;
4405
4406       new_cp = get_use_iv_cost (data, use, cand);
4407       if (!new_cp)
4408         continue;
4409
4410       if (!iv_ca_has_deps (ivs, new_cp))
4411         continue;
4412
4413       if (!cheaper_cost_pair (new_cp, old_cp))
4414         continue;
4415
4416       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4417     }
4418
4419   iv_ca_delta_commit (data, ivs, *delta, true);
4420   cost = iv_ca_cost (ivs);
4421   if (n_ivs)
4422     *n_ivs = iv_ca_n_cands (ivs);
4423   iv_ca_delta_commit (data, ivs, *delta, false);
4424
4425   return cost;
4426 }
4427
4428 /* Try narrowing set IVS by removing CAND.  Return the cost of
4429    the new set and store the differences in DELTA.  */
4430
4431 static unsigned
4432 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4433               struct iv_cand *cand, struct iv_ca_delta **delta)
4434 {
4435   unsigned i, ci;
4436   struct iv_use *use;
4437   struct cost_pair *old_cp, *new_cp, *cp;
4438   bitmap_iterator bi;
4439   struct iv_cand *cnd;
4440   unsigned cost;
4441
4442   *delta = NULL;
4443   for (i = 0; i < n_iv_uses (data); i++)
4444     {
4445       use = iv_use (data, i);
4446
4447       old_cp = iv_ca_cand_for_use (ivs, use);
4448       if (old_cp->cand != cand)
4449         continue;
4450
4451       new_cp = NULL;
4452
4453       if (data->consider_all_candidates)
4454         {
4455           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4456             {
4457               if (ci == cand->id)
4458                 continue;
4459
4460               cnd = iv_cand (data, ci);
4461
4462               cp = get_use_iv_cost (data, use, cnd);
4463               if (!cp)
4464                 continue;
4465               if (!iv_ca_has_deps (ivs, cp))
4466                 continue;
4467
4468               if (!cheaper_cost_pair (cp, new_cp))
4469                 continue;
4470
4471               new_cp = cp;
4472             }
4473         }
4474       else
4475         {
4476           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4477             {
4478               if (ci == cand->id)
4479                 continue;
4480
4481               cnd = iv_cand (data, ci);
4482
4483               cp = get_use_iv_cost (data, use, cnd);
4484               if (!cp)
4485                 continue;
4486               if (!iv_ca_has_deps (ivs, cp))
4487                 continue;
4488
4489               if (!cheaper_cost_pair (cp, new_cp))
4490                 continue;
4491
4492               new_cp = cp;
4493             }
4494         }
4495
4496       if (!new_cp)
4497         {
4498           iv_ca_delta_free (delta);
4499           return INFTY;
4500         }
4501
4502       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4503     }
4504
4505   iv_ca_delta_commit (data, ivs, *delta, true);
4506   cost = iv_ca_cost (ivs);
4507   iv_ca_delta_commit (data, ivs, *delta, false);
4508
4509   return cost;
4510 }
4511
4512 /* Try optimizing the set of candidates IVS by removing candidates different
4513    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4514    differences in DELTA.  */
4515
4516 static unsigned
4517 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4518              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4519 {
4520   bitmap_iterator bi;
4521   struct iv_ca_delta *act_delta, *best_delta;
4522   unsigned i, best_cost, acost;
4523   struct iv_cand *cand;
4524
4525   best_delta = NULL;
4526   best_cost = iv_ca_cost (ivs);
4527
4528   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4529     {
4530       cand = iv_cand (data, i);
4531
4532       if (cand == except_cand)
4533         continue;
4534
4535       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4536
4537       if (acost < best_cost)
4538         {
4539           best_cost = acost;
4540           iv_ca_delta_free (&best_delta);
4541           best_delta = act_delta;
4542         }
4543       else
4544         iv_ca_delta_free (&act_delta);
4545     }
4546
4547   if (!best_delta)
4548     {
4549       *delta = NULL;
4550       return best_cost;
4551     }
4552
4553   /* Recurse to possibly remove other unnecessary ivs.  */
4554   iv_ca_delta_commit (data, ivs, best_delta, true);
4555   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4556   iv_ca_delta_commit (data, ivs, best_delta, false);
4557   *delta = iv_ca_delta_join (best_delta, *delta);
4558   return best_cost;
4559 }
4560
4561 /* Tries to extend the sets IVS in the best possible way in order
4562    to express the USE.  */
4563
4564 static bool
4565 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4566                   struct iv_use *use)
4567 {
4568   unsigned best_cost, act_cost;
4569   unsigned i;
4570   bitmap_iterator bi;
4571   struct iv_cand *cand;
4572   struct iv_ca_delta *best_delta = NULL, *act_delta;
4573   struct cost_pair *cp;
4574
4575   iv_ca_add_use (data, ivs, use);
4576   best_cost = iv_ca_cost (ivs);
4577
4578   cp = iv_ca_cand_for_use (ivs, use);
4579   if (cp)
4580     {
4581       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4582       iv_ca_set_no_cp (data, ivs, use);
4583     }
4584
4585   /* First try important candidates.  Only if it fails, try the specific ones.
4586      Rationale -- in loops with many variables the best choice often is to use
4587      just one generic biv.  If we added here many ivs specific to the uses,
4588      the optimization algorithm later would be likely to get stuck in a local
4589      minimum, thus causing us to create too many ivs.  The approach from
4590      few ivs to more seems more likely to be successful -- starting from few
4591      ivs, replacing an expensive use by a specific iv should always be a
4592      win.  */
4593   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4594     {
4595       cand = iv_cand (data, i);
4596
4597       if (iv_ca_cand_used_p (ivs, cand))
4598         continue;
4599
4600       cp = get_use_iv_cost (data, use, cand);
4601       if (!cp)
4602         continue;
4603
4604       iv_ca_set_cp (data, ivs, use, cp);
4605       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4606       iv_ca_set_no_cp (data, ivs, use);
4607       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4608
4609       if (act_cost < best_cost)
4610         {
4611           best_cost = act_cost;
4612
4613           iv_ca_delta_free (&best_delta);
4614           best_delta = act_delta;
4615         }
4616       else
4617         iv_ca_delta_free (&act_delta);
4618     }
4619
4620   if (best_cost == INFTY)
4621     {
4622       for (i = 0; i < use->n_map_members; i++)
4623         {
4624           cp = use->cost_map + i;
4625           cand = cp->cand;
4626           if (!cand)
4627             continue;
4628
4629           /* Already tried this.  */
4630           if (cand->important)
4631             continue;
4632
4633           if (iv_ca_cand_used_p (ivs, cand))
4634             continue;
4635
4636           act_delta = NULL;
4637           iv_ca_set_cp (data, ivs, use, cp);
4638           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4639           iv_ca_set_no_cp (data, ivs, use);
4640           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4641                                        cp, act_delta);
4642
4643           if (act_cost < best_cost)
4644             {
4645               best_cost = act_cost;
4646
4647               if (best_delta)
4648                 iv_ca_delta_free (&best_delta);
4649               best_delta = act_delta;
4650             }
4651           else
4652             iv_ca_delta_free (&act_delta);
4653         }
4654     }
4655
4656   iv_ca_delta_commit (data, ivs, best_delta, true);
4657   iv_ca_delta_free (&best_delta);
4658
4659   return (best_cost != INFTY);
4660 }
4661
4662 /* Finds an initial assignment of candidates to uses.  */
4663
4664 static struct iv_ca *
4665 get_initial_solution (struct ivopts_data *data)
4666 {
4667   struct iv_ca *ivs = iv_ca_new (data);
4668   unsigned i;
4669
4670   for (i = 0; i < n_iv_uses (data); i++)
4671     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4672       {
4673         iv_ca_free (&ivs);
4674         return NULL;
4675       }
4676
4677   return ivs;
4678 }
4679
4680 /* Tries to improve set of induction variables IVS.  */
4681
4682 static bool
4683 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4684 {
4685   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4686   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4687   struct iv_cand *cand;
4688
4689   /* Try extending the set of induction variables by one.  */
4690   for (i = 0; i < n_iv_cands (data); i++)
4691     {
4692       cand = iv_cand (data, i);
4693
4694       if (iv_ca_cand_used_p (ivs, cand))
4695         continue;
4696
4697       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4698       if (!act_delta)
4699         continue;
4700
4701       /* If we successfully added the candidate and the set is small enough,
4702          try optimizing it by removing other candidates.  */
4703       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4704         {
4705           iv_ca_delta_commit (data, ivs, act_delta, true);
4706           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4707           iv_ca_delta_commit (data, ivs, act_delta, false);
4708           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4709         }
4710
4711       if (acost < best_cost)
4712         {
4713           best_cost = acost;
4714           iv_ca_delta_free (&best_delta);
4715           best_delta = act_delta;
4716         }
4717       else
4718         iv_ca_delta_free (&act_delta);
4719     }
4720
4721   if (!best_delta)
4722     {
4723       /* Try removing the candidates from the set instead.  */
4724       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4725
4726       /* Nothing more we can do.  */
4727       if (!best_delta)
4728         return false;
4729     }
4730
4731   iv_ca_delta_commit (data, ivs, best_delta, true);
4732   gcc_assert (best_cost == iv_ca_cost (ivs));
4733   iv_ca_delta_free (&best_delta);
4734   return true;
4735 }
4736
4737 /* Attempts to find the optimal set of induction variables.  We do simple
4738    greedy heuristic -- we try to replace at most one candidate in the selected
4739    solution and remove the unused ivs while this improves the cost.  */
4740
4741 static struct iv_ca *
4742 find_optimal_iv_set (struct ivopts_data *data)
4743 {
4744   unsigned i;
4745   struct iv_ca *set;
4746   struct iv_use *use;
4747
4748   /* Get the initial solution.  */
4749   set = get_initial_solution (data);
4750   if (!set)
4751     {
4752       if (dump_file && (dump_flags & TDF_DETAILS))
4753         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4754       return NULL;
4755     }
4756
4757   if (dump_file && (dump_flags & TDF_DETAILS))
4758     {
4759       fprintf (dump_file, "Initial set of candidates:\n");
4760       iv_ca_dump (data, dump_file, set);
4761     }
4762
4763   while (try_improve_iv_set (data, set))
4764     {
4765       if (dump_file && (dump_flags & TDF_DETAILS))
4766         {
4767           fprintf (dump_file, "Improved to:\n");
4768           iv_ca_dump (data, dump_file, set);
4769         }
4770     }
4771
4772   if (dump_file && (dump_flags & TDF_DETAILS))
4773     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4774
4775   for (i = 0; i < n_iv_uses (data); i++)
4776     {
4777       use = iv_use (data, i);
4778       use->selected = iv_ca_cand_for_use (set, use)->cand;
4779     }
4780
4781   return set;
4782 }
4783
4784 /* Creates a new induction variable corresponding to CAND.  */
4785
4786 static void
4787 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4788 {
4789   block_stmt_iterator incr_pos;
4790   tree base;
4791   bool after = false;
4792
4793   if (!cand->iv)
4794     return;
4795
4796   switch (cand->pos)
4797     {
4798     case IP_NORMAL:
4799       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4800       break;
4801
4802     case IP_END:
4803       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4804       after = true;
4805       break;
4806
4807     case IP_ORIGINAL:
4808       /* Mark that the iv is preserved.  */
4809       name_info (data, cand->var_before)->preserve_biv = true;
4810       name_info (data, cand->var_after)->preserve_biv = true;
4811
4812       /* Rewrite the increment so that it uses var_before directly.  */
4813       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4814
4815       return;
4816     }
4817
4818   gimple_add_tmp_var (cand->var_before);
4819   add_referenced_var (cand->var_before);
4820
4821   base = unshare_expr (cand->iv->base);
4822
4823   create_iv (base, unshare_expr (cand->iv->step),
4824              cand->var_before, data->current_loop,
4825              &incr_pos, after, &cand->var_before, &cand->var_after);
4826 }
4827
4828 /* Creates new induction variables described in SET.  */
4829
4830 static void
4831 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4832 {
4833   unsigned i;
4834   struct iv_cand *cand;
4835   bitmap_iterator bi;
4836
4837   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4838     {
4839       cand = iv_cand (data, i);
4840       create_new_iv (data, cand);
4841     }
4842 }
4843
4844 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4845    is true, remove also the ssa name defined by the statement.  */
4846
4847 static void
4848 remove_statement (tree stmt, bool including_defined_name)
4849 {
4850   if (TREE_CODE (stmt) == PHI_NODE)
4851     {
4852       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4853     }
4854   else
4855     {
4856       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4857
4858       bsi_remove (&bsi, true);
4859     }
4860 }
4861
4862 /* Rewrites USE (definition of iv used in a nonlinear expression)
4863    using candidate CAND.  */
4864
4865 static void
4866 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4867                             struct iv_use *use, struct iv_cand *cand)
4868 {
4869   tree comp;
4870   tree op, stmts, tgt, ass;
4871   block_stmt_iterator bsi, pbsi;
4872
4873   /* An important special case -- if we are asked to express value of
4874      the original iv by itself, just exit; there is no need to
4875      introduce a new computation (that might also need casting the
4876      variable to unsigned and back).  */
4877   if (cand->pos == IP_ORIGINAL
4878       && cand->incremented_at == use->stmt)
4879     {
4880       tree step, ctype, utype;
4881       enum tree_code incr_code = PLUS_EXPR;
4882
4883       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4884       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4885
4886       step = cand->iv->step;
4887       ctype = TREE_TYPE (step);
4888       utype = TREE_TYPE (cand->var_after);
4889       if (TREE_CODE (step) == NEGATE_EXPR)
4890         {
4891           incr_code = MINUS_EXPR;
4892           step = TREE_OPERAND (step, 0);
4893         }
4894
4895       /* Check whether we may leave the computation unchanged.
4896          This is the case only if it does not rely on other
4897          computations in the loop -- otherwise, the computation
4898          we rely upon may be removed in remove_unused_ivs,
4899          thus leading to ICE.  */
4900       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4901       if (TREE_CODE (op) == PLUS_EXPR
4902           || TREE_CODE (op) == MINUS_EXPR)
4903         {
4904           if (TREE_OPERAND (op, 0) == cand->var_before)
4905             op = TREE_OPERAND (op, 1);
4906           else if (TREE_CODE (op) == PLUS_EXPR
4907                    && TREE_OPERAND (op, 1) == cand->var_before)
4908             op = TREE_OPERAND (op, 0);
4909           else
4910             op = NULL_TREE;
4911         }
4912       else
4913         op = NULL_TREE;
4914
4915       if (op
4916           && (TREE_CODE (op) == INTEGER_CST
4917               || operand_equal_p (op, step, 0)))
4918         return;
4919
4920       /* Otherwise, add the necessary computations to express
4921          the iv.  */
4922       op = fold_convert (ctype, cand->var_before);
4923       comp = fold_convert (utype,
4924                            build2 (incr_code, ctype, op,
4925                                    unshare_expr (step)));
4926     }
4927   else
4928     {
4929       comp = get_computation (data->current_loop, use, cand);
4930       gcc_assert (comp != NULL_TREE);
4931     }
4932
4933   switch (TREE_CODE (use->stmt))
4934     {
4935     case PHI_NODE:
4936       tgt = PHI_RESULT (use->stmt);
4937
4938       /* If we should keep the biv, do not replace it.  */
4939       if (name_info (data, tgt)->preserve_biv)
4940         return;
4941
4942       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4943       while (!bsi_end_p (pbsi)
4944              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4945         {
4946           bsi = pbsi;
4947           bsi_next (&pbsi);
4948         }
4949       break;
4950
4951     case GIMPLE_MODIFY_STMT:
4952       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4953       bsi = bsi_for_stmt (use->stmt);
4954       break;
4955
4956     default:
4957       gcc_unreachable ();
4958     }
4959
4960   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4961
4962   if (TREE_CODE (use->stmt) == PHI_NODE)
4963     {
4964       if (stmts)
4965         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4966       ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4967       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4968       remove_statement (use->stmt, false);
4969       SSA_NAME_DEF_STMT (tgt) = ass;
4970     }
4971   else
4972     {
4973       if (stmts)
4974         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4975       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4976     }
4977 }
4978
4979 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4980    for_each_index.  */
4981
4982 static bool
4983 idx_remove_ssa_names (tree base, tree *idx,
4984                       void *data ATTRIBUTE_UNUSED)
4985 {
4986   tree *op;
4987
4988   if (TREE_CODE (*idx) == SSA_NAME)
4989     *idx = SSA_NAME_VAR (*idx);
4990
4991   if (TREE_CODE (base) == ARRAY_REF)
4992     {
4993       op = &TREE_OPERAND (base, 2);
4994       if (*op
4995           && TREE_CODE (*op) == SSA_NAME)
4996         *op = SSA_NAME_VAR (*op);
4997       op = &TREE_OPERAND (base, 3);
4998       if (*op
4999           && TREE_CODE (*op) == SSA_NAME)
5000         *op = SSA_NAME_VAR (*op);
5001     }
5002
5003   return true;
5004 }
5005
5006 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5007
5008 static tree
5009 unshare_and_remove_ssa_names (tree ref)
5010 {
5011   ref = unshare_expr (ref);
5012   for_each_index (&ref, idx_remove_ssa_names, NULL);
5013
5014   return ref;
5015 }
5016
5017 /* Extract the alias analysis info for the memory reference REF.  There are
5018    several ways how this information may be stored and what precisely is
5019    its semantics depending on the type of the reference, but there always is
5020    somewhere hidden one _DECL node that is used to determine the set of
5021    virtual operands for the reference.  The code below deciphers this jungle
5022    and extracts this single useful piece of information.  */
5023
5024 static tree
5025 get_ref_tag (tree ref, tree orig)
5026 {
5027   tree var = get_base_address (ref);
5028   tree aref = NULL_TREE, tag, sv;
5029   HOST_WIDE_INT offset, size, maxsize;
5030
5031   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5032     {
5033       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5034       if (ref)
5035         break;
5036     }
5037
5038   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5039     return unshare_expr (sv);
5040
5041   if (!var)
5042     return NULL_TREE;
5043
5044   if (TREE_CODE (var) == INDIRECT_REF)
5045     {
5046       /* If the base is a dereference of a pointer, first check its name memory
5047          tag.  If it does not have one, use its symbol memory tag.  */
5048       var = TREE_OPERAND (var, 0);
5049       if (TREE_CODE (var) != SSA_NAME)
5050         return NULL_TREE;
5051
5052       if (SSA_NAME_PTR_INFO (var))
5053         {
5054           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5055           if (tag)
5056             return tag;
5057         }
5058
5059       var = SSA_NAME_VAR (var);
5060       tag = symbol_mem_tag (var);
5061       gcc_assert (tag != NULL_TREE);
5062       return tag;
5063     }
5064   else
5065     {
5066       if (!DECL_P (var))
5067         return NULL_TREE;
5068
5069       tag = symbol_mem_tag (var);
5070       if (tag)
5071         return tag;
5072
5073       return var;
5074     }
5075 }
5076
5077 /* Copies the reference information from OLD_REF to NEW_REF.  */
5078
5079 static void
5080 copy_ref_info (tree new_ref, tree old_ref)
5081 {
5082   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5083     copy_mem_ref_info (new_ref, old_ref);
5084   else
5085     {
5086       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5087       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5088     }
5089 }
5090
5091 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5092
5093 static void
5094 rewrite_use_address (struct ivopts_data *data,
5095                      struct iv_use *use, struct iv_cand *cand)
5096 {
5097   aff_tree aff;
5098   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5099   tree ref;
5100   bool ok;
5101
5102   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5103   gcc_assert (ok);
5104   unshare_aff_combination (&aff);
5105
5106   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5107   copy_ref_info (ref, *use->op_p);
5108   *use->op_p = ref;
5109 }
5110
5111 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5112    candidate CAND.  */
5113
5114 static void
5115 rewrite_use_compare (struct ivopts_data *data,
5116                      struct iv_use *use, struct iv_cand *cand)
5117 {
5118   tree comp;
5119   tree *op_p, cond, op, stmts, bound;
5120   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5121   enum tree_code compare;
5122   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5123
5124   bound = cp->value;
5125   if (bound)
5126     {
5127       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5128       tree var_type = TREE_TYPE (var);
5129
5130       compare = iv_elimination_compare (data, use);
5131       bound = fold_convert (var_type, bound);
5132       op = force_gimple_operand (unshare_expr (bound), &stmts,
5133                                  true, NULL_TREE);
5134
5135       if (stmts)
5136         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5137
5138       *use->op_p = build2 (compare, boolean_type_node, var, op);
5139       update_stmt (use->stmt);
5140       return;
5141     }
5142
5143   /* The induction variable elimination failed; just express the original
5144      giv.  */
5145   comp = get_computation (data->current_loop, use, cand);
5146   gcc_assert (comp != NULL_TREE);
5147
5148   cond = *use->op_p;
5149   op_p = &TREE_OPERAND (cond, 0);
5150   if (TREE_CODE (*op_p) != SSA_NAME
5151       || zero_p (get_iv (data, *op_p)->step))
5152     op_p = &TREE_OPERAND (cond, 1);
5153
5154   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5155   if (stmts)
5156     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5157
5158   *op_p = op;
5159 }
5160
5161 /* Rewrites USE using candidate CAND.  */
5162
5163 static void
5164 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5165 {
5166   push_stmt_changes (&use->stmt);
5167
5168   switch (use->type)
5169     {
5170       case USE_NONLINEAR_EXPR:
5171         rewrite_use_nonlinear_expr (data, use, cand);
5172         break;
5173
5174       case USE_ADDRESS:
5175         rewrite_use_address (data, use, cand);
5176         break;
5177
5178       case USE_COMPARE:
5179         rewrite_use_compare (data, use, cand);
5180         break;
5181
5182       default:
5183         gcc_unreachable ();
5184     }
5185
5186   pop_stmt_changes (&use->stmt);
5187 }
5188
5189 /* Rewrite the uses using the selected induction variables.  */
5190
5191 static void
5192 rewrite_uses (struct ivopts_data *data)
5193 {
5194   unsigned i;
5195   struct iv_cand *cand;
5196   struct iv_use *use;
5197
5198   for (i = 0; i < n_iv_uses (data); i++)
5199     {
5200       use = iv_use (data, i);
5201       cand = use->selected;
5202       gcc_assert (cand);
5203
5204       rewrite_use (data, use, cand);
5205     }
5206 }
5207
5208 /* Removes the ivs that are not used after rewriting.  */
5209
5210 static void
5211 remove_unused_ivs (struct ivopts_data *data)
5212 {
5213   unsigned j;
5214   bitmap_iterator bi;
5215
5216   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5217     {
5218       struct version_info *info;
5219
5220       info = ver_info (data, j);
5221       if (info->iv
5222           && !zero_p (info->iv->step)
5223           && !info->inv_id
5224           && !info->iv->have_use_for
5225           && !info->preserve_biv)
5226         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5227     }
5228 }
5229
5230 /* Frees data allocated by the optimization of a single loop.  */
5231
5232 static void
5233 free_loop_data (struct ivopts_data *data)
5234 {
5235   unsigned i, j;
5236   bitmap_iterator bi;
5237   tree obj;
5238
5239   htab_empty (data->niters);
5240
5241   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5242     {
5243       struct version_info *info;
5244
5245       info = ver_info (data, i);
5246       if (info->iv)
5247         free (info->iv);
5248       info->iv = NULL;
5249       info->has_nonlin_use = false;
5250       info->preserve_biv = false;
5251       info->inv_id = 0;
5252     }
5253   bitmap_clear (data->relevant);
5254   bitmap_clear (data->important_candidates);
5255
5256   for (i = 0; i < n_iv_uses (data); i++)
5257     {
5258       struct iv_use *use = iv_use (data, i);
5259
5260       free (use->iv);
5261       BITMAP_FREE (use->related_cands);
5262       for (j = 0; j < use->n_map_members; j++)
5263         if (use->cost_map[j].depends_on)
5264           BITMAP_FREE (use->cost_map[j].depends_on);
5265       free (use->cost_map);
5266       free (use);
5267     }
5268   VEC_truncate (iv_use_p, data->iv_uses, 0);
5269
5270   for (i = 0; i < n_iv_cands (data); i++)
5271     {
5272       struct iv_cand *cand = iv_cand (data, i);
5273
5274       if (cand->iv)
5275         free (cand->iv);
5276       if (cand->depends_on)
5277         BITMAP_FREE (cand->depends_on);
5278       free (cand);
5279     }
5280   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5281
5282   if (data->version_info_size < num_ssa_names)
5283     {
5284       data->version_info_size = 2 * num_ssa_names;
5285       free (data->version_info);
5286       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5287     }
5288
5289   data->max_inv_id = 0;
5290
5291   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5292     SET_DECL_RTL (obj, NULL_RTX);
5293
5294   VEC_truncate (tree, decl_rtl_to_reset, 0);
5295 }
5296
5297 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5298    loop tree.  */
5299
5300 static void
5301 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5302 {
5303   free_loop_data (data);
5304   free (data->version_info);
5305   BITMAP_FREE (data->relevant);
5306   BITMAP_FREE (data->important_candidates);
5307   htab_delete (data->niters);
5308
5309   VEC_free (tree, heap, decl_rtl_to_reset);
5310   VEC_free (iv_use_p, heap, data->iv_uses);
5311   VEC_free (iv_cand_p, heap, data->iv_candidates);
5312 }
5313
5314 /* Optimizes the LOOP.  Returns true if anything changed.  */
5315
5316 static bool
5317 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5318 {
5319   bool changed = false;
5320   struct iv_ca *iv_ca;
5321   edge exit;
5322
5323   data->current_loop = loop;
5324
5325   if (dump_file && (dump_flags & TDF_DETAILS))
5326     {
5327       fprintf (dump_file, "Processing loop %d\n", loop->num);
5328
5329       exit = single_dom_exit (loop);
5330       if (exit)
5331         {
5332           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5333                    exit->src->index, exit->dest->index);
5334           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5335           fprintf (dump_file, "\n");
5336         }
5337
5338       fprintf (dump_file, "\n");
5339     }
5340
5341   /* For each ssa name determines whether it behaves as an induction variable
5342      in some loop.  */
5343   if (!find_induction_variables (data))
5344     goto finish;
5345
5346   /* Finds interesting uses (item 1).  */
5347   find_interesting_uses (data);
5348   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5349     goto finish;
5350
5351   /* Finds candidates for the induction variables (item 2).  */
5352   find_iv_candidates (data);
5353
5354   /* Calculates the costs (item 3, part 1).  */
5355   determine_use_iv_costs (data);
5356   determine_iv_costs (data);
5357   determine_set_costs (data);
5358
5359   /* Find the optimal set of induction variables (item 3, part 2).  */
5360   iv_ca = find_optimal_iv_set (data);
5361   if (!iv_ca)
5362     goto finish;
5363   changed = true;
5364
5365   /* Create the new induction variables (item 4, part 1).  */
5366   create_new_ivs (data, iv_ca);
5367   iv_ca_free (&iv_ca);
5368
5369   /* Rewrite the uses (item 4, part 2).  */
5370   rewrite_uses (data);
5371
5372   /* Remove the ivs that are unused after rewriting.  */
5373   remove_unused_ivs (data);
5374
5375   /* We have changed the structure of induction variables; it might happen
5376      that definitions in the scev database refer to some of them that were
5377      eliminated.  */
5378   scev_reset ();
5379
5380 finish:
5381   free_loop_data (data);
5382
5383   return changed;
5384 }
5385
5386 /* Main entry point.  Optimizes induction variables in loops.  */
5387
5388 void
5389 tree_ssa_iv_optimize (void)
5390 {
5391   struct loop *loop;
5392   struct ivopts_data data;
5393   loop_iterator li;
5394
5395   tree_ssa_iv_optimize_init (&data);
5396
5397   /* Optimize the loops starting with the innermost ones.  */
5398   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5399     {
5400       if (dump_file && (dump_flags & TDF_DETAILS))
5401         flow_loop_dump (loop, dump_file, NULL, 1);
5402
5403       tree_ssa_iv_optimize_loop (&data, loop);
5404     }
5405
5406   tree_ssa_iv_optimize_finalize (&data);
5407 }