gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93
  94 /* The infinite cost.  */
  95 #define INFTY 10000000
  96
  97 /* The expected number of loop iterations.  TODO -- use profiling instead of
  98    this.  */
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101
 102 /* Representation of the induction variable.  */
 103 struct iv
 104 {
 105   tree base;            /* Initial value of the iv.  */
 106   tree base_object;     /* A memory object to that the induction variable points.  */
 107   tree step;            /* Step of the iv (constant only).  */
 108   tree ssa_name;        /* The ssa name with the value.  */
 109   bool biv_p;           /* Is it a biv?  */
 110   bool have_use_for;    /* Do we already have a use for it?  */
 111   unsigned use_id;      /* The identifier in the use if it is the case.  */
 112 };
 113
 114 /* Per-ssa version information (induction variable descriptions, etc.).  */
 115 struct version_info
 116 {
 117   tree name;            /* The ssa name.  */
 118   struct iv *iv;        /* Induction variable description.  */
 119   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 120                            an expression that is not an induction variable.  */
 121   unsigned inv_id;      /* Id of an invariant.  */
 122   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 123 };
 124
 125 /* Types of uses.  */
 126 enum use_type
 127 {
 128   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 129   USE_ADDRESS,          /* Use in an address.  */
 130   USE_COMPARE           /* Use is a compare.  */
 131 };
 132
 133 /* The candidate - cost pair.  */
 134 struct cost_pair
 135 {
 136   struct iv_cand *cand; /* The candidate.  */
 137   unsigned cost;        /* The cost.  */
 138   bitmap depends_on;    /* The list of invariants that have to be
 139                            preserved.  */
 140   tree value;           /* For final value elimination, the expression for
 141                            the final value of the iv.  For iv elimination,
 142                            the new bound to compare with.  */
 143 };
 144
 145 /* Use.  */
 146 struct iv_use
 147 {
 148   unsigned id;          /* The id of the use.  */
 149   enum use_type type;   /* Type of the use.  */
 150   struct iv *iv;        /* The induction variable it is based on.  */
 151   tree stmt;            /* Statement in that it occurs.  */
 152   tree *op_p;           /* The place where it occurs.  */
 153   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 154                            important ones.  */
 155
 156   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 157   struct cost_pair *cost_map;
 158                         /* The costs wrto the iv candidates.  */
 159
 160   struct iv_cand *selected;
 161                         /* The selected candidate.  */
 162 };
 163
 164 /* The position where the iv is computed.  */
 165 enum iv_position
 166 {
 167   IP_NORMAL,            /* At the end, just before the exit condition.  */
 168   IP_END,               /* At the end of the latch block.  */
 169   IP_ORIGINAL           /* The original biv.  */
 170 };
 171
 172 /* The induction variable candidate.  */
 173 struct iv_cand
 174 {
 175   unsigned id;          /* The number of the candidate.  */
 176   bool important;       /* Whether this is an "important" candidate, i.e. such
 177                            that it should be considered by all uses.  */
 178   enum iv_position pos; /* Where it is computed.  */
 179   tree incremented_at;  /* For original biv, the statement where it is
 180                            incremented.  */
 181   tree var_before;      /* The variable used for it before increment.  */
 182   tree var_after;       /* The variable used for it after increment.  */
 183   struct iv *iv;        /* The value of the candidate.  NULL for
 184                            "pseudocandidate" used to indicate the possibility
 185                            to replace the final value of an iv by direct
 186                            computation of the value.  */
 187   unsigned cost;        /* Cost of the candidate.  */
 188   bitmap depends_on;    /* The list of invariants that are used in step of the
 189                            biv.  */
 190 };
 191
 192 /* The data used by the induction variable optimizations.  */
 193
 194 typedef struct iv_use *iv_use_p;
 195 DEF_VEC_P(iv_use_p);
 196 DEF_VEC_ALLOC_P(iv_use_p,heap);
 197
 198 typedef struct iv_cand *iv_cand_p;
 199 DEF_VEC_P(iv_cand_p);
 200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 201
 202 struct ivopts_data
 203 {
 204   /* The currently optimized loop.  */
 205   struct loop *current_loop;
 206
 207   /* Number of registers used in it.  */
 208   unsigned regs_used;
 209
 210   /* Numbers of iterations for all exits of the current loop.  */
 211   htab_t niters;
 212
 213   /* The size of version_info array allocated.  */
 214   unsigned version_info_size;
 215
 216   /* The array of information for the ssa names.  */
 217   struct version_info *version_info;
 218
 219   /* The bitmap of indices in version_info whose value was changed.  */
 220   bitmap relevant;
 221
 222   /* The maximum invariant id.  */
 223   unsigned max_inv_id;
 224
 225   /* The uses of induction variables.  */
 226   VEC(iv_use_p,heap) *iv_uses;
 227
 228   /* The candidates.  */
 229   VEC(iv_cand_p,heap) *iv_candidates;
 230
 231   /* A bitmap of important candidates.  */
 232   bitmap important_candidates;
 233
 234   /* Whether to consider just related and important candidates when replacing a
 235      use.  */
 236   bool consider_all_candidates;
 237 };
 238
 239 /* An assignment of iv candidates to uses.  */
 240
 241 struct iv_ca
 242 {
 243   /* The number of uses covered by the assignment.  */
 244   unsigned upto;
 245
 246   /* Number of uses that cannot be expressed by the candidates in the set.  */
 247   unsigned bad_uses;
 248
 249   /* Candidate assigned to a use, together with the related costs.  */
 250   struct cost_pair **cand_for_use;
 251
 252   /* Number of times each candidate is used.  */
 253   unsigned *n_cand_uses;
 254
 255   /* The candidates used.  */
 256   bitmap cands;
 257
 258   /* The number of candidates in the set.  */
 259   unsigned n_cands;
 260
 261   /* Total number of registers needed.  */
 262   unsigned n_regs;
 263
 264   /* Total cost of expressing uses.  */
 265   unsigned cand_use_cost;
 266
 267   /* Total cost of candidates.  */
 268   unsigned cand_cost;
 269
 270   /* Number of times each invariant is used.  */
 271   unsigned *n_invariant_uses;
 272
 273   /* Total cost of the assignment.  */
 274   unsigned cost;
 275 };
 276
 277 /* Difference of two iv candidate assignments.  */
 278
 279 struct iv_ca_delta
 280 {
 281   /* Changed use.  */
 282   struct iv_use *use;
 283
 284   /* An old assignment (for rollback purposes).  */
 285   struct cost_pair *old_cp;
 286
 287   /* A new assignment.  */
 288   struct cost_pair *new_cp;
 289
 290   /* Next change in the list.  */
 291   struct iv_ca_delta *next_change;
 292 };
 293
 294 /* Bound on number of candidates below that all candidates are considered.  */
 295
 296 #define CONSIDER_ALL_CANDIDATES_BOUND \
 297   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 298
 299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 300    optimizing such a loop would help, and it would take ages).  */
 301
 302 #define MAX_CONSIDERED_USES \
 303   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 304
 305 /* If there are at most this number of ivs in the set, try removing unnecessary
 306    ivs from the set always.  */
 307
 308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 309   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 310
 311 /* The list of trees for that the decl_rtl field must be reset is stored
 312    here.  */
 313
 314 static VEC(tree,heap) *decl_rtl_to_reset;
 315
 316 /* Number of uses recorded in DATA.  */
 317
 318 static inline unsigned
 319 n_iv_uses (struct ivopts_data *data)
 320 {
 321   return VEC_length (iv_use_p, data->iv_uses);
 322 }
 323
 324 /* Ith use recorded in DATA.  */
 325
 326 static inline struct iv_use *
 327 iv_use (struct ivopts_data *data, unsigned i)
 328 {
 329   return VEC_index (iv_use_p, data->iv_uses, i);
 330 }
 331
 332 /* Number of candidates recorded in DATA.  */
 333
 334 static inline unsigned
 335 n_iv_cands (struct ivopts_data *data)
 336 {
 337   return VEC_length (iv_cand_p, data->iv_candidates);
 338 }
 339
 340 /* Ith candidate recorded in DATA.  */
 341
 342 static inline struct iv_cand *
 343 iv_cand (struct ivopts_data *data, unsigned i)
 344 {
 345   return VEC_index (iv_cand_p, data->iv_candidates, i);
 346 }
 347
 348 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 349
 350 edge
 351 single_dom_exit (struct loop *loop)
 352 {
 353   edge exit = single_exit (loop);
 354
 355   if (!exit)
 356     return NULL;
 357
 358   if (!just_once_each_iteration_p (loop, exit->src))
 359     return NULL;
 360
 361   return exit;
 362 }
 363
 364 /* Dumps information about the induction variable IV to FILE.  */
 365
 366 extern void dump_iv (FILE *, struct iv *);
 367 void
 368 dump_iv (FILE *file, struct iv *iv)
 369 {
 370   if (iv->ssa_name)
 371     {
 372       fprintf (file, "ssa name ");
 373       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 374       fprintf (file, "\n");
 375     }
 376
 377   fprintf (file, "  type ");
 378   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 379   fprintf (file, "\n");
 380
 381   if (iv->step)
 382     {
 383       fprintf (file, "  base ");
 384       print_generic_expr (file, iv->base, TDF_SLIM);
 385       fprintf (file, "\n");
 386
 387       fprintf (file, "  step ");
 388       print_generic_expr (file, iv->step, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391   else
 392     {
 393       fprintf (file, "  invariant ");
 394       print_generic_expr (file, iv->base, TDF_SLIM);
 395       fprintf (file, "\n");
 396     }
 397
 398   if (iv->base_object)
 399     {
 400       fprintf (file, "  base object ");
 401       print_generic_expr (file, iv->base_object, TDF_SLIM);
 402       fprintf (file, "\n");
 403     }
 404
 405   if (iv->biv_p)
 406     fprintf (file, "  is a biv\n");
 407 }
 408
 409 /* Dumps information about the USE to FILE.  */
 410
 411 extern void dump_use (FILE *, struct iv_use *);
 412 void
 413 dump_use (FILE *file, struct iv_use *use)
 414 {
 415   fprintf (file, "use %d\n", use->id);
 416
 417   switch (use->type)
 418     {
 419     case USE_NONLINEAR_EXPR:
 420       fprintf (file, "  generic\n");
 421       break;
 422
 423     case USE_ADDRESS:
 424       fprintf (file, "  address\n");
 425       break;
 426
 427     case USE_COMPARE:
 428       fprintf (file, "  compare\n");
 429       break;
 430
 431     default:
 432       gcc_unreachable ();
 433     }
 434
 435   fprintf (file, "  in statement ");
 436   print_generic_expr (file, use->stmt, TDF_SLIM);
 437   fprintf (file, "\n");
 438
 439   fprintf (file, "  at position ");
 440   if (use->op_p)
 441     print_generic_expr (file, *use->op_p, TDF_SLIM);
 442   fprintf (file, "\n");
 443
 444   dump_iv (file, use->iv);
 445
 446   if (use->related_cands)
 447     {
 448       fprintf (file, "  related candidates ");
 449       dump_bitmap (file, use->related_cands);
 450     }
 451 }
 452
 453 /* Dumps information about the uses to FILE.  */
 454
 455 extern void dump_uses (FILE *, struct ivopts_data *);
 456 void
 457 dump_uses (FILE *file, struct ivopts_data *data)
 458 {
 459   unsigned i;
 460   struct iv_use *use;
 461
 462   for (i = 0; i < n_iv_uses (data); i++)
 463     {
 464       use = iv_use (data, i);
 465
 466       dump_use (file, use);
 467       fprintf (file, "\n");
 468     }
 469 }
 470
 471 /* Dumps information about induction variable candidate CAND to FILE.  */
 472
 473 extern void dump_cand (FILE *, struct iv_cand *);
 474 void
 475 dump_cand (FILE *file, struct iv_cand *cand)
 476 {
 477   struct iv *iv = cand->iv;
 478
 479   fprintf (file, "candidate %d%s\n",
 480            cand->id, cand->important ? " (important)" : "");
 481
 482   if (cand->depends_on)
 483     {
 484       fprintf (file, "  depends on ");
 485       dump_bitmap (file, cand->depends_on);
 486     }
 487
 488   if (!iv)
 489     {
 490       fprintf (file, "  final value replacement\n");
 491       return;
 492     }
 493
 494   switch (cand->pos)
 495     {
 496     case IP_NORMAL:
 497       fprintf (file, "  incremented before exit test\n");
 498       break;
 499
 500     case IP_END:
 501       fprintf (file, "  incremented at end\n");
 502       break;
 503
 504     case IP_ORIGINAL:
 505       fprintf (file, "  original biv\n");
 506       break;
 507     }
 508
 509   dump_iv (file, iv);
 510 }
 511
 512 /* Returns the info for ssa version VER.  */
 513
 514 static inline struct version_info *
 515 ver_info (struct ivopts_data *data, unsigned ver)
 516 {
 517   return data->version_info + ver;
 518 }
 519
 520 /* Returns the info for ssa name NAME.  */
 521
 522 static inline struct version_info *
 523 name_info (struct ivopts_data *data, tree name)
 524 {
 525   return ver_info (data, SSA_NAME_VERSION (name));
 526 }
 527
 528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 529    emitted in LOOP.  */
 530
 531 static bool
 532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 533 {
 534   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 535
 536   gcc_assert (bb);
 537
 538   if (sbb == loop->latch)
 539     return true;
 540
 541   if (sbb != bb)
 542     return false;
 543
 544   return stmt == last_stmt (bb);
 545 }
 546
 547 /* Returns true if STMT if after the place where the original induction
 548    variable CAND is incremented.  */
 549
 550 static bool
 551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 552 {
 553   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 554   basic_block stmt_bb = bb_for_stmt (stmt);
 555   block_stmt_iterator bsi;
 556
 557   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 558     return false;
 559
 560   if (stmt_bb != cand_bb)
 561     return true;
 562
 563   /* Scan the block from the end, since the original ivs are usually
 564      incremented at the end of the loop body.  */
 565   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 566     {
 567       if (bsi_stmt (bsi) == cand->incremented_at)
 568         return false;
 569       if (bsi_stmt (bsi) == stmt)
 570         return true;
 571     }
 572 }
 573
 574 /* Returns true if STMT if after the place where the induction variable
 575    CAND is incremented in LOOP.  */
 576
 577 static bool
 578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 579 {
 580   switch (cand->pos)
 581     {
 582     case IP_END:
 583       return false;
 584
 585     case IP_NORMAL:
 586       return stmt_after_ip_normal_pos (loop, stmt);
 587
 588     case IP_ORIGINAL:
 589       return stmt_after_ip_original_pos (cand, stmt);
 590
 591     default:
 592       gcc_unreachable ();
 593     }
 594 }
 595
 596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 597
 598 static bool
 599 abnormal_ssa_name_p (tree exp)
 600 {
 601   if (!exp)
 602     return false;
 603
 604   if (TREE_CODE (exp) != SSA_NAME)
 605     return false;
 606
 607   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 608 }
 609
 610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 611    abnormal phi node.  Callback for for_each_index.  */
 612
 613 static bool
 614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 615                                   void *data ATTRIBUTE_UNUSED)
 616 {
 617   if (TREE_CODE (base) == ARRAY_REF)
 618     {
 619       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 620         return false;
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 622         return false;
 623     }
 624
 625   return !abnormal_ssa_name_p (*index);
 626 }
 627
 628 /* Returns true if EXPR contains a ssa name that occurs in an
 629    abnormal phi node.  */
 630
 631 bool
 632 contains_abnormal_ssa_name_p (tree expr)
 633 {
 634   enum tree_code code;
 635   enum tree_code_class class;
 636
 637   if (!expr)
 638     return false;
 639
 640   code = TREE_CODE (expr);
 641   class = TREE_CODE_CLASS (code);
 642
 643   if (code == SSA_NAME)
 644     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 645
 646   if (code == INTEGER_CST
 647       || is_gimple_min_invariant (expr))
 648     return false;
 649
 650   if (code == ADDR_EXPR)
 651     return !for_each_index (&TREE_OPERAND (expr, 0),
 652                             idx_contains_abnormal_ssa_name_p,
 653                             NULL);
 654
 655   switch (class)
 656     {
 657     case tcc_binary:
 658     case tcc_comparison:
 659       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 660         return true;
 661
 662       /* Fallthru.  */
 663     case tcc_unary:
 664       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 665         return true;
 666
 667       break;
 668
 669     default:
 670       gcc_unreachable ();
 671     }
 672
 673   return false;
 674 }
 675
 676 /* Element of the table in that we cache the numbers of iterations obtained
 677    from exits of the loop.  */
 678
 679 struct nfe_cache_elt
 680 {
 681   /* The edge for that the number of iterations is cached.  */
 682   edge exit;
 683
 684   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 685      determined.  */
 686   tree niter;
 687 };
 688
 689 /* Hash function for nfe_cache_elt E.  */
 690
 691 static hashval_t
 692 nfe_hash (const void *e)
 693 {
 694   const struct nfe_cache_elt *elt = e;
 695
 696   return htab_hash_pointer (elt->exit);
 697 }
 698
 699 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 700
 701 static int
 702 nfe_eq (const void *e1, const void *e2)
 703 {
 704   const struct nfe_cache_elt *elt1 = e1;
 705
 706   return elt1->exit == e2;
 707 }
 708
 709 /*  Returns tree describing number of iterations determined from
 710     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 711
 712 static tree
 713 niter_for_exit (struct ivopts_data *data, edge exit)
 714 {
 715   struct nfe_cache_elt *nfe_desc;
 716   struct tree_niter_desc desc;
 717   PTR *slot;
 718
 719   slot = htab_find_slot_with_hash (data->niters, exit,
 720                                    htab_hash_pointer (exit),
 721                                    INSERT);
 722
 723   if (!*slot)
 724     {
 725       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 726       nfe_desc->exit = exit;
 727
 728       /* Try to determine number of iterations.  We must know it
 729          unconditionally (i.e., without possibility of # of iterations
 730          being zero).  Also, we cannot safely work with ssa names that
 731          appear in phi nodes on abnormal edges, so that we do not create
 732          overlapping life ranges for them (PR 27283).  */
 733       if (number_of_iterations_exit (data->current_loop,
 734                                      exit, &desc, true)
 735           && integer_zerop (desc.may_be_zero)
 736           && !contains_abnormal_ssa_name_p (desc.niter))
 737         nfe_desc->niter = desc.niter;
 738       else
 739         nfe_desc->niter = NULL_TREE;
 740     }
 741   else
 742     nfe_desc = *slot;
 743
 744   return nfe_desc->niter;
 745 }
 746
 747 /* Returns tree describing number of iterations determined from
 748    single dominating exit of DATA->current_loop, or NULL if something
 749    goes wrong.  */
 750
 751 static tree
 752 niter_for_single_dom_exit (struct ivopts_data *data)
 753 {
 754   edge exit = single_dom_exit (data->current_loop);
 755
 756   if (!exit)
 757     return NULL;
 758
 759   return niter_for_exit (data, exit);
 760 }
 761
 762 /* Initializes data structures used by the iv optimization pass, stored
 763    in DATA.  */
 764
 765 static void
 766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 767 {
 768   data->version_info_size = 2 * num_ssa_names;
 769   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 770   data->relevant = BITMAP_ALLOC (NULL);
 771   data->important_candidates = BITMAP_ALLOC (NULL);
 772   data->max_inv_id = 0;
 773   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 774   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 775   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 776   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 777 }
 778
 779 /* Returns a memory object to that EXPR points.  In case we are able to
 780    determine that it does not point to any such object, NULL is returned.  */
 781
 782 static tree
 783 determine_base_object (tree expr)
 784 {
 785   enum tree_code code = TREE_CODE (expr);
 786   tree base, obj, op0, op1;
 787
 788   /* If this is a pointer casted to any type, we need to determine
 789      the base object for the pointer; so handle conversions before
 790      throwing away non-pointer expressions.  */
 791   if (TREE_CODE (expr) == NOP_EXPR
 792       || TREE_CODE (expr) == CONVERT_EXPR)
 793     return determine_base_object (TREE_OPERAND (expr, 0));
 794
 795   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 796     return NULL_TREE;
 797
 798   switch (code)
 799     {
 800     case INTEGER_CST:
 801       return NULL_TREE;
 802
 803     case ADDR_EXPR:
 804       obj = TREE_OPERAND (expr, 0);
 805       base = get_base_address (obj);
 806
 807       if (!base)
 808         return expr;
 809
 810       if (TREE_CODE (base) == INDIRECT_REF)
 811         return determine_base_object (TREE_OPERAND (base, 0));
 812
 813       return fold_convert (ptr_type_node,
 814                            build_fold_addr_expr (base));
 815
 816     case PLUS_EXPR:
 817     case MINUS_EXPR:
 818       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 819       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 820
 821       if (!op1)
 822         return op0;
 823
 824       if (!op0)
 825         return (code == PLUS_EXPR
 826                 ? op1
 827                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 828
 829       return fold_build2 (code, ptr_type_node, op0, op1);
 830
 831     default:
 832       return fold_convert (ptr_type_node, expr);
 833     }
 834 }
 835
 836 /* Allocates an induction variable with given initial value BASE and step STEP
 837    for loop LOOP.  */
 838
 839 static struct iv *
 840 alloc_iv (tree base, tree step)
 841 {
 842   struct iv *iv = XCNEW (struct iv);
 843   gcc_assert (step != NULL_TREE);
 844
 845   iv->base = base;
 846   iv->base_object = determine_base_object (base);
 847   iv->step = step;
 848   iv->biv_p = false;
 849   iv->have_use_for = false;
 850   iv->use_id = 0;
 851   iv->ssa_name = NULL_TREE;
 852
 853   return iv;
 854 }
 855
 856 /* Sets STEP and BASE for induction variable IV.  */
 857
 858 static void
 859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 860 {
 861   struct version_info *info = name_info (data, iv);
 862
 863   gcc_assert (!info->iv);
 864
 865   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 866   info->iv = alloc_iv (base, step);
 867   info->iv->ssa_name = iv;
 868 }
 869
 870 /* Finds induction variable declaration for VAR.  */
 871
 872 static struct iv *
 873 get_iv (struct ivopts_data *data, tree var)
 874 {
 875   basic_block bb;
 876   tree type = TREE_TYPE (var);
 877
 878   if (!POINTER_TYPE_P (type)
 879       && !INTEGRAL_TYPE_P (type))
 880     return NULL;
 881
 882   if (!name_info (data, var)->iv)
 883     {
 884       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 885
 886       if (!bb
 887           || !flow_bb_inside_loop_p (data->current_loop, bb))
 888         set_iv (data, var, var, build_int_cst (type, 0));
 889     }
 890
 891   return name_info (data, var)->iv;
 892 }
 893
 894 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 895    not define a simple affine biv with nonzero step.  */
 896
 897 static tree
 898 determine_biv_step (tree phi)
 899 {
 900   struct loop *loop = bb_for_stmt (phi)->loop_father;
 901   tree name = PHI_RESULT (phi);
 902   affine_iv iv;
 903
 904   if (!is_gimple_reg (name))
 905     return NULL_TREE;
 906
 907   if (!simple_iv (loop, phi, name, &iv, true))
 908     return NULL_TREE;
 909
 910   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 911 }
 912
 913 /* Finds basic ivs.  */
 914
 915 static bool
 916 find_bivs (struct ivopts_data *data)
 917 {
 918   tree phi, step, type, base;
 919   bool found = false;
 920   struct loop *loop = data->current_loop;
 921
 922   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 923     {
 924       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 925         continue;
 926
 927       step = determine_biv_step (phi);
 928       if (!step)
 929         continue;
 930
 931       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 932       base = expand_simple_operations (base);
 933       if (contains_abnormal_ssa_name_p (base)
 934           || contains_abnormal_ssa_name_p (step))
 935         continue;
 936
 937       type = TREE_TYPE (PHI_RESULT (phi));
 938       base = fold_convert (type, base);
 939       if (step)
 940         step = fold_convert (type, step);
 941
 942       set_iv (data, PHI_RESULT (phi), base, step);
 943       found = true;
 944     }
 945
 946   return found;
 947 }
 948
 949 /* Marks basic ivs.  */
 950
 951 static void
 952 mark_bivs (struct ivopts_data *data)
 953 {
 954   tree phi, var;
 955   struct iv *iv, *incr_iv;
 956   struct loop *loop = data->current_loop;
 957   basic_block incr_bb;
 958
 959   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 960     {
 961       iv = get_iv (data, PHI_RESULT (phi));
 962       if (!iv)
 963         continue;
 964
 965       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 966       incr_iv = get_iv (data, var);
 967       if (!incr_iv)
 968         continue;
 969
 970       /* If the increment is in the subloop, ignore it.  */
 971       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 972       if (incr_bb->loop_father != data->current_loop
 973           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 974         continue;
 975
 976       iv->biv_p = true;
 977       incr_iv->biv_p = true;
 978     }
 979 }
 980
 981 /* Checks whether STMT defines a linear induction variable and stores its
 982    parameters to IV.  */
 983
 984 static bool
 985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 986 {
 987   tree lhs;
 988   struct loop *loop = data->current_loop;
 989
 990   iv->base = NULL_TREE;
 991   iv->step = NULL_TREE;
 992
 993   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 994     return false;
 995
 996   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 997   if (TREE_CODE (lhs) != SSA_NAME)
 998     return false;
 999
1000   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1001     return false;
1002   iv->base = expand_simple_operations (iv->base);
1003
1004   if (contains_abnormal_ssa_name_p (iv->base)
1005       || contains_abnormal_ssa_name_p (iv->step))
1006     return false;
1007
1008   return true;
1009 }
1010
1011 /* Finds general ivs in statement STMT.  */
1012
1013 static void
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1015 {
1016   affine_iv iv;
1017
1018   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1019     return;
1020
1021   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1022 }
1023
1024 /* Finds general ivs in basic block BB.  */
1025
1026 static void
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1028 {
1029   block_stmt_iterator bsi;
1030
1031   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032     find_givs_in_stmt (data, bsi_stmt (bsi));
1033 }
1034
1035 /* Finds general ivs.  */
1036
1037 static void
1038 find_givs (struct ivopts_data *data)
1039 {
1040   struct loop *loop = data->current_loop;
1041   basic_block *body = get_loop_body_in_dom_order (loop);
1042   unsigned i;
1043
1044   for (i = 0; i < loop->num_nodes; i++)
1045     find_givs_in_bb (data, body[i]);
1046   free (body);
1047 }
1048
1049 /* For each ssa name defined in LOOP determines whether it is an induction
1050    variable and if so, its initial value and step.  */
1051
1052 static bool
1053 find_induction_variables (struct ivopts_data *data)
1054 {
1055   unsigned i;
1056   bitmap_iterator bi;
1057
1058   if (!find_bivs (data))
1059     return false;
1060
1061   find_givs (data);
1062   mark_bivs (data);
1063
1064   if (dump_file && (dump_flags & TDF_DETAILS))
1065     {
1066       tree niter = niter_for_single_dom_exit (data);
1067
1068       if (niter)
1069         {
1070           fprintf (dump_file, "  number of iterations ");
1071           print_generic_expr (dump_file, niter, TDF_SLIM);
1072           fprintf (dump_file, "\n\n");
1073         };
1074
1075       fprintf (dump_file, "Induction variables:\n\n");
1076
1077       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1078         {
1079           if (ver_info (data, i)->iv)
1080             dump_iv (dump_file, ver_info (data, i)->iv);
1081         }
1082     }
1083
1084   return true;
1085 }
1086
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1088
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091             tree stmt, enum use_type use_type)
1092 {
1093   struct iv_use *use = XCNEW (struct iv_use);
1094
1095   use->id = n_iv_uses (data);
1096   use->type = use_type;
1097   use->iv = iv;
1098   use->stmt = stmt;
1099   use->op_p = use_p;
1100   use->related_cands = BITMAP_ALLOC (NULL);
1101
1102   /* To avoid showing ssa name in the dumps, if it was not reset by the
1103      caller.  */
1104   iv->ssa_name = NULL_TREE;
1105
1106   if (dump_file && (dump_flags & TDF_DETAILS))
1107     dump_use (dump_file, use);
1108
1109   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1110
1111   return use;
1112 }
1113
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115    NONLINEAR_USE is true if the invariant is used in a way we do not
1116    handle specially.  */
1117
1118 static void
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1120 {
1121   basic_block bb;
1122   struct version_info *info;
1123
1124   if (TREE_CODE (op) != SSA_NAME
1125       || !is_gimple_reg (op))
1126     return;
1127
1128   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1129   if (bb
1130       && flow_bb_inside_loop_p (data->current_loop, bb))
1131     return;
1132
1133   info = name_info (data, op);
1134   info->name = op;
1135   info->has_nonlin_use |= nonlinear_use;
1136   if (!info->inv_id)
1137     info->inv_id = ++data->max_inv_id;
1138   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1139 }
1140
1141 /* Checks whether the use OP is interesting and if so, records it.  */
1142
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1145 {
1146   struct iv *iv;
1147   struct iv *civ;
1148   tree stmt;
1149   struct iv_use *use;
1150
1151   if (TREE_CODE (op) != SSA_NAME)
1152     return NULL;
1153
1154   iv = get_iv (data, op);
1155   if (!iv)
1156     return NULL;
1157
1158   if (iv->have_use_for)
1159     {
1160       use = iv_use (data, iv->use_id);
1161
1162       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163       return use;
1164     }
1165
1166   if (integer_zerop (iv->step))
1167     {
1168       record_invariant (data, op, true);
1169       return NULL;
1170     }
1171   iv->have_use_for = true;
1172
1173   civ = XNEW (struct iv);
1174   *civ = *iv;
1175
1176   stmt = SSA_NAME_DEF_STMT (op);
1177   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1179
1180   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181   iv->use_id = use->id;
1182
1183   return use;
1184 }
1185
1186 /* Checks whether the condition *COND_P in STMT is interesting
1187    and if so, records it.  */
1188
1189 static void
1190 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1191 {
1192   tree *op0_p;
1193   tree *op1_p;
1194   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1195   struct iv const_iv;
1196   tree zero = integer_zero_node;
1197
1198   const_iv.step = integer_zero_node;
1199
1200   if (TREE_CODE (*cond_p) != SSA_NAME
1201       && !COMPARISON_CLASS_P (*cond_p))
1202     return;
1203
1204   if (TREE_CODE (*cond_p) == SSA_NAME)
1205     {
1206       op0_p = cond_p;
1207       op1_p = &zero;
1208     }
1209   else
1210     {
1211       op0_p = &TREE_OPERAND (*cond_p, 0);
1212       op1_p = &TREE_OPERAND (*cond_p, 1);
1213     }
1214
1215   if (TREE_CODE (*op0_p) == SSA_NAME)
1216     iv0 = get_iv (data, *op0_p);
1217   else
1218     iv0 = &const_iv;
1219
1220   if (TREE_CODE (*op1_p) == SSA_NAME)
1221     iv1 = get_iv (data, *op1_p);
1222   else
1223     iv1 = &const_iv;
1224
1225   if (/* When comparing with non-invariant value, we may not do any senseful
1226          induction variable elimination.  */
1227       (!iv0 || !iv1)
1228       /* Eliminating condition based on two ivs would be nontrivial.
1229          ??? TODO -- it is not really important to handle this case.  */
1230       || (!integer_zerop (iv0->step)
1231           && !integer_zerop (iv1->step)))
1232     {
1233       find_interesting_uses_op (data, *op0_p);
1234       find_interesting_uses_op (data, *op1_p);
1235       return;
1236     }
1237
1238   if (integer_zerop (iv0->step)
1239       && integer_zerop (iv1->step))
1240     {
1241       /* If both are invariants, this is a work for unswitching.  */
1242       return;
1243     }
1244
1245   civ = XNEW (struct iv);
1246   *civ = integer_zerop (iv0->step) ? *iv1: *iv0;
1247   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1248 }
1249
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251    i.e. if all its operands are defined outside of the LOOP.  */
1252
1253 bool
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1255 {
1256   basic_block def_bb;
1257   unsigned i, len;
1258
1259   if (is_gimple_min_invariant (expr))
1260     return true;
1261
1262   if (TREE_CODE (expr) == SSA_NAME)
1263     {
1264       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1265       if (def_bb
1266           && flow_bb_inside_loop_p (loop, def_bb))
1267         return false;
1268
1269       return true;
1270     }
1271
1272   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1273     return false;
1274
1275   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1276   for (i = 0; i < len; i++)
1277     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278       return false;
1279
1280   return true;
1281 }
1282
1283 /* Cumulates the steps of indices into DATA and replaces their values with the
1284    initial ones.  Returns false when the value of the index cannot be determined.
1285    Callback for for_each_index.  */
1286
1287 struct ifs_ivopts_data
1288 {
1289   struct ivopts_data *ivopts_data;
1290   tree stmt;
1291   tree step;
1292 };
1293
1294 static bool
1295 idx_find_step (tree base, tree *idx, void *data)
1296 {
1297   struct ifs_ivopts_data *dta = data;
1298   struct iv *iv;
1299   tree step, iv_base, iv_step, lbound, off;
1300   struct loop *loop = dta->ivopts_data->current_loop;
1301
1302   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1303       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1304     return false;
1305
1306   /* If base is a component ref, require that the offset of the reference
1307      be invariant.  */
1308   if (TREE_CODE (base) == COMPONENT_REF)
1309     {
1310       off = component_ref_field_offset (base);
1311       return expr_invariant_in_loop_p (loop, off);
1312     }
1313
1314   /* If base is array, first check whether we will be able to move the
1315      reference out of the loop (in order to take its address in strength
1316      reduction).  In order for this to work we need both lower bound
1317      and step to be loop invariants.  */
1318   if (TREE_CODE (base) == ARRAY_REF)
1319     {
1320       step = array_ref_element_size (base);
1321       lbound = array_ref_low_bound (base);
1322
1323       if (!expr_invariant_in_loop_p (loop, step)
1324           || !expr_invariant_in_loop_p (loop, lbound))
1325         return false;
1326     }
1327
1328   if (TREE_CODE (*idx) != SSA_NAME)
1329     return true;
1330
1331   iv = get_iv (dta->ivopts_data, *idx);
1332   if (!iv)
1333     return false;
1334
1335   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1336           *&x[0], which is not folded and does not trigger the
1337           ARRAY_REF path below.  */
1338   *idx = iv->base;
1339
1340   if (integer_zerop (iv->step))
1341     return true;
1342
1343   if (TREE_CODE (base) == ARRAY_REF)
1344     {
1345       step = array_ref_element_size (base);
1346
1347       /* We only handle addresses whose step is an integer constant.  */
1348       if (TREE_CODE (step) != INTEGER_CST)
1349         return false;
1350     }
1351   else
1352     /* The step for pointer arithmetics already is 1 byte.  */
1353     step = build_int_cst (sizetype, 1);
1354
1355   iv_base = iv->base;
1356   iv_step = iv->step;
1357   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1358                             sizetype, &iv_base, &iv_step, dta->stmt,
1359                             false))
1360     {
1361       /* The index might wrap.  */
1362       return false;
1363     }
1364
1365   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1366   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1367
1368   return true;
1369 }
1370
1371 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1372    object is passed to it in DATA.  */
1373
1374 static bool
1375 idx_record_use (tree base, tree *idx,
1376                 void *data)
1377 {
1378   find_interesting_uses_op (data, *idx);
1379   if (TREE_CODE (base) == ARRAY_REF)
1380     {
1381       find_interesting_uses_op (data, array_ref_element_size (base));
1382       find_interesting_uses_op (data, array_ref_low_bound (base));
1383     }
1384   return true;
1385 }
1386
1387 /* Returns true if memory reference REF may be unaligned.  */
1388
1389 static bool
1390 may_be_unaligned_p (tree ref)
1391 {
1392   tree base;
1393   tree base_type;
1394   HOST_WIDE_INT bitsize;
1395   HOST_WIDE_INT bitpos;
1396   tree toffset;
1397   enum machine_mode mode;
1398   int unsignedp, volatilep;
1399   unsigned base_align;
1400
1401   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402      thus they are not misaligned.  */
1403   if (TREE_CODE (ref) == TARGET_MEM_REF)
1404     return false;
1405
1406   /* The test below is basically copy of what expr.c:normal_inner_ref
1407      does to check whether the object must be loaded by parts when
1408      STRICT_ALIGNMENT is true.  */
1409   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410                               &unsignedp, &volatilep, true);
1411   base_type = TREE_TYPE (base);
1412   base_align = TYPE_ALIGN (base_type);
1413
1414   if (mode != BLKmode
1415       && (base_align < GET_MODE_ALIGNMENT (mode)
1416           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417           || bitpos % BITS_PER_UNIT != 0))
1418     return true;
1419
1420   return false;
1421 }
1422
1423 /* Return true if EXPR may be non-addressable.   */
1424
1425 static bool
1426 may_be_nonaddressable_p (tree expr)
1427 {
1428   switch (TREE_CODE (expr))
1429     {
1430     case COMPONENT_REF:
1431       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1433
1434     case ARRAY_REF:
1435     case ARRAY_RANGE_REF:
1436       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1437
1438     case VIEW_CONVERT_EXPR:
1439       /* This kind of view-conversions may wrap non-addressable objects
1440          and make them look addressable.  After some processing the
1441          non-addressability may be uncovered again, causing ADDR_EXPRs
1442          of inappropriate objects to be built.  */
1443       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1445
1446     default:
1447       break;
1448     }
1449
1450   return false;
1451 }
1452
1453 /* Finds addresses in *OP_P inside STMT.  */
1454
1455 static void
1456 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1457 {
1458   tree base = *op_p, step = build_int_cst (sizetype, 0);
1459   struct iv *civ;
1460   struct ifs_ivopts_data ifs_ivopts_data;
1461
1462   /* Do not play with volatile memory references.  A bit too conservative,
1463      perhaps, but safe.  */
1464   if (stmt_ann (stmt)->has_volatile_ops)
1465     goto fail;
1466
1467   /* Ignore bitfields for now.  Not really something terribly complicated
1468      to handle.  TODO.  */
1469   if (TREE_CODE (base) == BIT_FIELD_REF)
1470     goto fail;
1471
1472   if (may_be_nonaddressable_p (base))
1473     goto fail;
1474
1475   if (STRICT_ALIGNMENT
1476       && may_be_unaligned_p (base))
1477     goto fail;
1478
1479   base = unshare_expr (base);
1480
1481   if (TREE_CODE (base) == TARGET_MEM_REF)
1482     {
1483       tree type = build_pointer_type (TREE_TYPE (base));
1484       tree astep;
1485
1486       if (TMR_BASE (base)
1487           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1488         {
1489           civ = get_iv (data, TMR_BASE (base));
1490           if (!civ)
1491             goto fail;
1492
1493           TMR_BASE (base) = civ->base;
1494           step = civ->step;
1495         }
1496       if (TMR_INDEX (base)
1497           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1498         {
1499           civ = get_iv (data, TMR_INDEX (base));
1500           if (!civ)
1501             goto fail;
1502
1503           TMR_INDEX (base) = civ->base;
1504           astep = civ->step;
1505
1506           if (astep)
1507             {
1508               if (TMR_STEP (base))
1509                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1510
1511               step = fold_build2 (PLUS_EXPR, type, step, astep);
1512             }
1513         }
1514
1515       if (integer_zerop (step))
1516         goto fail;
1517       base = tree_mem_ref_addr (type, base);
1518     }
1519   else
1520     {
1521       ifs_ivopts_data.ivopts_data = data;
1522       ifs_ivopts_data.stmt = stmt;
1523       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1524       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1525           || integer_zerop (ifs_ivopts_data.step))
1526         goto fail;
1527       step = ifs_ivopts_data.step;
1528
1529       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1530       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1531
1532       base = build_fold_addr_expr (base);
1533
1534       /* Substituting bases of IVs into the base expression might
1535          have caused folding opportunities.  */
1536       if (TREE_CODE (base) == ADDR_EXPR)
1537         {
1538           tree *ref = &TREE_OPERAND (base, 0);
1539           while (handled_component_p (*ref))
1540             ref = &TREE_OPERAND (*ref, 0);
1541           if (TREE_CODE (*ref) == INDIRECT_REF)
1542             *ref = fold_indirect_ref (*ref);
1543         }
1544     }
1545
1546   civ = alloc_iv (base, step);
1547   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548   return;
1549
1550 fail:
1551   for_each_index (op_p, idx_record_use, data);
1552 }
1553
1554 /* Finds and records invariants used in STMT.  */
1555
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1558 {
1559   ssa_op_iter iter;
1560   use_operand_p use_p;
1561   tree op;
1562
1563   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1564     {
1565       op = USE_FROM_PTR (use_p);
1566       record_invariant (data, op, false);
1567     }
1568 }
1569
1570 /* Finds interesting uses of induction variables in the statement STMT.  */
1571
1572 static void
1573 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1574 {
1575   struct iv *iv;
1576   tree op, lhs, rhs;
1577   ssa_op_iter iter;
1578   use_operand_p use_p;
1579
1580   find_invariants_stmt (data, stmt);
1581
1582   if (TREE_CODE (stmt) == COND_EXPR)
1583     {
1584       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1585       return;
1586     }
1587
1588   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1589     {
1590       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1591       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1592
1593       if (TREE_CODE (lhs) == SSA_NAME)
1594         {
1595           /* If the statement defines an induction variable, the uses are not
1596              interesting by themselves.  */
1597
1598           iv = get_iv (data, lhs);
1599
1600           if (iv && !integer_zerop (iv->step))
1601             return;
1602         }
1603
1604       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1605         {
1606         case tcc_comparison:
1607           find_interesting_uses_cond (data, stmt,
1608                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1609           return;
1610
1611         case tcc_reference:
1612           find_interesting_uses_address (data, stmt,
1613                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1614           if (REFERENCE_CLASS_P (lhs))
1615             find_interesting_uses_address (data, stmt,
1616                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1617           return;
1618
1619         default: ;
1620         }
1621
1622       if (REFERENCE_CLASS_P (lhs)
1623           && is_gimple_val (rhs))
1624         {
1625           find_interesting_uses_address (data, stmt,
1626                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1627           find_interesting_uses_op (data, rhs);
1628           return;
1629         }
1630
1631       /* TODO -- we should also handle address uses of type
1632
1633          memory = call (whatever);
1634
1635          and
1636
1637          call (memory).  */
1638     }
1639
1640   if (TREE_CODE (stmt) == PHI_NODE
1641       && bb_for_stmt (stmt) == data->current_loop->header)
1642     {
1643       lhs = PHI_RESULT (stmt);
1644       iv = get_iv (data, lhs);
1645
1646       if (iv && !integer_zerop (iv->step))
1647         return;
1648     }
1649
1650   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1651     {
1652       op = USE_FROM_PTR (use_p);
1653
1654       if (TREE_CODE (op) != SSA_NAME)
1655         continue;
1656
1657       iv = get_iv (data, op);
1658       if (!iv)
1659         continue;
1660
1661       find_interesting_uses_op (data, op);
1662     }
1663 }
1664
1665 /* Finds interesting uses of induction variables outside of loops
1666    on loop exit edge EXIT.  */
1667
1668 static void
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1670 {
1671   tree phi, def;
1672
1673   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1674     {
1675       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676       if (is_gimple_reg (def))
1677         find_interesting_uses_op (data, def);
1678     }
1679 }
1680
1681 /* Finds uses of the induction variables that are interesting.  */
1682
1683 static void
1684 find_interesting_uses (struct ivopts_data *data)
1685 {
1686   basic_block bb;
1687   block_stmt_iterator bsi;
1688   tree phi;
1689   basic_block *body = get_loop_body (data->current_loop);
1690   unsigned i;
1691   struct version_info *info;
1692   edge e;
1693
1694   if (dump_file && (dump_flags & TDF_DETAILS))
1695     fprintf (dump_file, "Uses:\n\n");
1696
1697   for (i = 0; i < data->current_loop->num_nodes; i++)
1698     {
1699       edge_iterator ei;
1700       bb = body[i];
1701
1702       FOR_EACH_EDGE (e, ei, bb->succs)
1703         if (e->dest != EXIT_BLOCK_PTR
1704             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705           find_interesting_uses_outside (data, e);
1706
1707       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708         find_interesting_uses_stmt (data, phi);
1709       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1711     }
1712
1713   if (dump_file && (dump_flags & TDF_DETAILS))
1714     {
1715       bitmap_iterator bi;
1716
1717       fprintf (dump_file, "\n");
1718
1719       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720         {
1721           info = ver_info (data, i);
1722           if (info->inv_id)
1723             {
1724               fprintf (dump_file, "  ");
1725               print_generic_expr (dump_file, info->name, TDF_SLIM);
1726               fprintf (dump_file, " is invariant (%d)%s\n",
1727                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1728             }
1729         }
1730
1731       fprintf (dump_file, "\n");
1732     }
1733
1734   free (body);
1735 }
1736
1737 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1738    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1739    we are at the top-level of the processed address.  */
1740
1741 static tree
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743                 unsigned HOST_WIDE_INT *offset)
1744 {
1745   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746   enum tree_code code;
1747   tree type, orig_type = TREE_TYPE (expr);
1748   unsigned HOST_WIDE_INT off0, off1, st;
1749   tree orig_expr = expr;
1750
1751   STRIP_NOPS (expr);
1752
1753   type = TREE_TYPE (expr);
1754   code = TREE_CODE (expr);
1755   *offset = 0;
1756
1757   switch (code)
1758     {
1759     case INTEGER_CST:
1760       if (!cst_and_fits_in_hwi (expr)
1761           || integer_zerop (expr))
1762         return orig_expr;
1763
1764       *offset = int_cst_value (expr);
1765       return build_int_cst (orig_type, 0);
1766
1767     case PLUS_EXPR:
1768     case MINUS_EXPR:
1769       op0 = TREE_OPERAND (expr, 0);
1770       op1 = TREE_OPERAND (expr, 1);
1771
1772       op0 = strip_offset_1 (op0, false, false, &off0);
1773       op1 = strip_offset_1 (op1, false, false, &off1);
1774
1775       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776       if (op0 == TREE_OPERAND (expr, 0)
1777           && op1 == TREE_OPERAND (expr, 1))
1778         return orig_expr;
1779
1780       if (integer_zerop (op1))
1781         expr = op0;
1782       else if (integer_zerop (op0))
1783         {
1784           if (code == PLUS_EXPR)
1785             expr = op1;
1786           else
1787             expr = fold_build1 (NEGATE_EXPR, type, op1);
1788         }
1789       else
1790         expr = fold_build2 (code, type, op0, op1);
1791
1792       return fold_convert (orig_type, expr);
1793
1794     case ARRAY_REF:
1795       if (!inside_addr)
1796         return orig_expr;
1797
1798       step = array_ref_element_size (expr);
1799       if (!cst_and_fits_in_hwi (step))
1800         break;
1801
1802       st = int_cst_value (step);
1803       op1 = TREE_OPERAND (expr, 1);
1804       op1 = strip_offset_1 (op1, false, false, &off1);
1805       *offset = off1 * st;
1806
1807       if (top_compref
1808           && integer_zerop (op1))
1809         {
1810           /* Strip the component reference completely.  */
1811           op0 = TREE_OPERAND (expr, 0);
1812           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1813           *offset += off0;
1814           return op0;
1815         }
1816       break;
1817
1818     case COMPONENT_REF:
1819       if (!inside_addr)
1820         return orig_expr;
1821
1822       tmp = component_ref_field_offset (expr);
1823       if (top_compref
1824           && cst_and_fits_in_hwi (tmp))
1825         {
1826           /* Strip the component reference completely.  */
1827           op0 = TREE_OPERAND (expr, 0);
1828           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829           *offset = off0 + int_cst_value (tmp);
1830           return op0;
1831         }
1832       break;
1833
1834     case ADDR_EXPR:
1835       op0 = TREE_OPERAND (expr, 0);
1836       op0 = strip_offset_1 (op0, true, true, &off0);
1837       *offset += off0;
1838
1839       if (op0 == TREE_OPERAND (expr, 0))
1840         return orig_expr;
1841
1842       expr = build_fold_addr_expr (op0);
1843       return fold_convert (orig_type, expr);
1844
1845     case INDIRECT_REF:
1846       inside_addr = false;
1847       break;
1848
1849     default:
1850       return orig_expr;
1851     }
1852
1853   /* Default handling of expressions for that we want to recurse into
1854      the first operand.  */
1855   op0 = TREE_OPERAND (expr, 0);
1856   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1857   *offset += off0;
1858
1859   if (op0 == TREE_OPERAND (expr, 0)
1860       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1861     return orig_expr;
1862
1863   expr = copy_node (expr);
1864   TREE_OPERAND (expr, 0) = op0;
1865   if (op1)
1866     TREE_OPERAND (expr, 1) = op1;
1867
1868   /* Inside address, we might strip the top level component references,
1869      thus changing type of the expression.  Handling of ADDR_EXPR
1870      will fix that.  */
1871   expr = fold_convert (orig_type, expr);
1872
1873   return expr;
1874 }
1875
1876 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1877
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 {
1881   return strip_offset_1 (expr, false, false, offset);
1882 }
1883
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885    We return unsigned type with the same precision, which avoids problems
1886    with overflows.  */
1887
1888 static tree
1889 generic_type_for (tree type)
1890 {
1891   if (POINTER_TYPE_P (type))
1892     return unsigned_type_for (type);
1893
1894   if (TYPE_UNSIGNED (type))
1895     return type;
1896
1897   return unsigned_type_for (type);
1898 }
1899
1900 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1901    the bitmap to that we should store it.  */
1902
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 {
1907   bitmap *depends_on = data;
1908   struct version_info *info;
1909
1910   if (TREE_CODE (*expr_p) != SSA_NAME)
1911     return NULL_TREE;
1912   info = name_info (fd_ivopts_data, *expr_p);
1913
1914   if (!info->inv_id || info->has_nonlin_use)
1915     return NULL_TREE;
1916
1917   if (!*depends_on)
1918     *depends_on = BITMAP_ALLOC (NULL);
1919   bitmap_set_bit (*depends_on, info->inv_id);
1920
1921   return NULL_TREE;
1922 }
1923
1924 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1925    position to POS.  If USE is not NULL, the candidate is set as related to
1926    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1927    replacement of the final value of the iv by a direct computation.  */
1928
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931                  tree base, tree step, bool important, enum iv_position pos,
1932                  struct iv_use *use, tree incremented_at)
1933 {
1934   unsigned i;
1935   struct iv_cand *cand = NULL;
1936   tree type, orig_type;
1937
1938   if (base)
1939     {
1940       orig_type = TREE_TYPE (base);
1941       type = generic_type_for (orig_type);
1942       if (type != orig_type)
1943         {
1944           base = fold_convert (type, base);
1945           step = fold_convert (type, step);
1946         }
1947     }
1948
1949   for (i = 0; i < n_iv_cands (data); i++)
1950     {
1951       cand = iv_cand (data, i);
1952
1953       if (cand->pos != pos)
1954         continue;
1955
1956       if (cand->incremented_at != incremented_at)
1957         continue;
1958
1959       if (!cand->iv)
1960         {
1961           if (!base && !step)
1962             break;
1963
1964           continue;
1965         }
1966
1967       if (!base && !step)
1968         continue;
1969
1970       if (operand_equal_p (base, cand->iv->base, 0)
1971           && operand_equal_p (step, cand->iv->step, 0))
1972         break;
1973     }
1974
1975   if (i == n_iv_cands (data))
1976     {
1977       cand = XCNEW (struct iv_cand);
1978       cand->id = i;
1979
1980       if (!base && !step)
1981         cand->iv = NULL;
1982       else
1983         cand->iv = alloc_iv (base, step);
1984
1985       cand->pos = pos;
1986       if (pos != IP_ORIGINAL && cand->iv)
1987         {
1988           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1989           cand->var_after = cand->var_before;
1990         }
1991       cand->important = important;
1992       cand->incremented_at = incremented_at;
1993       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1994
1995       if (step
1996           && TREE_CODE (step) != INTEGER_CST)
1997         {
1998           fd_ivopts_data = data;
1999           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2000         }
2001
2002       if (dump_file && (dump_flags & TDF_DETAILS))
2003         dump_cand (dump_file, cand);
2004     }
2005
2006   if (important && !cand->important)
2007     {
2008       cand->important = true;
2009       if (dump_file && (dump_flags & TDF_DETAILS))
2010         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2011     }
2012
2013   if (use)
2014     {
2015       bitmap_set_bit (use->related_cands, i);
2016       if (dump_file && (dump_flags & TDF_DETAILS))
2017         fprintf (dump_file, "Candidate %d is related to use %d\n",
2018                  cand->id, use->id);
2019     }
2020
2021   return cand;
2022 }
2023
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2025    is allowed.
2026
2027    The purpose is to avoid splitting latch edge with a biv increment, thus
2028    creating a jump, possibly confusing other optimization passes and leaving
2029    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2030    is not available (so we do not have a better alternative), or if the latch
2031    edge is already nonempty.  */
2032
2033 static bool
2034 allow_ip_end_pos_p (struct loop *loop)
2035 {
2036   if (!ip_normal_pos (loop))
2037     return true;
2038
2039   if (!empty_block_p (ip_end_pos (loop)))
2040     return true;
2041
2042   return false;
2043 }
2044
2045 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2046    position to POS.  If USE is not NULL, the candidate is set as related to
2047    it.  The candidate computation is scheduled on all available positions.  */
2048
2049 static void
2050 add_candidate (struct ivopts_data *data,
2051                tree base, tree step, bool important, struct iv_use *use)
2052 {
2053   if (ip_normal_pos (data->current_loop))
2054     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055   if (ip_end_pos (data->current_loop)
2056       && allow_ip_end_pos_p (data->current_loop))
2057     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2058 }
2059
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061    type with SIZE bits.  */
2062
2063 static void
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2065                                      unsigned int size)
2066 {
2067   tree type = lang_hooks.types.type_for_size (size, true);
2068   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2069                  true, NULL);
2070 }
2071
2072 /* Adds standard iv candidates.  */
2073
2074 static void
2075 add_standard_iv_candidates (struct ivopts_data *data)
2076 {
2077   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2078
2079   /* The same for a double-integer type if it is still fast enough.  */
2080   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2082 }
2083
2084
2085 /* Adds candidates bases on the old induction variable IV.  */
2086
2087 static void
2088 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2089 {
2090   tree phi, def;
2091   struct iv_cand *cand;
2092
2093   add_candidate (data, iv->base, iv->step, true, NULL);
2094
2095   /* The same, but with initial value zero.  */
2096   add_candidate (data,
2097                  build_int_cst (TREE_TYPE (iv->base), 0),
2098                  iv->step, true, NULL);
2099
2100   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2101   if (TREE_CODE (phi) == PHI_NODE)
2102     {
2103       /* Additionally record the possibility of leaving the original iv
2104          untouched.  */
2105       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2106       cand = add_candidate_1 (data,
2107                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2108                               SSA_NAME_DEF_STMT (def));
2109       cand->var_before = iv->ssa_name;
2110       cand->var_after = def;
2111     }
2112 }
2113
2114 /* Adds candidates based on the old induction variables.  */
2115
2116 static void
2117 add_old_ivs_candidates (struct ivopts_data *data)
2118 {
2119   unsigned i;
2120   struct iv *iv;
2121   bitmap_iterator bi;
2122
2123   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2124     {
2125       iv = ver_info (data, i)->iv;
2126       if (iv && iv->biv_p && !integer_zerop (iv->step))
2127         add_old_iv_candidates (data, iv);
2128     }
2129 }
2130
2131 /* Adds candidates based on the value of the induction variable IV and USE.  */
2132
2133 static void
2134 add_iv_value_candidates (struct ivopts_data *data,
2135                          struct iv *iv, struct iv_use *use)
2136 {
2137   unsigned HOST_WIDE_INT offset;
2138   tree base;
2139
2140   add_candidate (data, iv->base, iv->step, false, use);
2141
2142   /* The same, but with initial value zero.  Make such variable important,
2143      since it is generic enough so that possibly many uses may be based
2144      on it.  */
2145   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146                  iv->step, true, use);
2147
2148   /* Third, try removing the constant offset.  */
2149   base = strip_offset (iv->base, &offset);
2150   if (offset)
2151     add_candidate (data, base, iv->step, false, use);
2152 }
2153
2154 /* Adds candidates based on the uses.  */
2155
2156 static void
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2158 {
2159   unsigned i;
2160
2161   for (i = 0; i < n_iv_uses (data); i++)
2162     {
2163       struct iv_use *use = iv_use (data, i);
2164
2165       if (!use)
2166         continue;
2167
2168       switch (use->type)
2169         {
2170         case USE_NONLINEAR_EXPR:
2171         case USE_COMPARE:
2172         case USE_ADDRESS:
2173           /* Just add the ivs based on the value of the iv used here.  */
2174           add_iv_value_candidates (data, use->iv, use);
2175           break;
2176
2177         default:
2178           gcc_unreachable ();
2179         }
2180     }
2181 }
2182
2183 /* Record important candidates and add them to related_cands bitmaps
2184    if needed.  */
2185
2186 static void
2187 record_important_candidates (struct ivopts_data *data)
2188 {
2189   unsigned i;
2190   struct iv_use *use;
2191
2192   for (i = 0; i < n_iv_cands (data); i++)
2193     {
2194       struct iv_cand *cand = iv_cand (data, i);
2195
2196       if (cand->important)
2197         bitmap_set_bit (data->important_candidates, i);
2198     }
2199
2200   data->consider_all_candidates = (n_iv_cands (data)
2201                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2202
2203   if (data->consider_all_candidates)
2204     {
2205       /* We will not need "related_cands" bitmaps in this case,
2206          so release them to decrease peak memory consumption.  */
2207       for (i = 0; i < n_iv_uses (data); i++)
2208         {
2209           use = iv_use (data, i);
2210           BITMAP_FREE (use->related_cands);
2211         }
2212     }
2213   else
2214     {
2215       /* Add important candidates to the related_cands bitmaps.  */
2216       for (i = 0; i < n_iv_uses (data); i++)
2217         bitmap_ior_into (iv_use (data, i)->related_cands,
2218                          data->important_candidates);
2219     }
2220 }
2221
2222 /* Finds the candidates for the induction variables.  */
2223
2224 static void
2225 find_iv_candidates (struct ivopts_data *data)
2226 {
2227   /* Add commonly used ivs.  */
2228   add_standard_iv_candidates (data);
2229
2230   /* Add old induction variables.  */
2231   add_old_ivs_candidates (data);
2232
2233   /* Add induction variables derived from uses.  */
2234   add_derived_ivs_candidates (data);
2235
2236   /* Record the important candidates.  */
2237   record_important_candidates (data);
2238 }
2239
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242    we allocate a simple list to every use.  */
2243
2244 static void
2245 alloc_use_cost_map (struct ivopts_data *data)
2246 {
2247   unsigned i, size, s, j;
2248
2249   for (i = 0; i < n_iv_uses (data); i++)
2250     {
2251       struct iv_use *use = iv_use (data, i);
2252       bitmap_iterator bi;
2253
2254       if (data->consider_all_candidates)
2255         size = n_iv_cands (data);
2256       else
2257         {
2258           s = 0;
2259           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2260             {
2261               s++;
2262             }
2263
2264           /* Round up to the power of two, so that moduling by it is fast.  */
2265           for (size = 1; size < s; size <<= 1)
2266             continue;
2267         }
2268
2269       use->n_map_members = size;
2270       use->cost_map = XCNEWVEC (struct cost_pair, size);
2271     }
2272 }
2273
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2275    on invariants DEPENDS_ON and that the value used in expressing it
2276    is VALUE.*/
2277
2278 static void
2279 set_use_iv_cost (struct ivopts_data *data,
2280                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281                  bitmap depends_on, tree value)
2282 {
2283   unsigned i, s;
2284
2285   if (cost == INFTY)
2286     {
2287       BITMAP_FREE (depends_on);
2288       return;
2289     }
2290
2291   if (data->consider_all_candidates)
2292     {
2293       use->cost_map[cand->id].cand = cand;
2294       use->cost_map[cand->id].cost = cost;
2295       use->cost_map[cand->id].depends_on = depends_on;
2296       use->cost_map[cand->id].value = value;
2297       return;
2298     }
2299
2300   /* n_map_members is a power of two, so this computes modulo.  */
2301   s = cand->id & (use->n_map_members - 1);
2302   for (i = s; i < use->n_map_members; i++)
2303     if (!use->cost_map[i].cand)
2304       goto found;
2305   for (i = 0; i < s; i++)
2306     if (!use->cost_map[i].cand)
2307       goto found;
2308
2309   gcc_unreachable ();
2310
2311 found:
2312   use->cost_map[i].cand = cand;
2313   use->cost_map[i].cost = cost;
2314   use->cost_map[i].depends_on = depends_on;
2315   use->cost_map[i].value = value;
2316 }
2317
2318 /* Gets cost of (USE, CANDIDATE) pair.  */
2319
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322                  struct iv_cand *cand)
2323 {
2324   unsigned i, s;
2325   struct cost_pair *ret;
2326
2327   if (!cand)
2328     return NULL;
2329
2330   if (data->consider_all_candidates)
2331     {
2332       ret = use->cost_map + cand->id;
2333       if (!ret->cand)
2334         return NULL;
2335
2336       return ret;
2337     }
2338
2339   /* n_map_members is a power of two, so this computes modulo.  */
2340   s = cand->id & (use->n_map_members - 1);
2341   for (i = s; i < use->n_map_members; i++)
2342     if (use->cost_map[i].cand == cand)
2343       return use->cost_map + i;
2344
2345   for (i = 0; i < s; i++)
2346     if (use->cost_map[i].cand == cand)
2347       return use->cost_map + i;
2348
2349   return NULL;
2350 }
2351
2352 /* Returns estimate on cost of computing SEQ.  */
2353
2354 static unsigned
2355 seq_cost (rtx seq)
2356 {
2357   unsigned cost = 0;
2358   rtx set;
2359
2360   for (; seq; seq = NEXT_INSN (seq))
2361     {
2362       set = single_set (seq);
2363       if (set)
2364         cost += rtx_cost (set, SET);
2365       else
2366         cost++;
2367     }
2368
2369   return cost;
2370 }
2371
2372 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2373 static rtx
2374 produce_memory_decl_rtl (tree obj, int *regno)
2375 {
2376   rtx x;
2377
2378   gcc_assert (obj);
2379   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2380     {
2381       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382       x = gen_rtx_SYMBOL_REF (Pmode, name);
2383     }
2384   else
2385     x = gen_raw_REG (Pmode, (*regno)++);
2386
2387   return gen_rtx_MEM (DECL_MODE (obj), x);
2388 }
2389
2390 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2391    walk_tree.  DATA contains the actual fake register number.  */
2392
2393 static tree
2394 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2395 {
2396   tree obj = NULL_TREE;
2397   rtx x = NULL_RTX;
2398   int *regno = data;
2399
2400   switch (TREE_CODE (*expr_p))
2401     {
2402     case ADDR_EXPR:
2403       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2404            handled_component_p (*expr_p);
2405            expr_p = &TREE_OPERAND (*expr_p, 0))
2406         continue;
2407       obj = *expr_p;
2408       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2409         x = produce_memory_decl_rtl (obj, regno);
2410       break;
2411
2412     case SSA_NAME:
2413       *ws = 0;
2414       obj = SSA_NAME_VAR (*expr_p);
2415       if (!DECL_RTL_SET_P (obj))
2416         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2417       break;
2418
2419     case VAR_DECL:
2420     case PARM_DECL:
2421     case RESULT_DECL:
2422       *ws = 0;
2423       obj = *expr_p;
2424
2425       if (DECL_RTL_SET_P (obj))
2426         break;
2427
2428       if (DECL_MODE (obj) == BLKmode)
2429         x = produce_memory_decl_rtl (obj, regno);
2430       else
2431         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2432
2433       break;
2434
2435     default:
2436       break;
2437     }
2438
2439   if (x)
2440     {
2441       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2442       SET_DECL_RTL (obj, x);
2443     }
2444
2445   return NULL_TREE;
2446 }
2447
2448 /* Determines cost of the computation of EXPR.  */
2449
2450 static unsigned
2451 computation_cost (tree expr)
2452 {
2453   rtx seq, rslt;
2454   tree type = TREE_TYPE (expr);
2455   unsigned cost;
2456   /* Avoid using hard regs in ways which may be unsupported.  */
2457   int regno = LAST_VIRTUAL_REGISTER + 1;
2458
2459   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2460   start_sequence ();
2461   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2462   seq = get_insns ();
2463   end_sequence ();
2464
2465   cost = seq_cost (seq);
2466   if (MEM_P (rslt))
2467     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2468
2469   return cost;
2470 }
2471
2472 /* Returns variable containing the value of candidate CAND at statement AT.  */
2473
2474 static tree
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2476 {
2477   if (stmt_after_increment (loop, cand, stmt))
2478     return cand->var_after;
2479   else
2480     return cand->var_before;
2481 }
2482
2483 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2484    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2485
2486 int
2487 tree_int_cst_sign_bit (tree t)
2488 {
2489   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490   unsigned HOST_WIDE_INT w;
2491
2492   if (bitno < HOST_BITS_PER_WIDE_INT)
2493     w = TREE_INT_CST_LOW (t);
2494   else
2495     {
2496       w = TREE_INT_CST_HIGH (t);
2497       bitno -= HOST_BITS_PER_WIDE_INT;
2498     }
2499
2500   return (w >> bitno) & 1;
2501 }
2502
2503 /* If we can prove that TOP = cst * BOT for some constant cst,
2504    store cst to MUL and return true.  Otherwise return false.
2505    The returned value is always sign-extended, regardless of the
2506    signedness of TOP and BOT.  */
2507
2508 static bool
2509 constant_multiple_of (tree top, tree bot, double_int *mul)
2510 {
2511   tree mby;
2512   enum tree_code code;
2513   double_int res, p0, p1;
2514   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2515
2516   STRIP_NOPS (top);
2517   STRIP_NOPS (bot);
2518
2519   if (operand_equal_p (top, bot, 0))
2520     {
2521       *mul = double_int_one;
2522       return true;
2523     }
2524
2525   code = TREE_CODE (top);
2526   switch (code)
2527     {
2528     case MULT_EXPR:
2529       mby = TREE_OPERAND (top, 1);
2530       if (TREE_CODE (mby) != INTEGER_CST)
2531         return false;
2532
2533       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2534         return false;
2535
2536       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2537                               precision);
2538       return true;
2539
2540     case PLUS_EXPR:
2541     case MINUS_EXPR:
2542       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2543           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2544         return false;
2545
2546       if (code == MINUS_EXPR)
2547         p1 = double_int_neg (p1);
2548       *mul = double_int_sext (double_int_add (p0, p1), precision);
2549       return true;
2550
2551     case INTEGER_CST:
2552       if (TREE_CODE (bot) != INTEGER_CST)
2553         return false;
2554
2555       p0 = double_int_sext (tree_to_double_int (top), precision);
2556       p1 = double_int_sext (tree_to_double_int (bot), precision);
2557       if (double_int_zero_p (p1))
2558         return false;
2559       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2560                               precision);
2561       return double_int_zero_p (res);
2562
2563     default:
2564       return false;
2565     }
2566 }
2567
2568 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2569    same precision that is at least as wide as the precision of TYPE, stores
2570    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2571    type of A and B.  */
2572
2573 static tree
2574 determine_common_wider_type (tree *a, tree *b)
2575 {
2576   tree wider_type = NULL;
2577   tree suba, subb;
2578   tree atype = TREE_TYPE (*a);
2579
2580   if ((TREE_CODE (*a) == NOP_EXPR
2581        || TREE_CODE (*a) == CONVERT_EXPR))
2582     {
2583       suba = TREE_OPERAND (*a, 0);
2584       wider_type = TREE_TYPE (suba);
2585       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2586         return atype;
2587     }
2588   else
2589     return atype;
2590
2591   if ((TREE_CODE (*b) == NOP_EXPR
2592        || TREE_CODE (*b) == CONVERT_EXPR))
2593     {
2594       subb = TREE_OPERAND (*b, 0);
2595       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2596         return atype;
2597     }
2598   else
2599     return atype;
2600
2601   *a = suba;
2602   *b = subb;
2603   return wider_type;
2604 }
2605
2606 /* Determines the expression by that USE is expressed from induction variable
2607    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2608    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2609
2610 static bool
2611 get_computation_aff (struct loop *loop,
2612                      struct iv_use *use, struct iv_cand *cand, tree at,
2613                      struct affine_tree_combination *aff)
2614 {
2615   tree ubase = use->iv->base;
2616   tree ustep = use->iv->step;
2617   tree cbase = cand->iv->base;
2618   tree cstep = cand->iv->step, cstep_common;
2619   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2620   tree common_type, var;
2621   tree uutype;
2622   aff_tree cbase_aff, var_aff;
2623   double_int rat;
2624
2625   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2626     {
2627       /* We do not have a precision to express the values of use.  */
2628       return false;
2629     }
2630
2631   var = var_at_stmt (loop, cand, at);
2632   uutype = unsigned_type_for (utype);
2633
2634   /* If the conversion is not noop, perform it.  */
2635   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2636     {
2637       cstep = fold_convert (uutype, cstep);
2638       cbase = fold_convert (uutype, cbase);
2639       var = fold_convert (uutype, var);
2640     }
2641
2642   if (!constant_multiple_of (ustep, cstep, &rat))
2643     return false;
2644
2645   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2646      type, we achieve better folding by computing their difference in this
2647      wider type, and cast the result to UUTYPE.  We do not need to worry about
2648      overflows, as all the arithmetics will in the end be performed in UUTYPE
2649      anyway.  */
2650   common_type = determine_common_wider_type (&ubase, &cbase);
2651
2652   /* use = ubase - ratio * cbase + ratio * var.  */
2653   tree_to_aff_combination (ubase, common_type, aff);
2654   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2655   tree_to_aff_combination (var, uutype, &var_aff);
2656
2657   /* We need to shift the value if we are after the increment.  */
2658   if (stmt_after_increment (loop, cand, at))
2659     {
2660       aff_tree cstep_aff;
2661
2662       if (common_type != uutype)
2663         cstep_common = fold_convert (common_type, cstep);
2664       else
2665         cstep_common = cstep;
2666
2667       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2668       aff_combination_add (&cbase_aff, &cstep_aff);
2669     }
2670
2671   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2672   aff_combination_add (aff, &cbase_aff);
2673   if (common_type != uutype)
2674     aff_combination_convert (aff, uutype);
2675
2676   aff_combination_scale (&var_aff, rat);
2677   aff_combination_add (aff, &var_aff);
2678
2679   return true;
2680 }
2681
2682 /* Determines the expression by that USE is expressed from induction variable
2683    CAND at statement AT in LOOP.  The computation is unshared.  */
2684
2685 static tree
2686 get_computation_at (struct loop *loop,
2687                     struct iv_use *use, struct iv_cand *cand, tree at)
2688 {
2689   aff_tree aff;
2690   tree type = TREE_TYPE (use->iv->base);
2691
2692   if (!get_computation_aff (loop, use, cand, at, &aff))
2693     return NULL_TREE;
2694   unshare_aff_combination (&aff);
2695   return fold_convert (type, aff_combination_to_tree (&aff));
2696 }
2697
2698 /* Determines the expression by that USE is expressed from induction variable
2699    CAND in LOOP.  The computation is unshared.  */
2700
2701 static tree
2702 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2703 {
2704   return get_computation_at (loop, use, cand, use->stmt);
2705 }
2706
2707 /* Returns cost of addition in MODE.  */
2708
2709 static unsigned
2710 add_cost (enum machine_mode mode)
2711 {
2712   static unsigned costs[NUM_MACHINE_MODES];
2713   rtx seq;
2714   unsigned cost;
2715
2716   if (costs[mode])
2717     return costs[mode];
2718
2719   start_sequence ();
2720   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2721                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2722                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2723                  NULL_RTX);
2724   seq = get_insns ();
2725   end_sequence ();
2726
2727   cost = seq_cost (seq);
2728   if (!cost)
2729     cost = 1;
2730
2731   costs[mode] = cost;
2732
2733   if (dump_file && (dump_flags & TDF_DETAILS))
2734     fprintf (dump_file, "Addition in %s costs %d\n",
2735              GET_MODE_NAME (mode), cost);
2736   return cost;
2737 }
2738
2739 /* Entry in a hashtable of already known costs for multiplication.  */
2740 struct mbc_entry
2741 {
2742   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2743   enum machine_mode mode;       /* In mode.  */
2744   unsigned cost;                /* The cost.  */
2745 };
2746
2747 /* Counts hash value for the ENTRY.  */
2748
2749 static hashval_t
2750 mbc_entry_hash (const void *entry)
2751 {
2752   const struct mbc_entry *e = entry;
2753
2754   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2755 }
2756
2757 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2758
2759 static int
2760 mbc_entry_eq (const void *entry1, const void *entry2)
2761 {
2762   const struct mbc_entry *e1 = entry1;
2763   const struct mbc_entry *e2 = entry2;
2764
2765   return (e1->mode == e2->mode
2766           && e1->cst == e2->cst);
2767 }
2768
2769 /* Returns cost of multiplication by constant CST in MODE.  */
2770
2771 unsigned
2772 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2773 {
2774   static htab_t costs;
2775   struct mbc_entry **cached, act;
2776   rtx seq;
2777   unsigned cost;
2778
2779   if (!costs)
2780     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2781
2782   act.mode = mode;
2783   act.cst = cst;
2784   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2785   if (*cached)
2786     return (*cached)->cost;
2787
2788   *cached = XNEW (struct mbc_entry);
2789   (*cached)->mode = mode;
2790   (*cached)->cst = cst;
2791
2792   start_sequence ();
2793   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2794                gen_int_mode (cst, mode), NULL_RTX, 0);
2795   seq = get_insns ();
2796   end_sequence ();
2797
2798   cost = seq_cost (seq);
2799
2800   if (dump_file && (dump_flags & TDF_DETAILS))
2801     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2802              (int) cst, GET_MODE_NAME (mode), cost);
2803
2804   (*cached)->cost = cost;
2805
2806   return cost;
2807 }
2808
2809 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2810    validity for a memory reference accessing memory of mode MODE.  */
2811
2812 bool
2813 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2814 {
2815 #define MAX_RATIO 128
2816   static sbitmap valid_mult[MAX_MACHINE_MODE];
2817
2818   if (!valid_mult[mode])
2819     {
2820       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2821       rtx addr;
2822       HOST_WIDE_INT i;
2823
2824       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2825       sbitmap_zero (valid_mult[mode]);
2826       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2827       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2828         {
2829           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2830           if (memory_address_p (mode, addr))
2831             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2832         }
2833
2834       if (dump_file && (dump_flags & TDF_DETAILS))
2835         {
2836           fprintf (dump_file, "  allowed multipliers:");
2837           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2838             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2839               fprintf (dump_file, " %d", (int) i);
2840           fprintf (dump_file, "\n");
2841           fprintf (dump_file, "\n");
2842         }
2843     }
2844
2845   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2846     return false;
2847
2848   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2849 }
2850
2851 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2852    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2853    variable is omitted.  Compute the cost for a memory reference that accesses
2854    a memory location of mode MEM_MODE.
2855
2856    TODO -- there must be some better way.  This all is quite crude.  */
2857
2858 static unsigned
2859 get_address_cost (bool symbol_present, bool var_present,
2860                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2861                   enum machine_mode mem_mode)
2862 {
2863   static bool initialized[MAX_MACHINE_MODE];
2864   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2865   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2866   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2867   unsigned cost, acost;
2868   bool offset_p, ratio_p;
2869   HOST_WIDE_INT s_offset;
2870   unsigned HOST_WIDE_INT mask;
2871   unsigned bits;
2872
2873   if (!initialized[mem_mode])
2874     {
2875       HOST_WIDE_INT i;
2876       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2877       int old_cse_not_expected;
2878       unsigned sym_p, var_p, off_p, rat_p, add_c;
2879       rtx seq, addr, base;
2880       rtx reg0, reg1;
2881
2882       initialized[mem_mode] = true;
2883
2884       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2885
2886       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2887       for (i = start; i <= 1 << 20; i <<= 1)
2888         {
2889           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2890           if (!memory_address_p (mem_mode, addr))
2891             break;
2892         }
2893       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2894       off[mem_mode] = max_offset[mem_mode];
2895
2896       for (i = start; i <= 1 << 20; i <<= 1)
2897         {
2898           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2899           if (!memory_address_p (mem_mode, addr))
2900             break;
2901         }
2902       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2903
2904       if (dump_file && (dump_flags & TDF_DETAILS))
2905         {
2906           fprintf (dump_file, "get_address_cost:\n");
2907           fprintf (dump_file, "  min offset %s %d\n",
2908                    GET_MODE_NAME (mem_mode),
2909                    (int) min_offset[mem_mode]);
2910           fprintf (dump_file, "  max offset %s %d\n",
2911                    GET_MODE_NAME (mem_mode),
2912                    (int) max_offset[mem_mode]);
2913         }
2914
2915       rat[mem_mode] = 1;
2916       for (i = 2; i <= MAX_RATIO; i++)
2917         if (multiplier_allowed_in_address_p (i, mem_mode))
2918           {
2919             rat[mem_mode] = i;
2920             break;
2921           }
2922
2923       /* Compute the cost of various addressing modes.  */
2924       acost = 0;
2925       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2926       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2927
2928       for (i = 0; i < 16; i++)
2929         {
2930           sym_p = i & 1;
2931           var_p = (i >> 1) & 1;
2932           off_p = (i >> 2) & 1;
2933           rat_p = (i >> 3) & 1;
2934
2935           addr = reg0;
2936           if (rat_p)
2937             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2938                                    gen_int_mode (rat[mem_mode], Pmode));
2939
2940           if (var_p)
2941             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2942
2943           if (sym_p)
2944             {
2945               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2946               if (off_p)
2947                 base = gen_rtx_fmt_e (CONST, Pmode,
2948                                       gen_rtx_fmt_ee (PLUS, Pmode,
2949                                                       base,
2950                                                       gen_int_mode (off[mem_mode],
2951                                                                     Pmode)));
2952             }
2953           else if (off_p)
2954             base = gen_int_mode (off[mem_mode], Pmode);
2955           else
2956             base = NULL_RTX;
2957
2958           if (base)
2959             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2960
2961           start_sequence ();
2962           /* To avoid splitting addressing modes, pretend that no cse will
2963              follow.  */
2964           old_cse_not_expected = cse_not_expected;
2965           cse_not_expected = true;
2966           addr = memory_address (mem_mode, addr);
2967           cse_not_expected = old_cse_not_expected;
2968           seq = get_insns ();
2969           end_sequence ();
2970
2971           acost = seq_cost (seq);
2972           acost += address_cost (addr, mem_mode);
2973
2974           if (!acost)
2975             acost = 1;
2976           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2977         }
2978
2979       /* On some targets, it is quite expensive to load symbol to a register,
2980          which makes addresses that contain symbols look much more expensive.
2981          However, the symbol will have to be loaded in any case before the
2982          loop (and quite likely we have it in register already), so it does not
2983          make much sense to penalize them too heavily.  So make some final
2984          tweaks for the SYMBOL_PRESENT modes:
2985
2986          If VAR_PRESENT is false, and the mode obtained by changing symbol to
2987          var is cheaper, use this mode with small penalty.
2988          If VAR_PRESENT is true, try whether the mode with
2989          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2990          if this is the case, use it.  */
2991       add_c = add_cost (Pmode);
2992       for (i = 0; i < 8; i++)
2993         {
2994           var_p = i & 1;
2995           off_p = (i >> 1) & 1;
2996           rat_p = (i >> 2) & 1;
2997
2998           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
2999           if (var_p)
3000             acost += add_c;
3001
3002           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3003             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3004         }
3005
3006       if (dump_file && (dump_flags & TDF_DETAILS))
3007         {
3008           fprintf (dump_file, "Address costs:\n");
3009
3010           for (i = 0; i < 16; i++)
3011             {
3012               sym_p = i & 1;
3013               var_p = (i >> 1) & 1;
3014               off_p = (i >> 2) & 1;
3015               rat_p = (i >> 3) & 1;
3016
3017               fprintf (dump_file, "  ");
3018               if (sym_p)
3019                 fprintf (dump_file, "sym + ");
3020               if (var_p)
3021                 fprintf (dump_file, "var + ");
3022               if (off_p)
3023                 fprintf (dump_file, "cst + ");
3024               if (rat_p)
3025                 fprintf (dump_file, "rat * ");
3026
3027               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3028               fprintf (dump_file, "index costs %d\n", acost);
3029             }
3030           fprintf (dump_file, "\n");
3031         }
3032     }
3033
3034   bits = GET_MODE_BITSIZE (Pmode);
3035   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3036   offset &= mask;
3037   if ((offset >> (bits - 1) & 1))
3038     offset |= ~mask;
3039   s_offset = offset;
3040
3041   cost = 0;
3042   offset_p = (s_offset != 0
3043               && min_offset[mem_mode] <= s_offset
3044               && s_offset <= max_offset[mem_mode]);
3045   ratio_p = (ratio != 1
3046              && multiplier_allowed_in_address_p (ratio, mem_mode));
3047
3048   if (ratio != 1 && !ratio_p)
3049     cost += multiply_by_cost (ratio, Pmode);
3050
3051   if (s_offset && !offset_p && !symbol_present)
3052     cost += add_cost (Pmode);
3053
3054   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3055   return cost + acost;
3056 }
3057
3058 /* Estimates cost of forcing expression EXPR into a variable.  */
3059
3060 unsigned
3061 force_expr_to_var_cost (tree expr)
3062 {
3063   static bool costs_initialized = false;
3064   static unsigned integer_cost;
3065   static unsigned symbol_cost;
3066   static unsigned address_cost;
3067   tree op0, op1;
3068   unsigned cost0, cost1, cost;
3069   enum machine_mode mode;
3070
3071   if (!costs_initialized)
3072     {
3073       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3074       rtx x = gen_rtx_MEM (DECL_MODE (var),
3075                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3076       tree addr;
3077       tree type = build_pointer_type (integer_type_node);
3078
3079       integer_cost = computation_cost (build_int_cst (integer_type_node,
3080                                                       2000));
3081
3082       SET_DECL_RTL (var, x);
3083       TREE_STATIC (var) = 1;
3084       addr = build1 (ADDR_EXPR, type, var);
3085       symbol_cost = computation_cost (addr) + 1;
3086
3087       address_cost
3088         = computation_cost (build2 (PLUS_EXPR, type,
3089                                     addr,
3090                                     build_int_cst (type, 2000))) + 1;
3091       if (dump_file && (dump_flags & TDF_DETAILS))
3092         {
3093           fprintf (dump_file, "force_expr_to_var_cost:\n");
3094           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3095           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3096           fprintf (dump_file, "  address %d\n", (int) address_cost);
3097           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3098           fprintf (dump_file, "\n");
3099         }
3100
3101       costs_initialized = true;
3102     }
3103
3104   STRIP_NOPS (expr);
3105
3106   if (SSA_VAR_P (expr))
3107     return 0;
3108
3109   if (TREE_INVARIANT (expr))
3110     {
3111       if (TREE_CODE (expr) == INTEGER_CST)
3112         return integer_cost;
3113
3114       if (TREE_CODE (expr) == ADDR_EXPR)
3115         {
3116           tree obj = TREE_OPERAND (expr, 0);
3117
3118           if (TREE_CODE (obj) == VAR_DECL
3119               || TREE_CODE (obj) == PARM_DECL
3120               || TREE_CODE (obj) == RESULT_DECL)
3121             return symbol_cost;
3122         }
3123
3124       return address_cost;
3125     }
3126
3127   switch (TREE_CODE (expr))
3128     {
3129     case PLUS_EXPR:
3130     case MINUS_EXPR:
3131     case MULT_EXPR:
3132       op0 = TREE_OPERAND (expr, 0);
3133       op1 = TREE_OPERAND (expr, 1);
3134       STRIP_NOPS (op0);
3135       STRIP_NOPS (op1);
3136
3137       if (is_gimple_val (op0))
3138         cost0 = 0;
3139       else
3140         cost0 = force_expr_to_var_cost (op0);
3141
3142       if (is_gimple_val (op1))
3143         cost1 = 0;
3144       else
3145         cost1 = force_expr_to_var_cost (op1);
3146
3147       break;
3148
3149     default:
3150       /* Just an arbitrary value, FIXME.  */
3151       return target_spill_cost;
3152     }
3153
3154   mode = TYPE_MODE (TREE_TYPE (expr));
3155   switch (TREE_CODE (expr))
3156     {
3157     case PLUS_EXPR:
3158     case MINUS_EXPR:
3159       cost = add_cost (mode);
3160       break;
3161
3162     case MULT_EXPR:
3163       if (cst_and_fits_in_hwi (op0))
3164         cost = multiply_by_cost (int_cst_value (op0), mode);
3165       else if (cst_and_fits_in_hwi (op1))
3166         cost = multiply_by_cost (int_cst_value (op1), mode);
3167       else
3168         return target_spill_cost;
3169       break;
3170
3171     default:
3172       gcc_unreachable ();
3173     }
3174
3175   cost += cost0;
3176   cost += cost1;
3177
3178   /* Bound the cost by target_spill_cost.  The parts of complicated
3179      computations often are either loop invariant or at least can
3180      be shared between several iv uses, so letting this grow without
3181      limits would not give reasonable results.  */
3182   return cost < target_spill_cost ? cost : target_spill_cost;
3183 }
3184
3185 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3186    invariants the computation depends on.  */
3187
3188 static unsigned
3189 force_var_cost (struct ivopts_data *data,
3190                 tree expr, bitmap *depends_on)
3191 {
3192   if (depends_on)
3193     {
3194       fd_ivopts_data = data;
3195       walk_tree (&expr, find_depends, depends_on, NULL);
3196     }
3197
3198   return force_expr_to_var_cost (expr);
3199 }
3200
3201 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3202    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3203    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3204    invariants the computation depends on.  */
3205
3206 static unsigned
3207 split_address_cost (struct ivopts_data *data,
3208                     tree addr, bool *symbol_present, bool *var_present,
3209                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3210 {
3211   tree core;
3212   HOST_WIDE_INT bitsize;
3213   HOST_WIDE_INT bitpos;
3214   tree toffset;
3215   enum machine_mode mode;
3216   int unsignedp, volatilep;
3217
3218   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3219                               &unsignedp, &volatilep, false);
3220
3221   if (toffset != 0
3222       || bitpos % BITS_PER_UNIT != 0
3223       || TREE_CODE (core) != VAR_DECL)
3224     {
3225       *symbol_present = false;
3226       *var_present = true;
3227       fd_ivopts_data = data;
3228       walk_tree (&addr, find_depends, depends_on, NULL);
3229       return target_spill_cost;
3230     }
3231
3232   *offset += bitpos / BITS_PER_UNIT;
3233   if (TREE_STATIC (core)
3234       || DECL_EXTERNAL (core))
3235     {
3236       *symbol_present = true;
3237       *var_present = false;
3238       return 0;
3239     }
3240
3241   *symbol_present = false;
3242   *var_present = true;
3243   return 0;
3244 }
3245
3246 /* Estimates cost of expressing difference of addresses E1 - E2 as
3247    var + symbol + offset.  The value of offset is added to OFFSET,
3248    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3249    part is missing.  DEPENDS_ON is a set of the invariants the computation
3250    depends on.  */
3251
3252 static unsigned
3253 ptr_difference_cost (struct ivopts_data *data,
3254                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3255                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3256 {
3257   HOST_WIDE_INT diff = 0;
3258   unsigned cost;
3259
3260   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3261
3262   if (ptr_difference_const (e1, e2, &diff))
3263     {
3264       *offset += diff;
3265       *symbol_present = false;
3266       *var_present = false;
3267       return 0;
3268     }
3269
3270   if (e2 == integer_zero_node)
3271     return split_address_cost (data, TREE_OPERAND (e1, 0),
3272                                symbol_present, var_present, offset, depends_on);
3273
3274   *symbol_present = false;
3275   *var_present = true;
3276
3277   cost = force_var_cost (data, e1, depends_on);
3278   cost += force_var_cost (data, e2, depends_on);
3279   cost += add_cost (Pmode);
3280
3281   return cost;
3282 }
3283
3284 /* Estimates cost of expressing difference E1 - E2 as
3285    var + symbol + offset.  The value of offset is added to OFFSET,
3286    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3287    part is missing.  DEPENDS_ON is a set of the invariants the computation
3288    depends on.  */
3289
3290 static unsigned
3291 difference_cost (struct ivopts_data *data,
3292                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3293                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3294 {
3295   unsigned cost;
3296   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3297   unsigned HOST_WIDE_INT off1, off2;
3298
3299   e1 = strip_offset (e1, &off1);
3300   e2 = strip_offset (e2, &off2);
3301   *offset += off1 - off2;
3302
3303   STRIP_NOPS (e1);
3304   STRIP_NOPS (e2);
3305
3306   if (TREE_CODE (e1) == ADDR_EXPR)
3307     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3308                                 depends_on);
3309   *symbol_present = false;
3310
3311   if (operand_equal_p (e1, e2, 0))
3312     {
3313       *var_present = false;
3314       return 0;
3315     }
3316   *var_present = true;
3317   if (integer_zerop (e2))
3318     return force_var_cost (data, e1, depends_on);
3319
3320   if (integer_zerop (e1))
3321     {
3322       cost = force_var_cost (data, e2, depends_on);
3323       cost += multiply_by_cost (-1, mode);
3324
3325       return cost;
3326     }
3327
3328   cost = force_var_cost (data, e1, depends_on);
3329   cost += force_var_cost (data, e2, depends_on);
3330   cost += add_cost (mode);
3331
3332   return cost;
3333 }
3334
3335 /* Determines the cost of the computation by that USE is expressed
3336    from induction variable CAND.  If ADDRESS_P is true, we just need
3337    to create an address from it, otherwise we want to get it into
3338    register.  A set of invariants we depend on is stored in
3339    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3340
3341 static unsigned
3342 get_computation_cost_at (struct ivopts_data *data,
3343                          struct iv_use *use, struct iv_cand *cand,
3344                          bool address_p, bitmap *depends_on, tree at)
3345 {
3346   tree ubase = use->iv->base, ustep = use->iv->step;
3347   tree cbase, cstep;
3348   tree utype = TREE_TYPE (ubase), ctype;
3349   unsigned HOST_WIDE_INT cstepi, offset = 0;
3350   HOST_WIDE_INT ratio, aratio;
3351   bool var_present, symbol_present;
3352   unsigned cost = 0, n_sums;
3353   double_int rat;
3354
3355   *depends_on = NULL;
3356
3357   /* Only consider real candidates.  */
3358   if (!cand->iv)
3359     return INFTY;
3360
3361   cbase = cand->iv->base;
3362   cstep = cand->iv->step;
3363   ctype = TREE_TYPE (cbase);
3364
3365   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3366     {
3367       /* We do not have a precision to express the values of use.  */
3368       return INFTY;
3369     }
3370
3371   if (address_p)
3372     {
3373       /* Do not try to express address of an object with computation based
3374          on address of a different object.  This may cause problems in rtl
3375          level alias analysis (that does not expect this to be happening,
3376          as this is illegal in C), and would be unlikely to be useful
3377          anyway.  */
3378       if (use->iv->base_object
3379           && cand->iv->base_object
3380           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3381         return INFTY;
3382     }
3383
3384   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3385     {
3386       /* TODO -- add direct handling of this case.  */
3387       goto fallback;
3388     }
3389
3390   /* CSTEPI is removed from the offset in case statement is after the
3391      increment.  If the step is not constant, we use zero instead.
3392      This is a bit imprecise (there is the extra addition), but
3393      redundancy elimination is likely to transform the code so that
3394      it uses value of the variable before increment anyway,
3395      so it is not that much unrealistic.  */
3396   if (cst_and_fits_in_hwi (cstep))
3397     cstepi = int_cst_value (cstep);
3398   else
3399     cstepi = 0;
3400
3401   if (!constant_multiple_of (ustep, cstep, &rat))
3402     return INFTY;
3403
3404   if (double_int_fits_in_shwi_p (rat))
3405     ratio = double_int_to_shwi (rat);
3406   else
3407     return INFTY;
3408
3409   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3410      or ratio == 1, it is better to handle this like
3411
3412      ubase - ratio * cbase + ratio * var
3413
3414      (also holds in the case ratio == -1, TODO.  */
3415
3416   if (cst_and_fits_in_hwi (cbase))
3417     {
3418       offset = - ratio * int_cst_value (cbase);
3419       cost += difference_cost (data,
3420                                ubase, integer_zero_node,
3421                                &symbol_present, &var_present, &offset,
3422                                depends_on);
3423     }
3424   else if (ratio == 1)
3425     {
3426       cost += difference_cost (data,
3427                                ubase, cbase,
3428                                &symbol_present, &var_present, &offset,
3429                                depends_on);
3430     }
3431   else
3432     {
3433       cost += force_var_cost (data, cbase, depends_on);
3434       cost += add_cost (TYPE_MODE (ctype));
3435       cost += difference_cost (data,
3436                                ubase, integer_zero_node,
3437                                &symbol_present, &var_present, &offset,
3438                                depends_on);
3439     }
3440
3441   /* If we are after the increment, the value of the candidate is higher by
3442      one iteration.  */
3443   if (stmt_after_increment (data->current_loop, cand, at))
3444     offset -= ratio * cstepi;
3445
3446   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3447      (symbol/var/const parts may be omitted).  If we are looking for an address,
3448      find the cost of addressing this.  */
3449   if (address_p)
3450     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3451                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3452
3453   /* Otherwise estimate the costs for computing the expression.  */
3454   aratio = ratio > 0 ? ratio : -ratio;
3455   if (!symbol_present && !var_present && !offset)
3456     {
3457       if (ratio != 1)
3458         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3459
3460       return cost;
3461     }
3462
3463   if (aratio != 1)
3464     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3465
3466   n_sums = 1;
3467   if (var_present
3468       /* Symbol + offset should be compile-time computable.  */
3469       && (symbol_present || offset))
3470     n_sums++;
3471
3472   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3473
3474 fallback:
3475   {
3476     /* Just get the expression, expand it and measure the cost.  */
3477     tree comp = get_computation_at (data->current_loop, use, cand, at);
3478
3479     if (!comp)
3480       return INFTY;
3481
3482     if (address_p)
3483       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3484
3485     return computation_cost (comp);
3486   }
3487 }
3488
3489 /* Determines the cost of the computation by that USE is expressed
3490    from induction variable CAND.  If ADDRESS_P is true, we just need
3491    to create an address from it, otherwise we want to get it into
3492    register.  A set of invariants we depend on is stored in
3493    DEPENDS_ON.  */
3494
3495 static unsigned
3496 get_computation_cost (struct ivopts_data *data,
3497                       struct iv_use *use, struct iv_cand *cand,
3498                       bool address_p, bitmap *depends_on)
3499 {
3500   return get_computation_cost_at (data,
3501                                   use, cand, address_p, depends_on, use->stmt);
3502 }
3503
3504 /* Determines cost of basing replacement of USE on CAND in a generic
3505    expression.  */
3506
3507 static bool
3508 determine_use_iv_cost_generic (struct ivopts_data *data,
3509                                struct iv_use *use, struct iv_cand *cand)
3510 {
3511   bitmap depends_on;
3512   unsigned cost;
3513
3514   /* The simple case first -- if we need to express value of the preserved
3515      original biv, the cost is 0.  This also prevents us from counting the
3516      cost of increment twice -- once at this use and once in the cost of
3517      the candidate.  */
3518   if (cand->pos == IP_ORIGINAL
3519       && cand->incremented_at == use->stmt)
3520     {
3521       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3522       return true;
3523     }
3524
3525   cost = get_computation_cost (data, use, cand, false, &depends_on);
3526   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3527
3528   return cost != INFTY;
3529 }
3530
3531 /* Determines cost of basing replacement of USE on CAND in an address.  */
3532
3533 static bool
3534 determine_use_iv_cost_address (struct ivopts_data *data,
3535                                struct iv_use *use, struct iv_cand *cand)
3536 {
3537   bitmap depends_on;
3538   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3539
3540   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3541
3542   return cost != INFTY;
3543 }
3544
3545 /* Computes value of candidate CAND at position AT in iteration NITER, and
3546    stores it to VAL.  */
3547
3548 static void
3549 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3550                aff_tree *val)
3551 {
3552   aff_tree step, delta, nit;
3553   struct iv *iv = cand->iv;
3554   tree type = TREE_TYPE (iv->base);
3555
3556   tree_to_aff_combination (iv->step, type, &step);
3557   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3558   aff_combination_convert (&nit, type);
3559   aff_combination_mult (&nit, &step, &delta);
3560   if (stmt_after_increment (loop, cand, at))
3561     aff_combination_add (&delta, &step);
3562
3563   tree_to_aff_combination (iv->base, type, val);
3564   aff_combination_add (val, &delta);
3565 }
3566
3567 /* Returns period of induction variable iv.  */
3568
3569 static tree
3570 iv_period (struct iv *iv)
3571 {
3572   tree step = iv->step, period, type;
3573   tree pow2div;
3574
3575   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3576
3577   /* Period of the iv is gcd (step, type range).  Since type range is power
3578      of two, it suffices to determine the maximum power of two that divides
3579      step.  */
3580   pow2div = num_ending_zeros (step);
3581   type = unsigned_type_for (TREE_TYPE (step));
3582
3583   period = build_low_bits_mask (type,
3584                                 (TYPE_PRECISION (type)
3585                                  - tree_low_cst (pow2div, 1)));
3586
3587   return period;
3588 }
3589
3590 /* Returns the comparison operator used when eliminating the iv USE.  */
3591
3592 static enum tree_code
3593 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3594 {
3595   struct loop *loop = data->current_loop;
3596   basic_block ex_bb;
3597   edge exit;
3598
3599   ex_bb = bb_for_stmt (use->stmt);
3600   exit = EDGE_SUCC (ex_bb, 0);
3601   if (flow_bb_inside_loop_p (loop, exit->dest))
3602     exit = EDGE_SUCC (ex_bb, 1);
3603
3604   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3605 }
3606
3607 /* Check whether it is possible to express the condition in USE by comparison
3608    of candidate CAND.  If so, store the value compared with to BOUND.  */
3609
3610 static bool
3611 may_eliminate_iv (struct ivopts_data *data,
3612                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3613 {
3614   basic_block ex_bb;
3615   edge exit;
3616   tree nit, nit_type;
3617   tree wider_type, period, per_type;
3618   struct loop *loop = data->current_loop;
3619   aff_tree bnd;
3620
3621   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3622     return false;
3623
3624   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3625      for other conditions inside loop body.  */
3626   ex_bb = bb_for_stmt (use->stmt);
3627   if (use->stmt != last_stmt (ex_bb)
3628       || TREE_CODE (use->stmt) != COND_EXPR)
3629     return false;
3630   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3631     return false;
3632
3633   exit = EDGE_SUCC (ex_bb, 0);
3634   if (flow_bb_inside_loop_p (loop, exit->dest))
3635     exit = EDGE_SUCC (ex_bb, 1);
3636   if (flow_bb_inside_loop_p (loop, exit->dest))
3637     return false;
3638
3639   nit = niter_for_exit (data, exit);
3640   if (!nit)
3641     return false;
3642
3643   nit_type = TREE_TYPE (nit);
3644
3645   /* Determine whether we may use the variable to test whether niter iterations
3646      elapsed.  This is the case iff the period of the induction variable is
3647      greater than the number of iterations.  */
3648   period = iv_period (cand->iv);
3649   if (!period)
3650     return false;
3651   per_type = TREE_TYPE (period);
3652
3653   wider_type = TREE_TYPE (period);
3654   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3655     wider_type = per_type;
3656   else
3657     wider_type = nit_type;
3658
3659   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3660                                       fold_convert (wider_type, period),
3661                                       fold_convert (wider_type, nit))))
3662     return false;
3663
3664   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3665   *bound = aff_combination_to_tree (&bnd);
3666   return true;
3667 }
3668
3669 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3670
3671 static bool
3672 determine_use_iv_cost_condition (struct ivopts_data *data,
3673                                  struct iv_use *use, struct iv_cand *cand)
3674 {
3675   tree bound = NULL_TREE, op, cond;
3676   bitmap depends_on = NULL;
3677   unsigned cost;
3678
3679   /* Only consider real candidates.  */
3680   if (!cand->iv)
3681     {
3682       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3683       return false;
3684     }
3685
3686   if (may_eliminate_iv (data, use, cand, &bound))
3687     {
3688       cost = force_var_cost (data, bound, &depends_on);
3689
3690       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3691       return cost != INFTY;
3692     }
3693
3694   /* The induction variable elimination failed; just express the original
3695      giv.  If it is compared with an invariant, note that we cannot get
3696      rid of it.  */
3697   cost = get_computation_cost (data, use, cand, false, &depends_on);
3698
3699   cond = *use->op_p;
3700   if (TREE_CODE (cond) != SSA_NAME)
3701     {
3702       op = TREE_OPERAND (cond, 0);
3703       if (TREE_CODE (op) == SSA_NAME
3704           && !integer_zerop (get_iv (data, op)->step))
3705         op = TREE_OPERAND (cond, 1);
3706       if (TREE_CODE (op) == SSA_NAME)
3707         {
3708           op = get_iv (data, op)->base;
3709           fd_ivopts_data = data;
3710           walk_tree (&op, find_depends, &depends_on, NULL);
3711         }
3712     }
3713
3714   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3715   return cost != INFTY;
3716 }
3717
3718 /* Determines cost of basing replacement of USE on CAND.  Returns false
3719    if USE cannot be based on CAND.  */
3720
3721 static bool
3722 determine_use_iv_cost (struct ivopts_data *data,
3723                        struct iv_use *use, struct iv_cand *cand)
3724 {
3725   switch (use->type)
3726     {
3727     case USE_NONLINEAR_EXPR:
3728       return determine_use_iv_cost_generic (data, use, cand);
3729
3730     case USE_ADDRESS:
3731       return determine_use_iv_cost_address (data, use, cand);
3732
3733     case USE_COMPARE:
3734       return determine_use_iv_cost_condition (data, use, cand);
3735
3736     default:
3737       gcc_unreachable ();
3738     }
3739 }
3740
3741 /* Determines costs of basing the use of the iv on an iv candidate.  */
3742
3743 static void
3744 determine_use_iv_costs (struct ivopts_data *data)
3745 {
3746   unsigned i, j;
3747   struct iv_use *use;
3748   struct iv_cand *cand;
3749   bitmap to_clear = BITMAP_ALLOC (NULL);
3750
3751   alloc_use_cost_map (data);
3752
3753   for (i = 0; i < n_iv_uses (data); i++)
3754     {
3755       use = iv_use (data, i);
3756
3757       if (data->consider_all_candidates)
3758         {
3759           for (j = 0; j < n_iv_cands (data); j++)
3760             {
3761               cand = iv_cand (data, j);
3762               determine_use_iv_cost (data, use, cand);
3763             }
3764         }
3765       else
3766         {
3767           bitmap_iterator bi;
3768
3769           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3770             {
3771               cand = iv_cand (data, j);
3772               if (!determine_use_iv_cost (data, use, cand))
3773                 bitmap_set_bit (to_clear, j);
3774             }
3775
3776           /* Remove the candidates for that the cost is infinite from
3777              the list of related candidates.  */
3778           bitmap_and_compl_into (use->related_cands, to_clear);
3779           bitmap_clear (to_clear);
3780         }
3781     }
3782
3783   BITMAP_FREE (to_clear);
3784
3785   if (dump_file && (dump_flags & TDF_DETAILS))
3786     {
3787       fprintf (dump_file, "Use-candidate costs:\n");
3788
3789       for (i = 0; i < n_iv_uses (data); i++)
3790         {
3791           use = iv_use (data, i);
3792
3793           fprintf (dump_file, "Use %d:\n", i);
3794           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3795           for (j = 0; j < use->n_map_members; j++)
3796             {
3797               if (!use->cost_map[j].cand
3798                   || use->cost_map[j].cost == INFTY)
3799                 continue;
3800
3801               fprintf (dump_file, "  %d\t%d\t",
3802                        use->cost_map[j].cand->id,
3803                        use->cost_map[j].cost);
3804               if (use->cost_map[j].depends_on)
3805                 bitmap_print (dump_file,
3806                               use->cost_map[j].depends_on, "","");
3807               fprintf (dump_file, "\n");
3808             }
3809
3810           fprintf (dump_file, "\n");
3811         }
3812       fprintf (dump_file, "\n");
3813     }
3814 }
3815
3816 /* Determines cost of the candidate CAND.  */
3817
3818 static void
3819 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3820 {
3821   unsigned cost_base, cost_step;
3822   tree base;
3823
3824   if (!cand->iv)
3825     {
3826       cand->cost = 0;
3827       return;
3828     }
3829
3830   /* There are two costs associated with the candidate -- its increment
3831      and its initialization.  The second is almost negligible for any loop
3832      that rolls enough, so we take it just very little into account.  */
3833
3834   base = cand->iv->base;
3835   cost_base = force_var_cost (data, base, NULL);
3836   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3837
3838   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3839
3840   /* Prefer the original iv unless we may gain something by replacing it;
3841      this is not really relevant for artificial ivs created by other
3842      passes.  */
3843   if (cand->pos == IP_ORIGINAL
3844       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3845     cand->cost--;
3846
3847   /* Prefer not to insert statements into latch unless there are some
3848      already (so that we do not create unnecessary jumps).  */
3849   if (cand->pos == IP_END
3850       && empty_block_p (ip_end_pos (data->current_loop)))
3851     cand->cost++;
3852 }
3853
3854 /* Determines costs of computation of the candidates.  */
3855
3856 static void
3857 determine_iv_costs (struct ivopts_data *data)
3858 {
3859   unsigned i;
3860
3861   if (dump_file && (dump_flags & TDF_DETAILS))
3862     {
3863       fprintf (dump_file, "Candidate costs:\n");
3864       fprintf (dump_file, "  cand\tcost\n");
3865     }
3866
3867   for (i = 0; i < n_iv_cands (data); i++)
3868     {
3869       struct iv_cand *cand = iv_cand (data, i);
3870
3871       determine_iv_cost (data, cand);
3872
3873       if (dump_file && (dump_flags & TDF_DETAILS))
3874         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3875     }
3876
3877   if (dump_file && (dump_flags & TDF_DETAILS))
3878     fprintf (dump_file, "\n");
3879 }
3880
3881 /* Calculates cost for having SIZE induction variables.  */
3882
3883 static unsigned
3884 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3885 {
3886   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3887 }
3888
3889 /* For each size of the induction variable set determine the penalty.  */
3890
3891 static void
3892 determine_set_costs (struct ivopts_data *data)
3893 {
3894   unsigned j, n;
3895   tree phi, op;
3896   struct loop *loop = data->current_loop;
3897   bitmap_iterator bi;
3898
3899   /* We use the following model (definitely improvable, especially the
3900      cost function -- TODO):
3901
3902      We estimate the number of registers available (using MD data), name it A.
3903
3904      We estimate the number of registers used by the loop, name it U.  This
3905      number is obtained as the number of loop phi nodes (not counting virtual
3906      registers and bivs) + the number of variables from outside of the loop.
3907
3908      We set a reserve R (free regs that are used for temporary computations,
3909      etc.).  For now the reserve is a constant 3.
3910
3911      Let I be the number of induction variables.
3912
3913      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3914         make a lot of ivs without a reason).
3915      -- if A - R < U + I <= A, the cost is I * PRES_COST
3916      -- if U + I > A, the cost is I * PRES_COST and
3917         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3918
3919   if (dump_file && (dump_flags & TDF_DETAILS))
3920     {
3921       fprintf (dump_file, "Global costs:\n");
3922       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3923       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3924       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3925       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3926     }
3927
3928   n = 0;
3929   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3930     {
3931       op = PHI_RESULT (phi);
3932
3933       if (!is_gimple_reg (op))
3934         continue;
3935
3936       if (get_iv (data, op))
3937         continue;
3938
3939       n++;
3940     }
3941
3942   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3943     {
3944       struct version_info *info = ver_info (data, j);
3945
3946       if (info->inv_id && info->has_nonlin_use)
3947         n++;
3948     }
3949
3950   data->regs_used = n;
3951   if (dump_file && (dump_flags & TDF_DETAILS))
3952     fprintf (dump_file, "  regs_used %d\n", n);
3953
3954   if (dump_file && (dump_flags & TDF_DETAILS))
3955     {
3956       fprintf (dump_file, "  cost for size:\n");
3957       fprintf (dump_file, "  ivs\tcost\n");
3958       for (j = 0; j <= 2 * target_avail_regs; j++)
3959         fprintf (dump_file, "  %d\t%d\n", j,
3960                  ivopts_global_cost_for_size (data, j));
3961       fprintf (dump_file, "\n");
3962     }
3963 }
3964
3965 /* Returns true if A is a cheaper cost pair than B.  */
3966
3967 static bool
3968 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3969 {
3970   if (!a)
3971     return false;
3972
3973   if (!b)
3974     return true;
3975
3976   if (a->cost < b->cost)
3977     return true;
3978
3979   if (a->cost > b->cost)
3980     return false;
3981
3982   /* In case the costs are the same, prefer the cheaper candidate.  */
3983   if (a->cand->cost < b->cand->cost)
3984     return true;
3985
3986   return false;
3987 }
3988
3989 /* Computes the cost field of IVS structure.  */
3990
3991 static void
3992 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3993 {
3994   unsigned cost = 0;
3995
3996   cost += ivs->cand_use_cost;
3997   cost += ivs->cand_cost;
3998   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3999
4000   ivs->cost = cost;
4001 }
4002
4003 /* Remove invariants in set INVS to set IVS.  */
4004
4005 static void
4006 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4007 {
4008   bitmap_iterator bi;
4009   unsigned iid;
4010
4011   if (!invs)
4012     return;
4013
4014   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4015     {
4016       ivs->n_invariant_uses[iid]--;
4017       if (ivs->n_invariant_uses[iid] == 0)
4018         ivs->n_regs--;
4019     }
4020 }
4021
4022 /* Set USE not to be expressed by any candidate in IVS.  */
4023
4024 static void
4025 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4026                  struct iv_use *use)
4027 {
4028   unsigned uid = use->id, cid;
4029   struct cost_pair *cp;
4030
4031   cp = ivs->cand_for_use[uid];
4032   if (!cp)
4033     return;
4034   cid = cp->cand->id;
4035
4036   ivs->bad_uses++;
4037   ivs->cand_for_use[uid] = NULL;
4038   ivs->n_cand_uses[cid]--;
4039
4040   if (ivs->n_cand_uses[cid] == 0)
4041     {
4042       bitmap_clear_bit (ivs->cands, cid);
4043       /* Do not count the pseudocandidates.  */
4044       if (cp->cand->iv)
4045         ivs->n_regs--;
4046       ivs->n_cands--;
4047       ivs->cand_cost -= cp->cand->cost;
4048
4049       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4050     }
4051
4052   ivs->cand_use_cost -= cp->cost;
4053
4054   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4055   iv_ca_recount_cost (data, ivs);
4056 }
4057
4058 /* Add invariants in set INVS to set IVS.  */
4059
4060 static void
4061 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4062 {
4063   bitmap_iterator bi;
4064   unsigned iid;
4065
4066   if (!invs)
4067     return;
4068
4069   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4070     {
4071       ivs->n_invariant_uses[iid]++;
4072       if (ivs->n_invariant_uses[iid] == 1)
4073         ivs->n_regs++;
4074     }
4075 }
4076
4077 /* Set cost pair for USE in set IVS to CP.  */
4078
4079 static void
4080 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4081               struct iv_use *use, struct cost_pair *cp)
4082 {
4083   unsigned uid = use->id, cid;
4084
4085   if (ivs->cand_for_use[uid] == cp)
4086     return;
4087
4088   if (ivs->cand_for_use[uid])
4089     iv_ca_set_no_cp (data, ivs, use);
4090
4091   if (cp)
4092     {
4093       cid = cp->cand->id;
4094
4095       ivs->bad_uses--;
4096       ivs->cand_for_use[uid] = cp;
4097       ivs->n_cand_uses[cid]++;
4098       if (ivs->n_cand_uses[cid] == 1)
4099         {
4100           bitmap_set_bit (ivs->cands, cid);
4101           /* Do not count the pseudocandidates.  */
4102           if (cp->cand->iv)
4103             ivs->n_regs++;
4104           ivs->n_cands++;
4105           ivs->cand_cost += cp->cand->cost;
4106
4107           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4108         }
4109
4110       ivs->cand_use_cost += cp->cost;
4111       iv_ca_set_add_invariants (ivs, cp->depends_on);
4112       iv_ca_recount_cost (data, ivs);
4113     }
4114 }
4115
4116 /* Extend set IVS by expressing USE by some of the candidates in it
4117    if possible.  */
4118
4119 static void
4120 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4121                struct iv_use *use)
4122 {
4123   struct cost_pair *best_cp = NULL, *cp;
4124   bitmap_iterator bi;
4125   unsigned i;
4126
4127   gcc_assert (ivs->upto >= use->id);
4128
4129   if (ivs->upto == use->id)
4130     {
4131       ivs->upto++;
4132       ivs->bad_uses++;
4133     }
4134
4135   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4136     {
4137       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4138
4139       if (cheaper_cost_pair (cp, best_cp))
4140         best_cp = cp;
4141     }
4142
4143   iv_ca_set_cp (data, ivs, use, best_cp);
4144 }
4145
4146 /* Get cost for assignment IVS.  */
4147
4148 static unsigned
4149 iv_ca_cost (struct iv_ca *ivs)
4150 {
4151   return (ivs->bad_uses ? INFTY : ivs->cost);
4152 }
4153
4154 /* Returns true if all dependences of CP are among invariants in IVS.  */
4155
4156 static bool
4157 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4158 {
4159   unsigned i;
4160   bitmap_iterator bi;
4161
4162   if (!cp->depends_on)
4163     return true;
4164
4165   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4166     {
4167       if (ivs->n_invariant_uses[i] == 0)
4168         return false;
4169     }
4170
4171   return true;
4172 }
4173
4174 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4175    it before NEXT_CHANGE.  */
4176
4177 static struct iv_ca_delta *
4178 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4179                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4180 {
4181   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4182
4183   change->use = use;
4184   change->old_cp = old_cp;
4185   change->new_cp = new_cp;
4186   change->next_change = next_change;
4187
4188   return change;
4189 }
4190
4191 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4192    are rewritten.  */
4193
4194 static struct iv_ca_delta *
4195 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4196 {
4197   struct iv_ca_delta *last;
4198
4199   if (!l2)
4200     return l1;
4201
4202   if (!l1)
4203     return l2;
4204
4205   for (last = l1; last->next_change; last = last->next_change)
4206     continue;
4207   last->next_change = l2;
4208
4209   return l1;
4210 }
4211
4212 /* Returns candidate by that USE is expressed in IVS.  */
4213
4214 static struct cost_pair *
4215 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4216 {
4217   return ivs->cand_for_use[use->id];
4218 }
4219
4220 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4221
4222 static struct iv_ca_delta *
4223 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4224 {
4225   struct iv_ca_delta *act, *next, *prev = NULL;
4226   struct cost_pair *tmp;
4227
4228   for (act = delta; act; act = next)
4229     {
4230       next = act->next_change;
4231       act->next_change = prev;
4232       prev = act;
4233
4234       tmp = act->old_cp;
4235       act->old_cp = act->new_cp;
4236       act->new_cp = tmp;
4237     }
4238
4239   return prev;
4240 }
4241
4242 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4243    reverted instead.  */
4244
4245 static void
4246 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4247                     struct iv_ca_delta *delta, bool forward)
4248 {
4249   struct cost_pair *from, *to;
4250   struct iv_ca_delta *act;
4251
4252   if (!forward)
4253     delta = iv_ca_delta_reverse (delta);
4254
4255   for (act = delta; act; act = act->next_change)
4256     {
4257       from = act->old_cp;
4258       to = act->new_cp;
4259       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4260       iv_ca_set_cp (data, ivs, act->use, to);
4261     }
4262
4263   if (!forward)
4264     iv_ca_delta_reverse (delta);
4265 }
4266
4267 /* Returns true if CAND is used in IVS.  */
4268
4269 static bool
4270 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4271 {
4272   return ivs->n_cand_uses[cand->id] > 0;
4273 }
4274
4275 /* Returns number of induction variable candidates in the set IVS.  */
4276
4277 static unsigned
4278 iv_ca_n_cands (struct iv_ca *ivs)
4279 {
4280   return ivs->n_cands;
4281 }
4282
4283 /* Free the list of changes DELTA.  */
4284
4285 static void
4286 iv_ca_delta_free (struct iv_ca_delta **delta)
4287 {
4288   struct iv_ca_delta *act, *next;
4289
4290   for (act = *delta; act; act = next)
4291     {
4292       next = act->next_change;
4293       free (act);
4294     }
4295
4296   *delta = NULL;
4297 }
4298
4299 /* Allocates new iv candidates assignment.  */
4300
4301 static struct iv_ca *
4302 iv_ca_new (struct ivopts_data *data)
4303 {
4304   struct iv_ca *nw = XNEW (struct iv_ca);
4305
4306   nw->upto = 0;
4307   nw->bad_uses = 0;
4308   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4309   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4310   nw->cands = BITMAP_ALLOC (NULL);
4311   nw->n_cands = 0;
4312   nw->n_regs = 0;
4313   nw->cand_use_cost = 0;
4314   nw->cand_cost = 0;
4315   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4316   nw->cost = 0;
4317
4318   return nw;
4319 }
4320
4321 /* Free memory occupied by the set IVS.  */
4322
4323 static void
4324 iv_ca_free (struct iv_ca **ivs)
4325 {
4326   free ((*ivs)->cand_for_use);
4327   free ((*ivs)->n_cand_uses);
4328   BITMAP_FREE ((*ivs)->cands);
4329   free ((*ivs)->n_invariant_uses);
4330   free (*ivs);
4331   *ivs = NULL;
4332 }
4333
4334 /* Dumps IVS to FILE.  */
4335
4336 static void
4337 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4338 {
4339   const char *pref = "  invariants ";
4340   unsigned i;
4341
4342   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4343   bitmap_print (file, ivs->cands, "  candidates ","\n");
4344
4345   for (i = 1; i <= data->max_inv_id; i++)
4346     if (ivs->n_invariant_uses[i])
4347       {
4348         fprintf (file, "%s%d", pref, i);
4349         pref = ", ";
4350       }
4351   fprintf (file, "\n");
4352 }
4353
4354 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4355    new set, and store differences in DELTA.  Number of induction variables
4356    in the new set is stored to N_IVS.  */
4357
4358 static unsigned
4359 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4360               struct iv_cand *cand, struct iv_ca_delta **delta,
4361               unsigned *n_ivs)
4362 {
4363   unsigned i, cost;
4364   struct iv_use *use;
4365   struct cost_pair *old_cp, *new_cp;
4366
4367   *delta = NULL;
4368   for (i = 0; i < ivs->upto; i++)
4369     {
4370       use = iv_use (data, i);
4371       old_cp = iv_ca_cand_for_use (ivs, use);
4372
4373       if (old_cp
4374           && old_cp->cand == cand)
4375         continue;
4376
4377       new_cp = get_use_iv_cost (data, use, cand);
4378       if (!new_cp)
4379         continue;
4380
4381       if (!iv_ca_has_deps (ivs, new_cp))
4382         continue;
4383
4384       if (!cheaper_cost_pair (new_cp, old_cp))
4385         continue;
4386
4387       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4388     }
4389
4390   iv_ca_delta_commit (data, ivs, *delta, true);
4391   cost = iv_ca_cost (ivs);
4392   if (n_ivs)
4393     *n_ivs = iv_ca_n_cands (ivs);
4394   iv_ca_delta_commit (data, ivs, *delta, false);
4395
4396   return cost;
4397 }
4398
4399 /* Try narrowing set IVS by removing CAND.  Return the cost of
4400    the new set and store the differences in DELTA.  */
4401
4402 static unsigned
4403 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4404               struct iv_cand *cand, struct iv_ca_delta **delta)
4405 {
4406   unsigned i, ci;
4407   struct iv_use *use;
4408   struct cost_pair *old_cp, *new_cp, *cp;
4409   bitmap_iterator bi;
4410   struct iv_cand *cnd;
4411   unsigned cost;
4412
4413   *delta = NULL;
4414   for (i = 0; i < n_iv_uses (data); i++)
4415     {
4416       use = iv_use (data, i);
4417
4418       old_cp = iv_ca_cand_for_use (ivs, use);
4419       if (old_cp->cand != cand)
4420         continue;
4421
4422       new_cp = NULL;
4423
4424       if (data->consider_all_candidates)
4425         {
4426           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4427             {
4428               if (ci == cand->id)
4429                 continue;
4430
4431               cnd = iv_cand (data, ci);
4432
4433               cp = get_use_iv_cost (data, use, cnd);
4434               if (!cp)
4435                 continue;
4436               if (!iv_ca_has_deps (ivs, cp))
4437                 continue;
4438
4439               if (!cheaper_cost_pair (cp, new_cp))
4440                 continue;
4441
4442               new_cp = cp;
4443             }
4444         }
4445       else
4446         {
4447           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4448             {
4449               if (ci == cand->id)
4450                 continue;
4451
4452               cnd = iv_cand (data, ci);
4453
4454               cp = get_use_iv_cost (data, use, cnd);
4455               if (!cp)
4456                 continue;
4457               if (!iv_ca_has_deps (ivs, cp))
4458                 continue;
4459
4460               if (!cheaper_cost_pair (cp, new_cp))
4461                 continue;
4462
4463               new_cp = cp;
4464             }
4465         }
4466
4467       if (!new_cp)
4468         {
4469           iv_ca_delta_free (delta);
4470           return INFTY;
4471         }
4472
4473       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4474     }
4475
4476   iv_ca_delta_commit (data, ivs, *delta, true);
4477   cost = iv_ca_cost (ivs);
4478   iv_ca_delta_commit (data, ivs, *delta, false);
4479
4480   return cost;
4481 }
4482
4483 /* Try optimizing the set of candidates IVS by removing candidates different
4484    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4485    differences in DELTA.  */
4486
4487 static unsigned
4488 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4489              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4490 {
4491   bitmap_iterator bi;
4492   struct iv_ca_delta *act_delta, *best_delta;
4493   unsigned i, best_cost, acost;
4494   struct iv_cand *cand;
4495
4496   best_delta = NULL;
4497   best_cost = iv_ca_cost (ivs);
4498
4499   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4500     {
4501       cand = iv_cand (data, i);
4502
4503       if (cand == except_cand)
4504         continue;
4505
4506       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4507
4508       if (acost < best_cost)
4509         {
4510           best_cost = acost;
4511           iv_ca_delta_free (&best_delta);
4512           best_delta = act_delta;
4513         }
4514       else
4515         iv_ca_delta_free (&act_delta);
4516     }
4517
4518   if (!best_delta)
4519     {
4520       *delta = NULL;
4521       return best_cost;
4522     }
4523
4524   /* Recurse to possibly remove other unnecessary ivs.  */
4525   iv_ca_delta_commit (data, ivs, best_delta, true);
4526   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4527   iv_ca_delta_commit (data, ivs, best_delta, false);
4528   *delta = iv_ca_delta_join (best_delta, *delta);
4529   return best_cost;
4530 }
4531
4532 /* Tries to extend the sets IVS in the best possible way in order
4533    to express the USE.  */
4534
4535 static bool
4536 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4537                   struct iv_use *use)
4538 {
4539   unsigned best_cost, act_cost;
4540   unsigned i;
4541   bitmap_iterator bi;
4542   struct iv_cand *cand;
4543   struct iv_ca_delta *best_delta = NULL, *act_delta;
4544   struct cost_pair *cp;
4545
4546   iv_ca_add_use (data, ivs, use);
4547   best_cost = iv_ca_cost (ivs);
4548
4549   cp = iv_ca_cand_for_use (ivs, use);
4550   if (cp)
4551     {
4552       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4553       iv_ca_set_no_cp (data, ivs, use);
4554     }
4555
4556   /* First try important candidates.  Only if it fails, try the specific ones.
4557      Rationale -- in loops with many variables the best choice often is to use
4558      just one generic biv.  If we added here many ivs specific to the uses,
4559      the optimization algorithm later would be likely to get stuck in a local
4560      minimum, thus causing us to create too many ivs.  The approach from
4561      few ivs to more seems more likely to be successful -- starting from few
4562      ivs, replacing an expensive use by a specific iv should always be a
4563      win.  */
4564   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4565     {
4566       cand = iv_cand (data, i);
4567
4568       if (iv_ca_cand_used_p (ivs, cand))
4569         continue;
4570
4571       cp = get_use_iv_cost (data, use, cand);
4572       if (!cp)
4573         continue;
4574
4575       iv_ca_set_cp (data, ivs, use, cp);
4576       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4577       iv_ca_set_no_cp (data, ivs, use);
4578       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4579
4580       if (act_cost < best_cost)
4581         {
4582           best_cost = act_cost;
4583
4584           iv_ca_delta_free (&best_delta);
4585           best_delta = act_delta;
4586         }
4587       else
4588         iv_ca_delta_free (&act_delta);
4589     }
4590
4591   if (best_cost == INFTY)
4592     {
4593       for (i = 0; i < use->n_map_members; i++)
4594         {
4595           cp = use->cost_map + i;
4596           cand = cp->cand;
4597           if (!cand)
4598             continue;
4599
4600           /* Already tried this.  */
4601           if (cand->important)
4602             continue;
4603
4604           if (iv_ca_cand_used_p (ivs, cand))
4605             continue;
4606
4607           act_delta = NULL;
4608           iv_ca_set_cp (data, ivs, use, cp);
4609           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4610           iv_ca_set_no_cp (data, ivs, use);
4611           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4612                                        cp, act_delta);
4613
4614           if (act_cost < best_cost)
4615             {
4616               best_cost = act_cost;
4617
4618               if (best_delta)
4619                 iv_ca_delta_free (&best_delta);
4620               best_delta = act_delta;
4621             }
4622           else
4623             iv_ca_delta_free (&act_delta);
4624         }
4625     }
4626
4627   iv_ca_delta_commit (data, ivs, best_delta, true);
4628   iv_ca_delta_free (&best_delta);
4629
4630   return (best_cost != INFTY);
4631 }
4632
4633 /* Finds an initial assignment of candidates to uses.  */
4634
4635 static struct iv_ca *
4636 get_initial_solution (struct ivopts_data *data)
4637 {
4638   struct iv_ca *ivs = iv_ca_new (data);
4639   unsigned i;
4640
4641   for (i = 0; i < n_iv_uses (data); i++)
4642     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4643       {
4644         iv_ca_free (&ivs);
4645         return NULL;
4646       }
4647
4648   return ivs;
4649 }
4650
4651 /* Tries to improve set of induction variables IVS.  */
4652
4653 static bool
4654 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4655 {
4656   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4657   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4658   struct iv_cand *cand;
4659
4660   /* Try extending the set of induction variables by one.  */
4661   for (i = 0; i < n_iv_cands (data); i++)
4662     {
4663       cand = iv_cand (data, i);
4664
4665       if (iv_ca_cand_used_p (ivs, cand))
4666         continue;
4667
4668       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4669       if (!act_delta)
4670         continue;
4671
4672       /* If we successfully added the candidate and the set is small enough,
4673          try optimizing it by removing other candidates.  */
4674       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4675         {
4676           iv_ca_delta_commit (data, ivs, act_delta, true);
4677           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4678           iv_ca_delta_commit (data, ivs, act_delta, false);
4679           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4680         }
4681
4682       if (acost < best_cost)
4683         {
4684           best_cost = acost;
4685           iv_ca_delta_free (&best_delta);
4686           best_delta = act_delta;
4687         }
4688       else
4689         iv_ca_delta_free (&act_delta);
4690     }
4691
4692   if (!best_delta)
4693     {
4694       /* Try removing the candidates from the set instead.  */
4695       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4696
4697       /* Nothing more we can do.  */
4698       if (!best_delta)
4699         return false;
4700     }
4701
4702   iv_ca_delta_commit (data, ivs, best_delta, true);
4703   gcc_assert (best_cost == iv_ca_cost (ivs));
4704   iv_ca_delta_free (&best_delta);
4705   return true;
4706 }
4707
4708 /* Attempts to find the optimal set of induction variables.  We do simple
4709    greedy heuristic -- we try to replace at most one candidate in the selected
4710    solution and remove the unused ivs while this improves the cost.  */
4711
4712 static struct iv_ca *
4713 find_optimal_iv_set (struct ivopts_data *data)
4714 {
4715   unsigned i;
4716   struct iv_ca *set;
4717   struct iv_use *use;
4718
4719   /* Get the initial solution.  */
4720   set = get_initial_solution (data);
4721   if (!set)
4722     {
4723       if (dump_file && (dump_flags & TDF_DETAILS))
4724         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4725       return NULL;
4726     }
4727
4728   if (dump_file && (dump_flags & TDF_DETAILS))
4729     {
4730       fprintf (dump_file, "Initial set of candidates:\n");
4731       iv_ca_dump (data, dump_file, set);
4732     }
4733
4734   while (try_improve_iv_set (data, set))
4735     {
4736       if (dump_file && (dump_flags & TDF_DETAILS))
4737         {
4738           fprintf (dump_file, "Improved to:\n");
4739           iv_ca_dump (data, dump_file, set);
4740         }
4741     }
4742
4743   if (dump_file && (dump_flags & TDF_DETAILS))
4744     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4745
4746   for (i = 0; i < n_iv_uses (data); i++)
4747     {
4748       use = iv_use (data, i);
4749       use->selected = iv_ca_cand_for_use (set, use)->cand;
4750     }
4751
4752   return set;
4753 }
4754
4755 /* Creates a new induction variable corresponding to CAND.  */
4756
4757 static void
4758 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4759 {
4760   block_stmt_iterator incr_pos;
4761   tree base;
4762   bool after = false;
4763
4764   if (!cand->iv)
4765     return;
4766
4767   switch (cand->pos)
4768     {
4769     case IP_NORMAL:
4770       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4771       break;
4772
4773     case IP_END:
4774       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4775       after = true;
4776       break;
4777
4778     case IP_ORIGINAL:
4779       /* Mark that the iv is preserved.  */
4780       name_info (data, cand->var_before)->preserve_biv = true;
4781       name_info (data, cand->var_after)->preserve_biv = true;
4782
4783       /* Rewrite the increment so that it uses var_before directly.  */
4784       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4785
4786       return;
4787     }
4788
4789   gimple_add_tmp_var (cand->var_before);
4790   add_referenced_var (cand->var_before);
4791
4792   base = unshare_expr (cand->iv->base);
4793
4794   create_iv (base, unshare_expr (cand->iv->step),
4795              cand->var_before, data->current_loop,
4796              &incr_pos, after, &cand->var_before, &cand->var_after);
4797 }
4798
4799 /* Creates new induction variables described in SET.  */
4800
4801 static void
4802 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4803 {
4804   unsigned i;
4805   struct iv_cand *cand;
4806   bitmap_iterator bi;
4807
4808   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4809     {
4810       cand = iv_cand (data, i);
4811       create_new_iv (data, cand);
4812     }
4813 }
4814
4815 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4816    is true, remove also the ssa name defined by the statement.  */
4817
4818 static void
4819 remove_statement (tree stmt, bool including_defined_name)
4820 {
4821   if (TREE_CODE (stmt) == PHI_NODE)
4822     {
4823       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4824     }
4825   else
4826     {
4827       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4828
4829       bsi_remove (&bsi, true);
4830     }
4831 }
4832
4833 /* Rewrites USE (definition of iv used in a nonlinear expression)
4834    using candidate CAND.  */
4835
4836 static void
4837 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4838                             struct iv_use *use, struct iv_cand *cand)
4839 {
4840   tree comp;
4841   tree op, stmts, tgt, ass;
4842   block_stmt_iterator bsi, pbsi;
4843
4844   /* An important special case -- if we are asked to express value of
4845      the original iv by itself, just exit; there is no need to
4846      introduce a new computation (that might also need casting the
4847      variable to unsigned and back).  */
4848   if (cand->pos == IP_ORIGINAL
4849       && cand->incremented_at == use->stmt)
4850     {
4851       tree step, ctype, utype;
4852       enum tree_code incr_code = PLUS_EXPR;
4853
4854       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4855       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4856
4857       step = cand->iv->step;
4858       ctype = TREE_TYPE (step);
4859       utype = TREE_TYPE (cand->var_after);
4860       if (TREE_CODE (step) == NEGATE_EXPR)
4861         {
4862           incr_code = MINUS_EXPR;
4863           step = TREE_OPERAND (step, 0);
4864         }
4865
4866       /* Check whether we may leave the computation unchanged.
4867          This is the case only if it does not rely on other
4868          computations in the loop -- otherwise, the computation
4869          we rely upon may be removed in remove_unused_ivs,
4870          thus leading to ICE.  */
4871       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4872       if (TREE_CODE (op) == PLUS_EXPR
4873           || TREE_CODE (op) == MINUS_EXPR)
4874         {
4875           if (TREE_OPERAND (op, 0) == cand->var_before)
4876             op = TREE_OPERAND (op, 1);
4877           else if (TREE_CODE (op) == PLUS_EXPR
4878                    && TREE_OPERAND (op, 1) == cand->var_before)
4879             op = TREE_OPERAND (op, 0);
4880           else
4881             op = NULL_TREE;
4882         }
4883       else
4884         op = NULL_TREE;
4885
4886       if (op
4887           && (TREE_CODE (op) == INTEGER_CST
4888               || operand_equal_p (op, step, 0)))
4889         return;
4890
4891       /* Otherwise, add the necessary computations to express
4892          the iv.  */
4893       op = fold_convert (ctype, cand->var_before);
4894       comp = fold_convert (utype,
4895                            build2 (incr_code, ctype, op,
4896                                    unshare_expr (step)));
4897     }
4898   else
4899     {
4900       comp = get_computation (data->current_loop, use, cand);
4901       gcc_assert (comp != NULL_TREE);
4902     }
4903
4904   switch (TREE_CODE (use->stmt))
4905     {
4906     case PHI_NODE:
4907       tgt = PHI_RESULT (use->stmt);
4908
4909       /* If we should keep the biv, do not replace it.  */
4910       if (name_info (data, tgt)->preserve_biv)
4911         return;
4912
4913       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4914       while (!bsi_end_p (pbsi)
4915              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4916         {
4917           bsi = pbsi;
4918           bsi_next (&pbsi);
4919         }
4920       break;
4921
4922     case GIMPLE_MODIFY_STMT:
4923       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4924       bsi = bsi_for_stmt (use->stmt);
4925       break;
4926
4927     default:
4928       gcc_unreachable ();
4929     }
4930
4931   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4932
4933   if (TREE_CODE (use->stmt) == PHI_NODE)
4934     {
4935       if (stmts)
4936         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4937       ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4938       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4939       remove_statement (use->stmt, false);
4940       SSA_NAME_DEF_STMT (tgt) = ass;
4941     }
4942   else
4943     {
4944       if (stmts)
4945         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4946       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4947     }
4948 }
4949
4950 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4951    for_each_index.  */
4952
4953 static bool
4954 idx_remove_ssa_names (tree base, tree *idx,
4955                       void *data ATTRIBUTE_UNUSED)
4956 {
4957   tree *op;
4958
4959   if (TREE_CODE (*idx) == SSA_NAME)
4960     *idx = SSA_NAME_VAR (*idx);
4961
4962   if (TREE_CODE (base) == ARRAY_REF)
4963     {
4964       op = &TREE_OPERAND (base, 2);
4965       if (*op
4966           && TREE_CODE (*op) == SSA_NAME)
4967         *op = SSA_NAME_VAR (*op);
4968       op = &TREE_OPERAND (base, 3);
4969       if (*op
4970           && TREE_CODE (*op) == SSA_NAME)
4971         *op = SSA_NAME_VAR (*op);
4972     }
4973
4974   return true;
4975 }
4976
4977 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4978
4979 static tree
4980 unshare_and_remove_ssa_names (tree ref)
4981 {
4982   ref = unshare_expr (ref);
4983   for_each_index (&ref, idx_remove_ssa_names, NULL);
4984
4985   return ref;
4986 }
4987
4988 /* Extract the alias analysis info for the memory reference REF.  There are
4989    several ways how this information may be stored and what precisely is
4990    its semantics depending on the type of the reference, but there always is
4991    somewhere hidden one _DECL node that is used to determine the set of
4992    virtual operands for the reference.  The code below deciphers this jungle
4993    and extracts this single useful piece of information.  */
4994
4995 static tree
4996 get_ref_tag (tree ref, tree orig)
4997 {
4998   tree var = get_base_address (ref);
4999   tree aref = NULL_TREE, tag, sv;
5000   HOST_WIDE_INT offset, size, maxsize;
5001
5002   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5003     {
5004       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5005       if (ref)
5006         break;
5007     }
5008
5009   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5010     return unshare_expr (sv);
5011
5012   if (!var)
5013     return NULL_TREE;
5014
5015   if (TREE_CODE (var) == INDIRECT_REF)
5016     {
5017       /* If the base is a dereference of a pointer, first check its name memory
5018          tag.  If it does not have one, use its symbol memory tag.  */
5019       var = TREE_OPERAND (var, 0);
5020       if (TREE_CODE (var) != SSA_NAME)
5021         return NULL_TREE;
5022
5023       if (SSA_NAME_PTR_INFO (var))
5024         {
5025           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5026           if (tag)
5027             return tag;
5028         }
5029
5030       var = SSA_NAME_VAR (var);
5031       tag = symbol_mem_tag (var);
5032       gcc_assert (tag != NULL_TREE);
5033       return tag;
5034     }
5035   else
5036     {
5037       if (!DECL_P (var))
5038         return NULL_TREE;
5039
5040       tag = symbol_mem_tag (var);
5041       if (tag)
5042         return tag;
5043
5044       return var;
5045     }
5046 }
5047
5048 /* Copies the reference information from OLD_REF to NEW_REF.  */
5049
5050 static void
5051 copy_ref_info (tree new_ref, tree old_ref)
5052 {
5053   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5054     copy_mem_ref_info (new_ref, old_ref);
5055   else
5056     {
5057       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5058       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5059     }
5060 }
5061
5062 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5063
5064 static void
5065 rewrite_use_address (struct ivopts_data *data,
5066                      struct iv_use *use, struct iv_cand *cand)
5067 {
5068   aff_tree aff;
5069   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5070   tree ref;
5071   bool ok;
5072
5073   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5074   gcc_assert (ok);
5075   unshare_aff_combination (&aff);
5076
5077   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5078   copy_ref_info (ref, *use->op_p);
5079   *use->op_p = ref;
5080 }
5081
5082 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5083    candidate CAND.  */
5084
5085 static void
5086 rewrite_use_compare (struct ivopts_data *data,
5087                      struct iv_use *use, struct iv_cand *cand)
5088 {
5089   tree comp;
5090   tree *op_p, cond, op, stmts, bound;
5091   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5092   enum tree_code compare;
5093   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5094
5095   bound = cp->value;
5096   if (bound)
5097     {
5098       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5099       tree var_type = TREE_TYPE (var);
5100
5101       compare = iv_elimination_compare (data, use);
5102       bound = fold_convert (var_type, bound);
5103       op = force_gimple_operand (unshare_expr (bound), &stmts,
5104                                  true, NULL_TREE);
5105
5106       if (stmts)
5107         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5108
5109       *use->op_p = build2 (compare, boolean_type_node, var, op);
5110       update_stmt (use->stmt);
5111       return;
5112     }
5113
5114   /* The induction variable elimination failed; just express the original
5115      giv.  */
5116   comp = get_computation (data->current_loop, use, cand);
5117   gcc_assert (comp != NULL_TREE);
5118
5119   cond = *use->op_p;
5120   op_p = &TREE_OPERAND (cond, 0);
5121   if (TREE_CODE (*op_p) != SSA_NAME
5122       || integer_zerop (get_iv (data, *op_p)->step))
5123     op_p = &TREE_OPERAND (cond, 1);
5124
5125   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5126   if (stmts)
5127     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5128
5129   *op_p = op;
5130 }
5131
5132 /* Rewrites USE using candidate CAND.  */
5133
5134 static void
5135 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5136 {
5137   push_stmt_changes (&use->stmt);
5138
5139   switch (use->type)
5140     {
5141       case USE_NONLINEAR_EXPR:
5142         rewrite_use_nonlinear_expr (data, use, cand);
5143         break;
5144
5145       case USE_ADDRESS:
5146         rewrite_use_address (data, use, cand);
5147         break;
5148
5149       case USE_COMPARE:
5150         rewrite_use_compare (data, use, cand);
5151         break;
5152
5153       default:
5154         gcc_unreachable ();
5155     }
5156
5157   pop_stmt_changes (&use->stmt);
5158 }
5159
5160 /* Rewrite the uses using the selected induction variables.  */
5161
5162 static void
5163 rewrite_uses (struct ivopts_data *data)
5164 {
5165   unsigned i;
5166   struct iv_cand *cand;
5167   struct iv_use *use;
5168
5169   for (i = 0; i < n_iv_uses (data); i++)
5170     {
5171       use = iv_use (data, i);
5172       cand = use->selected;
5173       gcc_assert (cand);
5174
5175       rewrite_use (data, use, cand);
5176     }
5177 }
5178
5179 /* Removes the ivs that are not used after rewriting.  */
5180
5181 static void
5182 remove_unused_ivs (struct ivopts_data *data)
5183 {
5184   unsigned j;
5185   bitmap_iterator bi;
5186
5187   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5188     {
5189       struct version_info *info;
5190
5191       info = ver_info (data, j);
5192       if (info->iv
5193           && !integer_zerop (info->iv->step)
5194           && !info->inv_id
5195           && !info->iv->have_use_for
5196           && !info->preserve_biv)
5197         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5198     }
5199 }
5200
5201 /* Frees data allocated by the optimization of a single loop.  */
5202
5203 static void
5204 free_loop_data (struct ivopts_data *data)
5205 {
5206   unsigned i, j;
5207   bitmap_iterator bi;
5208   tree obj;
5209
5210   htab_empty (data->niters);
5211
5212   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5213     {
5214       struct version_info *info;
5215
5216       info = ver_info (data, i);
5217       if (info->iv)
5218         free (info->iv);
5219       info->iv = NULL;
5220       info->has_nonlin_use = false;
5221       info->preserve_biv = false;
5222       info->inv_id = 0;
5223     }
5224   bitmap_clear (data->relevant);
5225   bitmap_clear (data->important_candidates);
5226
5227   for (i = 0; i < n_iv_uses (data); i++)
5228     {
5229       struct iv_use *use = iv_use (data, i);
5230
5231       free (use->iv);
5232       BITMAP_FREE (use->related_cands);
5233       for (j = 0; j < use->n_map_members; j++)
5234         if (use->cost_map[j].depends_on)
5235           BITMAP_FREE (use->cost_map[j].depends_on);
5236       free (use->cost_map);
5237       free (use);
5238     }
5239   VEC_truncate (iv_use_p, data->iv_uses, 0);
5240
5241   for (i = 0; i < n_iv_cands (data); i++)
5242     {
5243       struct iv_cand *cand = iv_cand (data, i);
5244
5245       if (cand->iv)
5246         free (cand->iv);
5247       if (cand->depends_on)
5248         BITMAP_FREE (cand->depends_on);
5249       free (cand);
5250     }
5251   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5252
5253   if (data->version_info_size < num_ssa_names)
5254     {
5255       data->version_info_size = 2 * num_ssa_names;
5256       free (data->version_info);
5257       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5258     }
5259
5260   data->max_inv_id = 0;
5261
5262   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5263     SET_DECL_RTL (obj, NULL_RTX);
5264
5265   VEC_truncate (tree, decl_rtl_to_reset, 0);
5266 }
5267
5268 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5269    loop tree.  */
5270
5271 static void
5272 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5273 {
5274   free_loop_data (data);
5275   free (data->version_info);
5276   BITMAP_FREE (data->relevant);
5277   BITMAP_FREE (data->important_candidates);
5278   htab_delete (data->niters);
5279
5280   VEC_free (tree, heap, decl_rtl_to_reset);
5281   VEC_free (iv_use_p, heap, data->iv_uses);
5282   VEC_free (iv_cand_p, heap, data->iv_candidates);
5283 }
5284
5285 /* Optimizes the LOOP.  Returns true if anything changed.  */
5286
5287 static bool
5288 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5289 {
5290   bool changed = false;
5291   struct iv_ca *iv_ca;
5292   edge exit;
5293
5294   data->current_loop = loop;
5295
5296   if (dump_file && (dump_flags & TDF_DETAILS))
5297     {
5298       fprintf (dump_file, "Processing loop %d\n", loop->num);
5299
5300       exit = single_dom_exit (loop);
5301       if (exit)
5302         {
5303           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5304                    exit->src->index, exit->dest->index);
5305           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5306           fprintf (dump_file, "\n");
5307         }
5308
5309       fprintf (dump_file, "\n");
5310     }
5311
5312   /* For each ssa name determines whether it behaves as an induction variable
5313      in some loop.  */
5314   if (!find_induction_variables (data))
5315     goto finish;
5316
5317   /* Finds interesting uses (item 1).  */
5318   find_interesting_uses (data);
5319   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5320     goto finish;
5321
5322   /* Finds candidates for the induction variables (item 2).  */
5323   find_iv_candidates (data);
5324
5325   /* Calculates the costs (item 3, part 1).  */
5326   determine_use_iv_costs (data);
5327   determine_iv_costs (data);
5328   determine_set_costs (data);
5329
5330   /* Find the optimal set of induction variables (item 3, part 2).  */
5331   iv_ca = find_optimal_iv_set (data);
5332   if (!iv_ca)
5333     goto finish;
5334   changed = true;
5335
5336   /* Create the new induction variables (item 4, part 1).  */
5337   create_new_ivs (data, iv_ca);
5338   iv_ca_free (&iv_ca);
5339
5340   /* Rewrite the uses (item 4, part 2).  */
5341   rewrite_uses (data);
5342
5343   /* Remove the ivs that are unused after rewriting.  */
5344   remove_unused_ivs (data);
5345
5346   /* We have changed the structure of induction variables; it might happen
5347      that definitions in the scev database refer to some of them that were
5348      eliminated.  */
5349   scev_reset ();
5350
5351 finish:
5352   free_loop_data (data);
5353
5354   return changed;
5355 }
5356
5357 /* Main entry point.  Optimizes induction variables in loops.  */
5358
5359 void
5360 tree_ssa_iv_optimize (void)
5361 {
5362   struct loop *loop;
5363   struct ivopts_data data;
5364   loop_iterator li;
5365
5366   tree_ssa_iv_optimize_init (&data);
5367
5368   /* Optimize the loops starting with the innermost ones.  */
5369   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5370     {
5371       if (dump_file && (dump_flags & TDF_DETAILS))
5372         flow_loop_dump (loop, dump_file, NULL, 1);
5373
5374       tree_ssa_iv_optimize_loop (&data, loop);
5375     }
5376
5377   tree_ssa_iv_optimize_finalize (&data);
5378 }