gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = single_exit (loop);
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   /* If this is a pointer casted to any type, we need to determine
 839      the base object for the pointer; so handle conversions before
 840      throwing away non-pointer expressions.  */
 841   if (TREE_CODE (expr) == NOP_EXPR
 842       || TREE_CODE (expr) == CONVERT_EXPR)
 843     return determine_base_object (TREE_OPERAND (expr, 0));
 844
 845   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 846     return NULL_TREE;
 847
 848   switch (code)
 849     {
 850     case INTEGER_CST:
 851       return NULL_TREE;
 852
 853     case ADDR_EXPR:
 854       obj = TREE_OPERAND (expr, 0);
 855       base = get_base_address (obj);
 856
 857       if (!base)
 858         return expr;
 859
 860       if (TREE_CODE (base) == INDIRECT_REF)
 861         return determine_base_object (TREE_OPERAND (base, 0));
 862
 863       return fold_convert (ptr_type_node,
 864                            build_fold_addr_expr (base));
 865
 866     case PLUS_EXPR:
 867     case MINUS_EXPR:
 868       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 869       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 870
 871       if (!op1)
 872         return op0;
 873
 874       if (!op0)
 875         return (code == PLUS_EXPR
 876                 ? op1
 877                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 878
 879       return fold_build2 (code, ptr_type_node, op0, op1);
 880
 881     default:
 882       return fold_convert (ptr_type_node, expr);
 883     }
 884 }
 885
 886 /* Allocates an induction variable with given initial value BASE and step STEP
 887    for loop LOOP.  */
 888
 889 static struct iv *
 890 alloc_iv (tree base, tree step)
 891 {
 892   struct iv *iv = XCNEW (struct iv);
 893
 894   if (step && integer_zerop (step))
 895     step = NULL_TREE;
 896
 897   iv->base = base;
 898   iv->base_object = determine_base_object (base);
 899   iv->step = step;
 900   iv->biv_p = false;
 901   iv->have_use_for = false;
 902   iv->use_id = 0;
 903   iv->ssa_name = NULL_TREE;
 904
 905   return iv;
 906 }
 907
 908 /* Sets STEP and BASE for induction variable IV.  */
 909
 910 static void
 911 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 912 {
 913   struct version_info *info = name_info (data, iv);
 914
 915   gcc_assert (!info->iv);
 916
 917   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 918   info->iv = alloc_iv (base, step);
 919   info->iv->ssa_name = iv;
 920 }
 921
 922 /* Finds induction variable declaration for VAR.  */
 923
 924 static struct iv *
 925 get_iv (struct ivopts_data *data, tree var)
 926 {
 927   basic_block bb;
 928
 929   if (!name_info (data, var)->iv)
 930     {
 931       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 932
 933       if (!bb
 934           || !flow_bb_inside_loop_p (data->current_loop, bb))
 935         set_iv (data, var, var, NULL_TREE);
 936     }
 937
 938   return name_info (data, var)->iv;
 939 }
 940
 941 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 942    not define a simple affine biv with nonzero step.  */
 943
 944 static tree
 945 determine_biv_step (tree phi)
 946 {
 947   struct loop *loop = bb_for_stmt (phi)->loop_father;
 948   tree name = PHI_RESULT (phi);
 949   affine_iv iv;
 950
 951   if (!is_gimple_reg (name))
 952     return NULL_TREE;
 953
 954   if (!simple_iv (loop, phi, name, &iv, true))
 955     return NULL_TREE;
 956
 957   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 958 }
 959
 960 /* Finds basic ivs.  */
 961
 962 static bool
 963 find_bivs (struct ivopts_data *data)
 964 {
 965   tree phi, step, type, base;
 966   bool found = false;
 967   struct loop *loop = data->current_loop;
 968
 969   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 970     {
 971       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 972         continue;
 973
 974       step = determine_biv_step (phi);
 975       if (!step)
 976         continue;
 977
 978       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 979       base = expand_simple_operations (base);
 980       if (contains_abnormal_ssa_name_p (base)
 981           || contains_abnormal_ssa_name_p (step))
 982         continue;
 983
 984       type = TREE_TYPE (PHI_RESULT (phi));
 985       base = fold_convert (type, base);
 986       if (step)
 987         step = fold_convert (type, step);
 988
 989       set_iv (data, PHI_RESULT (phi), base, step);
 990       found = true;
 991     }
 992
 993   return found;
 994 }
 995
 996 /* Marks basic ivs.  */
 997
 998 static void
 999 mark_bivs (struct ivopts_data *data)
1000 {
1001   tree phi, var;
1002   struct iv *iv, *incr_iv;
1003   struct loop *loop = data->current_loop;
1004   basic_block incr_bb;
1005
1006   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1007     {
1008       iv = get_iv (data, PHI_RESULT (phi));
1009       if (!iv)
1010         continue;
1011
1012       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1013       incr_iv = get_iv (data, var);
1014       if (!incr_iv)
1015         continue;
1016
1017       /* If the increment is in the subloop, ignore it.  */
1018       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1019       if (incr_bb->loop_father != data->current_loop
1020           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1021         continue;
1022
1023       iv->biv_p = true;
1024       incr_iv->biv_p = true;
1025     }
1026 }
1027
1028 /* Checks whether STMT defines a linear induction variable and stores its
1029    parameters to IV.  */
1030
1031 static bool
1032 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1033 {
1034   tree lhs;
1035   struct loop *loop = data->current_loop;
1036
1037   iv->base = NULL_TREE;
1038   iv->step = NULL_TREE;
1039
1040   if (TREE_CODE (stmt) != MODIFY_EXPR)
1041     return false;
1042
1043   lhs = TREE_OPERAND (stmt, 0);
1044   if (TREE_CODE (lhs) != SSA_NAME)
1045     return false;
1046
1047   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1048     return false;
1049   iv->base = expand_simple_operations (iv->base);
1050
1051   if (contains_abnormal_ssa_name_p (iv->base)
1052       || contains_abnormal_ssa_name_p (iv->step))
1053     return false;
1054
1055   return true;
1056 }
1057
1058 /* Finds general ivs in statement STMT.  */
1059
1060 static void
1061 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1062 {
1063   affine_iv iv;
1064
1065   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1066     return;
1067
1068   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1069 }
1070
1071 /* Finds general ivs in basic block BB.  */
1072
1073 static void
1074 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1075 {
1076   block_stmt_iterator bsi;
1077
1078   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1079     find_givs_in_stmt (data, bsi_stmt (bsi));
1080 }
1081
1082 /* Finds general ivs.  */
1083
1084 static void
1085 find_givs (struct ivopts_data *data)
1086 {
1087   struct loop *loop = data->current_loop;
1088   basic_block *body = get_loop_body_in_dom_order (loop);
1089   unsigned i;
1090
1091   for (i = 0; i < loop->num_nodes; i++)
1092     find_givs_in_bb (data, body[i]);
1093   free (body);
1094 }
1095
1096 /* For each ssa name defined in LOOP determines whether it is an induction
1097    variable and if so, its initial value and step.  */
1098
1099 static bool
1100 find_induction_variables (struct ivopts_data *data)
1101 {
1102   unsigned i;
1103   bitmap_iterator bi;
1104
1105   if (!find_bivs (data))
1106     return false;
1107
1108   find_givs (data);
1109   mark_bivs (data);
1110
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       tree niter = niter_for_single_dom_exit (data);
1114
1115       if (niter)
1116         {
1117           fprintf (dump_file, "  number of iterations ");
1118           print_generic_expr (dump_file, niter, TDF_SLIM);
1119           fprintf (dump_file, "\n\n");
1120         };
1121
1122       fprintf (dump_file, "Induction variables:\n\n");
1123
1124       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1125         {
1126           if (ver_info (data, i)->iv)
1127             dump_iv (dump_file, ver_info (data, i)->iv);
1128         }
1129     }
1130
1131   return true;
1132 }
1133
1134 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1135
1136 static struct iv_use *
1137 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1138             tree stmt, enum use_type use_type)
1139 {
1140   struct iv_use *use = XCNEW (struct iv_use);
1141
1142   use->id = n_iv_uses (data);
1143   use->type = use_type;
1144   use->iv = iv;
1145   use->stmt = stmt;
1146   use->op_p = use_p;
1147   use->related_cands = BITMAP_ALLOC (NULL);
1148
1149   /* To avoid showing ssa name in the dumps, if it was not reset by the
1150      caller.  */
1151   iv->ssa_name = NULL_TREE;
1152
1153   if (dump_file && (dump_flags & TDF_DETAILS))
1154     dump_use (dump_file, use);
1155
1156   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1157
1158   return use;
1159 }
1160
1161 /* Checks whether OP is a loop-level invariant and if so, records it.
1162    NONLINEAR_USE is true if the invariant is used in a way we do not
1163    handle specially.  */
1164
1165 static void
1166 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1167 {
1168   basic_block bb;
1169   struct version_info *info;
1170
1171   if (TREE_CODE (op) != SSA_NAME
1172       || !is_gimple_reg (op))
1173     return;
1174
1175   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1176   if (bb
1177       && flow_bb_inside_loop_p (data->current_loop, bb))
1178     return;
1179
1180   info = name_info (data, op);
1181   info->name = op;
1182   info->has_nonlin_use |= nonlinear_use;
1183   if (!info->inv_id)
1184     info->inv_id = ++data->max_inv_id;
1185   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1186 }
1187
1188 /* Checks whether the use OP is interesting and if so, records it.  */
1189
1190 static struct iv_use *
1191 find_interesting_uses_op (struct ivopts_data *data, tree op)
1192 {
1193   struct iv *iv;
1194   struct iv *civ;
1195   tree stmt;
1196   struct iv_use *use;
1197
1198   if (TREE_CODE (op) != SSA_NAME)
1199     return NULL;
1200
1201   iv = get_iv (data, op);
1202   if (!iv)
1203     return NULL;
1204
1205   if (iv->have_use_for)
1206     {
1207       use = iv_use (data, iv->use_id);
1208
1209       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1210       return use;
1211     }
1212
1213   if (zero_p (iv->step))
1214     {
1215       record_invariant (data, op, true);
1216       return NULL;
1217     }
1218   iv->have_use_for = true;
1219
1220   civ = XNEW (struct iv);
1221   *civ = *iv;
1222
1223   stmt = SSA_NAME_DEF_STMT (op);
1224   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1225               || TREE_CODE (stmt) == MODIFY_EXPR);
1226
1227   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1228   iv->use_id = use->id;
1229
1230   return use;
1231 }
1232
1233 /* Checks whether the condition *COND_P in STMT is interesting
1234    and if so, records it.  */
1235
1236 static void
1237 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1238 {
1239   tree *op0_p;
1240   tree *op1_p;
1241   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1242   struct iv const_iv;
1243   tree zero = integer_zero_node;
1244
1245   const_iv.step = NULL_TREE;
1246
1247   if (TREE_CODE (*cond_p) != SSA_NAME
1248       && !COMPARISON_CLASS_P (*cond_p))
1249     return;
1250
1251   if (TREE_CODE (*cond_p) == SSA_NAME)
1252     {
1253       op0_p = cond_p;
1254       op1_p = &zero;
1255     }
1256   else
1257     {
1258       op0_p = &TREE_OPERAND (*cond_p, 0);
1259       op1_p = &TREE_OPERAND (*cond_p, 1);
1260     }
1261
1262   if (TREE_CODE (*op0_p) == SSA_NAME)
1263     iv0 = get_iv (data, *op0_p);
1264   else
1265     iv0 = &const_iv;
1266
1267   if (TREE_CODE (*op1_p) == SSA_NAME)
1268     iv1 = get_iv (data, *op1_p);
1269   else
1270     iv1 = &const_iv;
1271
1272   if (/* When comparing with non-invariant value, we may not do any senseful
1273          induction variable elimination.  */
1274       (!iv0 || !iv1)
1275       /* Eliminating condition based on two ivs would be nontrivial.
1276          ??? TODO -- it is not really important to handle this case.  */
1277       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1278     {
1279       find_interesting_uses_op (data, *op0_p);
1280       find_interesting_uses_op (data, *op1_p);
1281       return;
1282     }
1283
1284   if (zero_p (iv0->step) && zero_p (iv1->step))
1285     {
1286       /* If both are invariants, this is a work for unswitching.  */
1287       return;
1288     }
1289
1290   civ = XNEW (struct iv);
1291   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1292   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1293 }
1294
1295 /* Returns true if expression EXPR is obviously invariant in LOOP,
1296    i.e. if all its operands are defined outside of the LOOP.  */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301   basic_block def_bb;
1302   unsigned i, len;
1303
1304   if (is_gimple_min_invariant (expr))
1305     return true;
1306
1307   if (TREE_CODE (expr) == SSA_NAME)
1308     {
1309       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1310       if (def_bb
1311           && flow_bb_inside_loop_p (loop, def_bb))
1312         return false;
1313
1314       return true;
1315     }
1316
1317   if (!EXPR_P (expr))
1318     return false;
1319
1320   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1321   for (i = 0; i < len; i++)
1322     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1323       return false;
1324
1325   return true;
1326 }
1327
1328 /* Cumulates the steps of indices into DATA and replaces their values with the
1329    initial ones.  Returns false when the value of the index cannot be determined.
1330    Callback for for_each_index.  */
1331
1332 struct ifs_ivopts_data
1333 {
1334   struct ivopts_data *ivopts_data;
1335   tree stmt;
1336   tree *step_p;
1337 };
1338
1339 static bool
1340 idx_find_step (tree base, tree *idx, void *data)
1341 {
1342   struct ifs_ivopts_data *dta = data;
1343   struct iv *iv;
1344   tree step, iv_base, iv_step, lbound, off;
1345   struct loop *loop = dta->ivopts_data->current_loop;
1346
1347   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1348       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1349     return false;
1350
1351   /* If base is a component ref, require that the offset of the reference
1352      be invariant.  */
1353   if (TREE_CODE (base) == COMPONENT_REF)
1354     {
1355       off = component_ref_field_offset (base);
1356       return expr_invariant_in_loop_p (loop, off);
1357     }
1358
1359   /* If base is array, first check whether we will be able to move the
1360      reference out of the loop (in order to take its address in strength
1361      reduction).  In order for this to work we need both lower bound
1362      and step to be loop invariants.  */
1363   if (TREE_CODE (base) == ARRAY_REF)
1364     {
1365       step = array_ref_element_size (base);
1366       lbound = array_ref_low_bound (base);
1367
1368       if (!expr_invariant_in_loop_p (loop, step)
1369           || !expr_invariant_in_loop_p (loop, lbound))
1370         return false;
1371     }
1372
1373   if (TREE_CODE (*idx) != SSA_NAME)
1374     return true;
1375
1376   iv = get_iv (dta->ivopts_data, *idx);
1377   if (!iv)
1378     return false;
1379
1380   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1381           *&x[0], which is not folded and does not trigger the
1382           ARRAY_REF path below.  */
1383   *idx = iv->base;
1384
1385   if (!iv->step)
1386     return true;
1387
1388   if (TREE_CODE (base) == ARRAY_REF)
1389     {
1390       step = array_ref_element_size (base);
1391
1392       /* We only handle addresses whose step is an integer constant.  */
1393       if (TREE_CODE (step) != INTEGER_CST)
1394         return false;
1395     }
1396   else
1397     /* The step for pointer arithmetics already is 1 byte.  */
1398     step = build_int_cst (sizetype, 1);
1399
1400   iv_base = iv->base;
1401   iv_step = iv->step;
1402   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1403                             sizetype, &iv_base, &iv_step, dta->stmt,
1404                             false))
1405     {
1406       /* The index might wrap.  */
1407       return false;
1408     }
1409
1410   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1411
1412   if (!*dta->step_p)
1413     *dta->step_p = step;
1414   else
1415     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1416
1417   return true;
1418 }
1419
1420 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1421    object is passed to it in DATA.  */
1422
1423 static bool
1424 idx_record_use (tree base, tree *idx,
1425                 void *data)
1426 {
1427   find_interesting_uses_op (data, *idx);
1428   if (TREE_CODE (base) == ARRAY_REF)
1429     {
1430       find_interesting_uses_op (data, array_ref_element_size (base));
1431       find_interesting_uses_op (data, array_ref_low_bound (base));
1432     }
1433   return true;
1434 }
1435
1436 /* Returns true if memory reference REF may be unaligned.  */
1437
1438 static bool
1439 may_be_unaligned_p (tree ref)
1440 {
1441   tree base;
1442   tree base_type;
1443   HOST_WIDE_INT bitsize;
1444   HOST_WIDE_INT bitpos;
1445   tree toffset;
1446   enum machine_mode mode;
1447   int unsignedp, volatilep;
1448   unsigned base_align;
1449
1450   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451      thus they are not misaligned.  */
1452   if (TREE_CODE (ref) == TARGET_MEM_REF)
1453     return false;
1454
1455   /* The test below is basically copy of what expr.c:normal_inner_ref
1456      does to check whether the object must be loaded by parts when
1457      STRICT_ALIGNMENT is true.  */
1458   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1459                               &unsignedp, &volatilep, true);
1460   base_type = TREE_TYPE (base);
1461   base_align = TYPE_ALIGN (base_type);
1462
1463   if (mode != BLKmode
1464       && (base_align < GET_MODE_ALIGNMENT (mode)
1465           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1466           || bitpos % BITS_PER_UNIT != 0))
1467     return true;
1468
1469   return false;
1470 }
1471
1472 /* Return true if EXPR may be non-addressable.   */
1473
1474 static bool
1475 may_be_nonaddressable_p (tree expr)
1476 {
1477   switch (TREE_CODE (expr))
1478     {
1479     case COMPONENT_REF:
1480       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1481              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1482
1483     case ARRAY_REF:
1484     case ARRAY_RANGE_REF:
1485       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1486
1487     case VIEW_CONVERT_EXPR:
1488       /* This kind of view-conversions may wrap non-addressable objects
1489          and make them look addressable.  After some processing the
1490          non-addressability may be uncovered again, causing ADDR_EXPRs
1491          of inappropriate objects to be built.  */
1492       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1493              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1494
1495     default:
1496       break;
1497     }
1498
1499   return false;
1500 }
1501
1502 /* Finds addresses in *OP_P inside STMT.  */
1503
1504 static void
1505 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 {
1507   tree base = *op_p, step = NULL;
1508   struct iv *civ;
1509   struct ifs_ivopts_data ifs_ivopts_data;
1510
1511   /* Do not play with volatile memory references.  A bit too conservative,
1512      perhaps, but safe.  */
1513   if (stmt_ann (stmt)->has_volatile_ops)
1514     goto fail;
1515
1516   /* Ignore bitfields for now.  Not really something terribly complicated
1517      to handle.  TODO.  */
1518   if (TREE_CODE (base) == BIT_FIELD_REF)
1519     goto fail;
1520
1521   if (may_be_nonaddressable_p (base))
1522     goto fail;
1523
1524   if (STRICT_ALIGNMENT
1525       && may_be_unaligned_p (base))
1526     goto fail;
1527
1528   base = unshare_expr (base);
1529
1530   if (TREE_CODE (base) == TARGET_MEM_REF)
1531     {
1532       tree type = build_pointer_type (TREE_TYPE (base));
1533       tree astep;
1534
1535       if (TMR_BASE (base)
1536           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1537         {
1538           civ = get_iv (data, TMR_BASE (base));
1539           if (!civ)
1540             goto fail;
1541
1542           TMR_BASE (base) = civ->base;
1543           step = civ->step;
1544         }
1545       if (TMR_INDEX (base)
1546           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1547         {
1548           civ = get_iv (data, TMR_INDEX (base));
1549           if (!civ)
1550             goto fail;
1551
1552           TMR_INDEX (base) = civ->base;
1553           astep = civ->step;
1554
1555           if (astep)
1556             {
1557               if (TMR_STEP (base))
1558                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1559
1560               if (step)
1561                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1562               else
1563                 step = astep;
1564             }
1565         }
1566
1567       if (zero_p (step))
1568         goto fail;
1569       base = tree_mem_ref_addr (type, base);
1570     }
1571   else
1572     {
1573       ifs_ivopts_data.ivopts_data = data;
1574       ifs_ivopts_data.stmt = stmt;
1575       ifs_ivopts_data.step_p = &step;
1576       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1577           || zero_p (step))
1578         goto fail;
1579
1580       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1581       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1582
1583       base = build_fold_addr_expr (base);
1584
1585       /* Substituting bases of IVs into the base expression might
1586          have caused folding opportunities.  */
1587       if (TREE_CODE (base) == ADDR_EXPR)
1588         {
1589           tree *ref = &TREE_OPERAND (base, 0);
1590           while (handled_component_p (*ref))
1591             ref = &TREE_OPERAND (*ref, 0);
1592           if (TREE_CODE (*ref) == INDIRECT_REF)
1593             *ref = fold_indirect_ref (*ref);
1594         }
1595     }
1596
1597   civ = alloc_iv (base, step);
1598   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599   return;
1600
1601 fail:
1602   for_each_index (op_p, idx_record_use, data);
1603 }
1604
1605 /* Finds and records invariants used in STMT.  */
1606
1607 static void
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 {
1610   ssa_op_iter iter;
1611   use_operand_p use_p;
1612   tree op;
1613
1614   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615     {
1616       op = USE_FROM_PTR (use_p);
1617       record_invariant (data, op, false);
1618     }
1619 }
1620
1621 /* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623 static void
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 {
1626   struct iv *iv;
1627   tree op, lhs, rhs;
1628   ssa_op_iter iter;
1629   use_operand_p use_p;
1630
1631   find_invariants_stmt (data, stmt);
1632
1633   if (TREE_CODE (stmt) == COND_EXPR)
1634     {
1635       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636       return;
1637     }
1638
1639   if (TREE_CODE (stmt) == MODIFY_EXPR)
1640     {
1641       lhs = TREE_OPERAND (stmt, 0);
1642       rhs = TREE_OPERAND (stmt, 1);
1643
1644       if (TREE_CODE (lhs) == SSA_NAME)
1645         {
1646           /* If the statement defines an induction variable, the uses are not
1647              interesting by themselves.  */
1648
1649           iv = get_iv (data, lhs);
1650
1651           if (iv && !zero_p (iv->step))
1652             return;
1653         }
1654
1655       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656         {
1657         case tcc_comparison:
1658           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659           return;
1660
1661         case tcc_reference:
1662           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663           if (REFERENCE_CLASS_P (lhs))
1664             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665           return;
1666
1667         default: ;
1668         }
1669
1670       if (REFERENCE_CLASS_P (lhs)
1671           && is_gimple_val (rhs))
1672         {
1673           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674           find_interesting_uses_op (data, rhs);
1675           return;
1676         }
1677
1678       /* TODO -- we should also handle address uses of type
1679
1680          memory = call (whatever);
1681
1682          and
1683
1684          call (memory).  */
1685     }
1686
1687   if (TREE_CODE (stmt) == PHI_NODE
1688       && bb_for_stmt (stmt) == data->current_loop->header)
1689     {
1690       lhs = PHI_RESULT (stmt);
1691       iv = get_iv (data, lhs);
1692
1693       if (iv && !zero_p (iv->step))
1694         return;
1695     }
1696
1697   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698     {
1699       op = USE_FROM_PTR (use_p);
1700
1701       if (TREE_CODE (op) != SSA_NAME)
1702         continue;
1703
1704       iv = get_iv (data, op);
1705       if (!iv)
1706         continue;
1707
1708       find_interesting_uses_op (data, op);
1709     }
1710 }
1711
1712 /* Finds interesting uses of induction variables outside of loops
1713    on loop exit edge EXIT.  */
1714
1715 static void
1716 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717 {
1718   tree phi, def;
1719
1720   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721     {
1722       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723       find_interesting_uses_op (data, def);
1724     }
1725 }
1726
1727 /* Finds uses of the induction variables that are interesting.  */
1728
1729 static void
1730 find_interesting_uses (struct ivopts_data *data)
1731 {
1732   basic_block bb;
1733   block_stmt_iterator bsi;
1734   tree phi;
1735   basic_block *body = get_loop_body (data->current_loop);
1736   unsigned i;
1737   struct version_info *info;
1738   edge e;
1739
1740   if (dump_file && (dump_flags & TDF_DETAILS))
1741     fprintf (dump_file, "Uses:\n\n");
1742
1743   for (i = 0; i < data->current_loop->num_nodes; i++)
1744     {
1745       edge_iterator ei;
1746       bb = body[i];
1747
1748       FOR_EACH_EDGE (e, ei, bb->succs)
1749         if (e->dest != EXIT_BLOCK_PTR
1750             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751           find_interesting_uses_outside (data, e);
1752
1753       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754         find_interesting_uses_stmt (data, phi);
1755       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757     }
1758
1759   if (dump_file && (dump_flags & TDF_DETAILS))
1760     {
1761       bitmap_iterator bi;
1762
1763       fprintf (dump_file, "\n");
1764
1765       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766         {
1767           info = ver_info (data, i);
1768           if (info->inv_id)
1769             {
1770               fprintf (dump_file, "  ");
1771               print_generic_expr (dump_file, info->name, TDF_SLIM);
1772               fprintf (dump_file, " is invariant (%d)%s\n",
1773                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774             }
1775         }
1776
1777       fprintf (dump_file, "\n");
1778     }
1779
1780   free (body);
1781 }
1782
1783 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785    we are at the top-level of the processed address.  */
1786
1787 static tree
1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789                 unsigned HOST_WIDE_INT *offset)
1790 {
1791   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792   enum tree_code code;
1793   tree type, orig_type = TREE_TYPE (expr);
1794   unsigned HOST_WIDE_INT off0, off1, st;
1795   tree orig_expr = expr;
1796
1797   STRIP_NOPS (expr);
1798
1799   type = TREE_TYPE (expr);
1800   code = TREE_CODE (expr);
1801   *offset = 0;
1802
1803   switch (code)
1804     {
1805     case INTEGER_CST:
1806       if (!cst_and_fits_in_hwi (expr)
1807           || zero_p (expr))
1808         return orig_expr;
1809
1810       *offset = int_cst_value (expr);
1811       return build_int_cst (orig_type, 0);
1812
1813     case PLUS_EXPR:
1814     case MINUS_EXPR:
1815       op0 = TREE_OPERAND (expr, 0);
1816       op1 = TREE_OPERAND (expr, 1);
1817
1818       op0 = strip_offset_1 (op0, false, false, &off0);
1819       op1 = strip_offset_1 (op1, false, false, &off1);
1820
1821       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822       if (op0 == TREE_OPERAND (expr, 0)
1823           && op1 == TREE_OPERAND (expr, 1))
1824         return orig_expr;
1825
1826       if (zero_p (op1))
1827         expr = op0;
1828       else if (zero_p (op0))
1829         {
1830           if (code == PLUS_EXPR)
1831             expr = op1;
1832           else
1833             expr = fold_build1 (NEGATE_EXPR, type, op1);
1834         }
1835       else
1836         expr = fold_build2 (code, type, op0, op1);
1837
1838       return fold_convert (orig_type, expr);
1839
1840     case ARRAY_REF:
1841       if (!inside_addr)
1842         return orig_expr;
1843
1844       step = array_ref_element_size (expr);
1845       if (!cst_and_fits_in_hwi (step))
1846         break;
1847
1848       st = int_cst_value (step);
1849       op1 = TREE_OPERAND (expr, 1);
1850       op1 = strip_offset_1 (op1, false, false, &off1);
1851       *offset = off1 * st;
1852
1853       if (top_compref
1854           && zero_p (op1))
1855         {
1856           /* Strip the component reference completely.  */
1857           op0 = TREE_OPERAND (expr, 0);
1858           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859           *offset += off0;
1860           return op0;
1861         }
1862       break;
1863
1864     case COMPONENT_REF:
1865       if (!inside_addr)
1866         return orig_expr;
1867
1868       tmp = component_ref_field_offset (expr);
1869       if (top_compref
1870           && cst_and_fits_in_hwi (tmp))
1871         {
1872           /* Strip the component reference completely.  */
1873           op0 = TREE_OPERAND (expr, 0);
1874           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875           *offset = off0 + int_cst_value (tmp);
1876           return op0;
1877         }
1878       break;
1879
1880     case ADDR_EXPR:
1881       op0 = TREE_OPERAND (expr, 0);
1882       op0 = strip_offset_1 (op0, true, true, &off0);
1883       *offset += off0;
1884
1885       if (op0 == TREE_OPERAND (expr, 0))
1886         return orig_expr;
1887
1888       expr = build_fold_addr_expr (op0);
1889       return fold_convert (orig_type, expr);
1890
1891     case INDIRECT_REF:
1892       inside_addr = false;
1893       break;
1894
1895     default:
1896       return orig_expr;
1897     }
1898
1899   /* Default handling of expressions for that we want to recurse into
1900      the first operand.  */
1901   op0 = TREE_OPERAND (expr, 0);
1902   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903   *offset += off0;
1904
1905   if (op0 == TREE_OPERAND (expr, 0)
1906       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907     return orig_expr;
1908
1909   expr = copy_node (expr);
1910   TREE_OPERAND (expr, 0) = op0;
1911   if (op1)
1912     TREE_OPERAND (expr, 1) = op1;
1913
1914   /* Inside address, we might strip the top level component references,
1915      thus changing type of the expression.  Handling of ADDR_EXPR
1916      will fix that.  */
1917   expr = fold_convert (orig_type, expr);
1918
1919   return expr;
1920 }
1921
1922 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
1924 static tree
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 {
1927   return strip_offset_1 (expr, false, false, offset);
1928 }
1929
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931    We return unsigned type with the same precision, which avoids problems
1932    with overflows.  */
1933
1934 static tree
1935 generic_type_for (tree type)
1936 {
1937   if (POINTER_TYPE_P (type))
1938     return unsigned_type_for (type);
1939
1940   if (TYPE_UNSIGNED (type))
1941     return type;
1942
1943   return unsigned_type_for (type);
1944 }
1945
1946 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947    the bitmap to that we should store it.  */
1948
1949 static struct ivopts_data *fd_ivopts_data;
1950 static tree
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 {
1953   bitmap *depends_on = data;
1954   struct version_info *info;
1955
1956   if (TREE_CODE (*expr_p) != SSA_NAME)
1957     return NULL_TREE;
1958   info = name_info (fd_ivopts_data, *expr_p);
1959
1960   if (!info->inv_id || info->has_nonlin_use)
1961     return NULL_TREE;
1962
1963   if (!*depends_on)
1964     *depends_on = BITMAP_ALLOC (NULL);
1965   bitmap_set_bit (*depends_on, info->inv_id);
1966
1967   return NULL_TREE;
1968 }
1969
1970 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971    position to POS.  If USE is not NULL, the candidate is set as related to
1972    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973    replacement of the final value of the iv by a direct computation.  */
1974
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977                  tree base, tree step, bool important, enum iv_position pos,
1978                  struct iv_use *use, tree incremented_at)
1979 {
1980   unsigned i;
1981   struct iv_cand *cand = NULL;
1982   tree type, orig_type;
1983
1984   if (base)
1985     {
1986       orig_type = TREE_TYPE (base);
1987       type = generic_type_for (orig_type);
1988       if (type != orig_type)
1989         {
1990           base = fold_convert (type, base);
1991           if (step)
1992             step = fold_convert (type, step);
1993         }
1994     }
1995
1996   for (i = 0; i < n_iv_cands (data); i++)
1997     {
1998       cand = iv_cand (data, i);
1999
2000       if (cand->pos != pos)
2001         continue;
2002
2003       if (cand->incremented_at != incremented_at)
2004         continue;
2005
2006       if (!cand->iv)
2007         {
2008           if (!base && !step)
2009             break;
2010
2011           continue;
2012         }
2013
2014       if (!base && !step)
2015         continue;
2016
2017       if (!operand_equal_p (base, cand->iv->base, 0))
2018         continue;
2019
2020       if (zero_p (cand->iv->step))
2021         {
2022           if (zero_p (step))
2023             break;
2024         }
2025       else
2026         {
2027           if (step && operand_equal_p (step, cand->iv->step, 0))
2028             break;
2029         }
2030     }
2031
2032   if (i == n_iv_cands (data))
2033     {
2034       cand = XCNEW (struct iv_cand);
2035       cand->id = i;
2036
2037       if (!base && !step)
2038         cand->iv = NULL;
2039       else
2040         cand->iv = alloc_iv (base, step);
2041
2042       cand->pos = pos;
2043       if (pos != IP_ORIGINAL && cand->iv)
2044         {
2045           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046           cand->var_after = cand->var_before;
2047         }
2048       cand->important = important;
2049       cand->incremented_at = incremented_at;
2050       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
2052       if (step
2053           && TREE_CODE (step) != INTEGER_CST)
2054         {
2055           fd_ivopts_data = data;
2056           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057         }
2058
2059       if (dump_file && (dump_flags & TDF_DETAILS))
2060         dump_cand (dump_file, cand);
2061     }
2062
2063   if (important && !cand->important)
2064     {
2065       cand->important = true;
2066       if (dump_file && (dump_flags & TDF_DETAILS))
2067         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068     }
2069
2070   if (use)
2071     {
2072       bitmap_set_bit (use->related_cands, i);
2073       if (dump_file && (dump_flags & TDF_DETAILS))
2074         fprintf (dump_file, "Candidate %d is related to use %d\n",
2075                  cand->id, use->id);
2076     }
2077
2078   return cand;
2079 }
2080
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082    is allowed.
2083
2084    The purpose is to avoid splitting latch edge with a biv increment, thus
2085    creating a jump, possibly confusing other optimization passes and leaving
2086    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087    is not available (so we do not have a better alternative), or if the latch
2088    edge is already nonempty.  */
2089
2090 static bool
2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093   if (!ip_normal_pos (loop))
2094     return true;
2095
2096   if (!empty_block_p (ip_end_pos (loop)))
2097     return true;
2098
2099   return false;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103    position to POS.  If USE is not NULL, the candidate is set as related to
2104    it.  The candidate computation is scheduled on all available positions.  */
2105
2106 static void
2107 add_candidate (struct ivopts_data *data,
2108                tree base, tree step, bool important, struct iv_use *use)
2109 {
2110   if (ip_normal_pos (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112   if (ip_end_pos (data->current_loop)
2113       && allow_ip_end_pos_p (data->current_loop))
2114     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115 }
2116
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118    type with SIZE bits.  */
2119
2120 static void
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122                                      unsigned int size)
2123 {
2124   tree type = lang_hooks.types.type_for_size (size, true);
2125   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126                  true, NULL);
2127 }
2128
2129 /* Adds standard iv candidates.  */
2130
2131 static void
2132 add_standard_iv_candidates (struct ivopts_data *data)
2133 {
2134   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
2136   /* The same for a double-integer type if it is still fast enough.  */
2137   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 }
2140
2141
2142 /* Adds candidates bases on the old induction variable IV.  */
2143
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147   tree phi, def;
2148   struct iv_cand *cand;
2149
2150   add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152   /* The same, but with initial value zero.  */
2153   add_candidate (data,
2154                  build_int_cst (TREE_TYPE (iv->base), 0),
2155                  iv->step, true, NULL);
2156
2157   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158   if (TREE_CODE (phi) == PHI_NODE)
2159     {
2160       /* Additionally record the possibility of leaving the original iv
2161          untouched.  */
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163       cand = add_candidate_1 (data,
2164                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165                               SSA_NAME_DEF_STMT (def));
2166       cand->var_before = iv->ssa_name;
2167       cand->var_after = def;
2168     }
2169 }
2170
2171 /* Adds candidates based on the old induction variables.  */
2172
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176   unsigned i;
2177   struct iv *iv;
2178   bitmap_iterator bi;
2179
2180   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181     {
2182       iv = ver_info (data, i)->iv;
2183       if (iv && iv->biv_p && !zero_p (iv->step))
2184         add_old_iv_candidates (data, iv);
2185     }
2186 }
2187
2188 /* Adds candidates based on the value of the induction variable IV and USE.  */
2189
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192                          struct iv *iv, struct iv_use *use)
2193 {
2194   unsigned HOST_WIDE_INT offset;
2195   tree base;
2196
2197   add_candidate (data, iv->base, iv->step, false, use);
2198
2199   /* The same, but with initial value zero.  Make such variable important,
2200      since it is generic enough so that possibly many uses may be based
2201      on it.  */
2202   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203                  iv->step, true, use);
2204
2205   /* Third, try removing the constant offset.  */
2206   base = strip_offset (iv->base, &offset);
2207   if (offset)
2208     add_candidate (data, base, iv->step, false, use);
2209 }
2210
2211 /* Adds candidates based on the uses.  */
2212
2213 static void
2214 add_derived_ivs_candidates (struct ivopts_data *data)
2215 {
2216   unsigned i;
2217
2218   for (i = 0; i < n_iv_uses (data); i++)
2219     {
2220       struct iv_use *use = iv_use (data, i);
2221
2222       if (!use)
2223         continue;
2224
2225       switch (use->type)
2226         {
2227         case USE_NONLINEAR_EXPR:
2228         case USE_COMPARE:
2229         case USE_ADDRESS:
2230           /* Just add the ivs based on the value of the iv used here.  */
2231           add_iv_value_candidates (data, use->iv, use);
2232           break;
2233
2234         default:
2235           gcc_unreachable ();
2236         }
2237     }
2238 }
2239
2240 /* Record important candidates and add them to related_cands bitmaps
2241    if needed.  */
2242
2243 static void
2244 record_important_candidates (struct ivopts_data *data)
2245 {
2246   unsigned i;
2247   struct iv_use *use;
2248
2249   for (i = 0; i < n_iv_cands (data); i++)
2250     {
2251       struct iv_cand *cand = iv_cand (data, i);
2252
2253       if (cand->important)
2254         bitmap_set_bit (data->important_candidates, i);
2255     }
2256
2257   data->consider_all_candidates = (n_iv_cands (data)
2258                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2259
2260   if (data->consider_all_candidates)
2261     {
2262       /* We will not need "related_cands" bitmaps in this case,
2263          so release them to decrease peak memory consumption.  */
2264       for (i = 0; i < n_iv_uses (data); i++)
2265         {
2266           use = iv_use (data, i);
2267           BITMAP_FREE (use->related_cands);
2268         }
2269     }
2270   else
2271     {
2272       /* Add important candidates to the related_cands bitmaps.  */
2273       for (i = 0; i < n_iv_uses (data); i++)
2274         bitmap_ior_into (iv_use (data, i)->related_cands,
2275                          data->important_candidates);
2276     }
2277 }
2278
2279 /* Finds the candidates for the induction variables.  */
2280
2281 static void
2282 find_iv_candidates (struct ivopts_data *data)
2283 {
2284   /* Add commonly used ivs.  */
2285   add_standard_iv_candidates (data);
2286
2287   /* Add old induction variables.  */
2288   add_old_ivs_candidates (data);
2289
2290   /* Add induction variables derived from uses.  */
2291   add_derived_ivs_candidates (data);
2292
2293   /* Record the important candidates.  */
2294   record_important_candidates (data);
2295 }
2296
2297 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299    we allocate a simple list to every use.  */
2300
2301 static void
2302 alloc_use_cost_map (struct ivopts_data *data)
2303 {
2304   unsigned i, size, s, j;
2305
2306   for (i = 0; i < n_iv_uses (data); i++)
2307     {
2308       struct iv_use *use = iv_use (data, i);
2309       bitmap_iterator bi;
2310
2311       if (data->consider_all_candidates)
2312         size = n_iv_cands (data);
2313       else
2314         {
2315           s = 0;
2316           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317             {
2318               s++;
2319             }
2320
2321           /* Round up to the power of two, so that moduling by it is fast.  */
2322           for (size = 1; size < s; size <<= 1)
2323             continue;
2324         }
2325
2326       use->n_map_members = size;
2327       use->cost_map = XCNEWVEC (struct cost_pair, size);
2328     }
2329 }
2330
2331 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2332    on invariants DEPENDS_ON and that the value used in expressing it
2333    is VALUE.*/
2334
2335 static void
2336 set_use_iv_cost (struct ivopts_data *data,
2337                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2338                  bitmap depends_on, tree value)
2339 {
2340   unsigned i, s;
2341
2342   if (cost == INFTY)
2343     {
2344       BITMAP_FREE (depends_on);
2345       return;
2346     }
2347
2348   if (data->consider_all_candidates)
2349     {
2350       use->cost_map[cand->id].cand = cand;
2351       use->cost_map[cand->id].cost = cost;
2352       use->cost_map[cand->id].depends_on = depends_on;
2353       use->cost_map[cand->id].value = value;
2354       return;
2355     }
2356
2357   /* n_map_members is a power of two, so this computes modulo.  */
2358   s = cand->id & (use->n_map_members - 1);
2359   for (i = s; i < use->n_map_members; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362   for (i = 0; i < s; i++)
2363     if (!use->cost_map[i].cand)
2364       goto found;
2365
2366   gcc_unreachable ();
2367
2368 found:
2369   use->cost_map[i].cand = cand;
2370   use->cost_map[i].cost = cost;
2371   use->cost_map[i].depends_on = depends_on;
2372   use->cost_map[i].value = value;
2373 }
2374
2375 /* Gets cost of (USE, CANDIDATE) pair.  */
2376
2377 static struct cost_pair *
2378 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2379                  struct iv_cand *cand)
2380 {
2381   unsigned i, s;
2382   struct cost_pair *ret;
2383
2384   if (!cand)
2385     return NULL;
2386
2387   if (data->consider_all_candidates)
2388     {
2389       ret = use->cost_map + cand->id;
2390       if (!ret->cand)
2391         return NULL;
2392
2393       return ret;
2394     }
2395
2396   /* n_map_members is a power of two, so this computes modulo.  */
2397   s = cand->id & (use->n_map_members - 1);
2398   for (i = s; i < use->n_map_members; i++)
2399     if (use->cost_map[i].cand == cand)
2400       return use->cost_map + i;
2401
2402   for (i = 0; i < s; i++)
2403     if (use->cost_map[i].cand == cand)
2404       return use->cost_map + i;
2405
2406   return NULL;
2407 }
2408
2409 /* Returns estimate on cost of computing SEQ.  */
2410
2411 static unsigned
2412 seq_cost (rtx seq)
2413 {
2414   unsigned cost = 0;
2415   rtx set;
2416
2417   for (; seq; seq = NEXT_INSN (seq))
2418     {
2419       set = single_set (seq);
2420       if (set)
2421         cost += rtx_cost (set, SET);
2422       else
2423         cost++;
2424     }
2425
2426   return cost;
2427 }
2428
2429 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2430 static rtx
2431 produce_memory_decl_rtl (tree obj, int *regno)
2432 {
2433   rtx x;
2434
2435   gcc_assert (obj);
2436   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437     {
2438       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2439       x = gen_rtx_SYMBOL_REF (Pmode, name);
2440     }
2441   else
2442     x = gen_raw_REG (Pmode, (*regno)++);
2443
2444   return gen_rtx_MEM (DECL_MODE (obj), x);
2445 }
2446
2447 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2448    walk_tree.  DATA contains the actual fake register number.  */
2449
2450 static tree
2451 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452 {
2453   tree obj = NULL_TREE;
2454   rtx x = NULL_RTX;
2455   int *regno = data;
2456
2457   switch (TREE_CODE (*expr_p))
2458     {
2459     case ADDR_EXPR:
2460       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2461            handled_component_p (*expr_p);
2462            expr_p = &TREE_OPERAND (*expr_p, 0))
2463         continue;
2464       obj = *expr_p;
2465       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2466         x = produce_memory_decl_rtl (obj, regno);
2467       break;
2468
2469     case SSA_NAME:
2470       *ws = 0;
2471       obj = SSA_NAME_VAR (*expr_p);
2472       if (!DECL_RTL_SET_P (obj))
2473         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2474       break;
2475
2476     case VAR_DECL:
2477     case PARM_DECL:
2478     case RESULT_DECL:
2479       *ws = 0;
2480       obj = *expr_p;
2481
2482       if (DECL_RTL_SET_P (obj))
2483         break;
2484
2485       if (DECL_MODE (obj) == BLKmode)
2486         x = produce_memory_decl_rtl (obj, regno);
2487       else
2488         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489
2490       break;
2491
2492     default:
2493       break;
2494     }
2495
2496   if (x)
2497     {
2498       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2499       SET_DECL_RTL (obj, x);
2500     }
2501
2502   return NULL_TREE;
2503 }
2504
2505 /* Determines cost of the computation of EXPR.  */
2506
2507 static unsigned
2508 computation_cost (tree expr)
2509 {
2510   rtx seq, rslt;
2511   tree type = TREE_TYPE (expr);
2512   unsigned cost;
2513   /* Avoid using hard regs in ways which may be unsupported.  */
2514   int regno = LAST_VIRTUAL_REGISTER + 1;
2515
2516   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2517   start_sequence ();
2518   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2519   seq = get_insns ();
2520   end_sequence ();
2521
2522   cost = seq_cost (seq);
2523   if (MEM_P (rslt))
2524     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525
2526   return cost;
2527 }
2528
2529 /* Returns variable containing the value of candidate CAND at statement AT.  */
2530
2531 static tree
2532 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533 {
2534   if (stmt_after_increment (loop, cand, stmt))
2535     return cand->var_after;
2536   else
2537     return cand->var_before;
2538 }
2539
2540 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2541    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2542
2543 int
2544 tree_int_cst_sign_bit (tree t)
2545 {
2546   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2547   unsigned HOST_WIDE_INT w;
2548
2549   if (bitno < HOST_BITS_PER_WIDE_INT)
2550     w = TREE_INT_CST_LOW (t);
2551   else
2552     {
2553       w = TREE_INT_CST_HIGH (t);
2554       bitno -= HOST_BITS_PER_WIDE_INT;
2555     }
2556
2557   return (w >> bitno) & 1;
2558 }
2559
2560 /* If we can prove that TOP = cst * BOT for some constant cst,
2561    store cst to MUL and return true.  Otherwise return false.
2562    The returned value is always sign-extended, regardless of the
2563    signedness of TOP and BOT.  */
2564
2565 static bool
2566 constant_multiple_of (tree top, tree bot, double_int *mul)
2567 {
2568   tree mby;
2569   enum tree_code code;
2570   double_int res, p0, p1;
2571   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572
2573   STRIP_NOPS (top);
2574   STRIP_NOPS (bot);
2575
2576   if (operand_equal_p (top, bot, 0))
2577     {
2578       *mul = double_int_one;
2579       return true;
2580     }
2581
2582   code = TREE_CODE (top);
2583   switch (code)
2584     {
2585     case MULT_EXPR:
2586       mby = TREE_OPERAND (top, 1);
2587       if (TREE_CODE (mby) != INTEGER_CST)
2588         return false;
2589
2590       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2591         return false;
2592
2593       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2594                               precision);
2595       return true;
2596
2597     case PLUS_EXPR:
2598     case MINUS_EXPR:
2599       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2600           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2601         return false;
2602
2603       if (code == MINUS_EXPR)
2604         p1 = double_int_neg (p1);
2605       *mul = double_int_sext (double_int_add (p0, p1), precision);
2606       return true;
2607
2608     case INTEGER_CST:
2609       if (TREE_CODE (bot) != INTEGER_CST)
2610         return false;
2611
2612       p0 = double_int_sext (tree_to_double_int (bot), precision);
2613       p1 = double_int_sext (tree_to_double_int (top), precision);
2614       if (double_int_zero_p (p1))
2615         return false;
2616       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2617                               precision);
2618       return double_int_zero_p (res);
2619
2620     default:
2621       return false;
2622     }
2623 }
2624
2625 /* Sets COMB to CST.  */
2626
2627 static void
2628 aff_combination_const (struct affine_tree_combination *comb, tree type,
2629                        unsigned HOST_WIDE_INT cst)
2630 {
2631   unsigned prec = TYPE_PRECISION (type);
2632
2633   comb->type = type;
2634   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635
2636   comb->n = 0;
2637   comb->rest = NULL_TREE;
2638   comb->offset = cst & comb->mask;
2639 }
2640
2641 /* Sets COMB to single element ELT.  */
2642
2643 static void
2644 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645 {
2646   unsigned prec = TYPE_PRECISION (type);
2647
2648   comb->type = type;
2649   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650
2651   comb->n = 1;
2652   comb->elts[0] = elt;
2653   comb->coefs[0] = 1;
2654   comb->rest = NULL_TREE;
2655   comb->offset = 0;
2656 }
2657
2658 /* Scales COMB by SCALE.  */
2659
2660 static void
2661 aff_combination_scale (struct affine_tree_combination *comb,
2662                        unsigned HOST_WIDE_INT scale)
2663 {
2664   unsigned i, j;
2665
2666   if (scale == 1)
2667     return;
2668
2669   if (scale == 0)
2670     {
2671       aff_combination_const (comb, comb->type, 0);
2672       return;
2673     }
2674
2675   comb->offset = (scale * comb->offset) & comb->mask;
2676   for (i = 0, j = 0; i < comb->n; i++)
2677     {
2678       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2679       comb->elts[j] = comb->elts[i];
2680       if (comb->coefs[j] != 0)
2681         j++;
2682     }
2683   comb->n = j;
2684
2685   if (comb->rest)
2686     {
2687       if (comb->n < MAX_AFF_ELTS)
2688         {
2689           comb->coefs[comb->n] = scale;
2690           comb->elts[comb->n] = comb->rest;
2691           comb->rest = NULL_TREE;
2692           comb->n++;
2693         }
2694       else
2695         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2696                                   build_int_cst_type (comb->type, scale));
2697     }
2698 }
2699
2700 /* Adds ELT * SCALE to COMB.  */
2701
2702 static void
2703 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2704                          unsigned HOST_WIDE_INT scale)
2705 {
2706   unsigned i;
2707
2708   if (scale == 0)
2709     return;
2710
2711   for (i = 0; i < comb->n; i++)
2712     if (operand_equal_p (comb->elts[i], elt, 0))
2713       {
2714         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2715         if (comb->coefs[i])
2716           return;
2717
2718         comb->n--;
2719         comb->coefs[i] = comb->coefs[comb->n];
2720         comb->elts[i] = comb->elts[comb->n];
2721
2722         if (comb->rest)
2723           {
2724             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2725             comb->coefs[comb->n] = 1;
2726             comb->elts[comb->n] = comb->rest;
2727             comb->rest = NULL_TREE;
2728             comb->n++;
2729           }
2730         return;
2731       }
2732   if (comb->n < MAX_AFF_ELTS)
2733     {
2734       comb->coefs[comb->n] = scale;
2735       comb->elts[comb->n] = elt;
2736       comb->n++;
2737       return;
2738     }
2739
2740   if (scale == 1)
2741     elt = fold_convert (comb->type, elt);
2742   else
2743     elt = fold_build2 (MULT_EXPR, comb->type,
2744                        fold_convert (comb->type, elt),
2745                        build_int_cst_type (comb->type, scale));
2746
2747   if (comb->rest)
2748     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2749   else
2750     comb->rest = elt;
2751 }
2752
2753 /* Adds COMB2 to COMB1.  */
2754
2755 static void
2756 aff_combination_add (struct affine_tree_combination *comb1,
2757                      struct affine_tree_combination *comb2)
2758 {
2759   unsigned i;
2760
2761   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2762   for (i = 0; i < comb2->n; i++)
2763     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2764   if (comb2->rest)
2765     aff_combination_add_elt (comb1, comb2->rest, 1);
2766 }
2767
2768 /* Convert COMB to TYPE.  */
2769
2770 static void
2771 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772 {
2773   unsigned prec = TYPE_PRECISION (type);
2774   unsigned i;
2775
2776   /* If the precision of both types is the same, it suffices to change the type
2777      of the whole combination -- the elements are allowed to have another type
2778      equivalent wrto STRIP_NOPS.  */
2779   if (prec == TYPE_PRECISION (comb->type))
2780     {
2781       comb->type = type;
2782       return;
2783     }
2784
2785   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2786   comb->offset = comb->offset & comb->mask;
2787
2788   /* The type of the elements can be different from comb->type only as
2789      much as what STRIP_NOPS would remove.  We can just directly cast
2790      to TYPE.  */
2791   for (i = 0; i < comb->n; i++)
2792     comb->elts[i] = fold_convert (type, comb->elts[i]);
2793   if (comb->rest)
2794     comb->rest = fold_convert (type, comb->rest);
2795
2796   comb->type = type;
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929
2930 /* Makes tree from the affine combination COMB.  */
2931
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939
2940   if (comb->n == 0 && comb->offset == 0)
2941     {
2942       if (expr)
2943         {
2944           /* Handle the special case produced by get_computation_aff when
2945              the type does not fit in HOST_WIDE_INT.  */
2946           return fold_convert (type, expr);
2947         }
2948       else
2949         return build_int_cst (type, 0);
2950     }
2951
2952   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953
2954   for (i = 0; i < comb->n; i++)
2955     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2956                             comb->mask);
2957
2958   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959     {
2960       /* Offset is negative.  */
2961       off = (-comb->offset) & comb->mask;
2962       sgn = comb->mask;
2963     }
2964   else
2965     {
2966       off = comb->offset;
2967       sgn = 1;
2968     }
2969   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970                           comb->mask);
2971 }
2972
2973 /* Folds EXPR using the affine expressions framework.  */
2974
2975 static tree
2976 fold_affine_expr (tree expr)
2977 {
2978   tree type = TREE_TYPE (expr);
2979   struct affine_tree_combination comb;
2980
2981   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2982     return expr;
2983
2984   tree_to_aff_combination (expr, type, &comb);
2985   return aff_combination_to_tree (&comb);
2986 }
2987
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989    same precision that is at least as wide as the precision of TYPE, stores
2990    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991    type of A and B.  */
2992
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996   tree wider_type = NULL;
2997   tree suba, subb;
2998   tree atype = TREE_TYPE (*a);
2999
3000   if ((TREE_CODE (*a) == NOP_EXPR
3001        || TREE_CODE (*a) == CONVERT_EXPR))
3002     {
3003       suba = TREE_OPERAND (*a, 0);
3004       wider_type = TREE_TYPE (suba);
3005       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3006         return atype;
3007     }
3008   else
3009     return atype;
3010
3011   if ((TREE_CODE (*b) == NOP_EXPR
3012        || TREE_CODE (*b) == CONVERT_EXPR))
3013     {
3014       subb = TREE_OPERAND (*b, 0);
3015       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3016         return atype;
3017     }
3018   else
3019     return atype;
3020
3021   *a = suba;
3022   *b = subb;
3023   return wider_type;
3024 }
3025
3026 /* Determines the expression by that USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3028    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3029
3030 static bool
3031 get_computation_aff (struct loop *loop,
3032                      struct iv_use *use, struct iv_cand *cand, tree at,
3033                      struct affine_tree_combination *aff)
3034 {
3035   tree ubase = use->iv->base;
3036   tree ustep = use->iv->step;
3037   tree cbase = cand->iv->base;
3038   tree cstep = cand->iv->step;
3039   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3040   tree common_type;
3041   tree uutype;
3042   tree expr, delta;
3043   tree ratio;
3044   unsigned HOST_WIDE_INT ustepi, cstepi;
3045   HOST_WIDE_INT ratioi;
3046   struct affine_tree_combination cbase_aff, expr_aff;
3047   tree cstep_orig = cstep, ustep_orig = ustep;
3048   double_int rat;
3049
3050   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051     {
3052       /* We do not have a precision to express the values of use.  */
3053       return false;
3054     }
3055
3056   expr = var_at_stmt (loop, cand, at);
3057
3058   if (TREE_TYPE (expr) != ctype)
3059     {
3060       /* This may happen with the original ivs.  */
3061       expr = fold_convert (ctype, expr);
3062     }
3063
3064   if (TYPE_UNSIGNED (utype))
3065     uutype = utype;
3066   else
3067     {
3068       uutype = unsigned_type_for (utype);
3069       ubase = fold_convert (uutype, ubase);
3070       ustep = fold_convert (uutype, ustep);
3071     }
3072
3073   if (uutype != ctype)
3074     {
3075       expr = fold_convert (uutype, expr);
3076       cbase = fold_convert (uutype, cbase);
3077       cstep = fold_convert (uutype, cstep);
3078
3079       /* If the conversion is not noop, we must take it into account when
3080          considering the value of the step.  */
3081       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3082         cstep_orig = cstep;
3083     }
3084
3085   if (cst_and_fits_in_hwi (cstep_orig)
3086       && cst_and_fits_in_hwi (ustep_orig))
3087     {
3088       ustepi = int_cst_value (ustep_orig);
3089       cstepi = int_cst_value (cstep_orig);
3090
3091       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092         {
3093           /* TODO maybe consider case when ustep divides cstep and the ratio is
3094              a power of 2 (so that the division is fast to execute)?  We would
3095              need to be much more careful with overflows etc. then.  */
3096           return false;
3097         }
3098
3099       ratio = build_int_cst_type (uutype, ratioi);
3100     }
3101   else
3102     {
3103       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3104         return false;
3105       ratio = double_int_to_tree (uutype, rat);
3106
3107       /* Ratioi is only used to detect special cases when the multiplicative
3108          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3109          set it to 0.  */
3110       if (double_int_fits_in_shwi_p (rat))
3111         ratioi = double_int_to_shwi (rat);
3112       else
3113         ratioi = 0;
3114     }
3115
3116   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117      type, we achieve better folding by computing their difference in this
3118      wider type, and cast the result to UUTYPE.  We do not need to worry about
3119      overflows, as all the arithmetics will in the end be performed in UUTYPE
3120      anyway.  */
3121   common_type = determine_common_wider_type (&ubase, &cbase);
3122
3123   /* We may need to shift the value if we are after the increment.  */
3124   if (stmt_after_increment (loop, cand, at))
3125     {
3126       if (uutype != common_type)
3127         cstep = fold_convert (common_type, cstep);
3128       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3129     }
3130
3131   /* use = ubase - ratio * cbase + ratio * var.
3132
3133      In general case ubase + ratio * (var - cbase) could be better (one less
3134      multiplication), but often it is possible to eliminate redundant parts
3135      of computations from (ubase - ratio * cbase) term, and if it does not
3136      happen, fold is able to apply the distributive law to obtain this form
3137      anyway.  */
3138
3139   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140     {
3141       /* Let's compute in trees and just return the result in AFF.  This case
3142          should not be very common, and fold itself is not that bad either,
3143          so making the aff. functions more complicated to handle this case
3144          is not that urgent.  */
3145       if (ratioi == 1)
3146         {
3147           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3148           if (uutype != common_type)
3149             delta = fold_convert (uutype, delta);
3150           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151         }
3152       else if (ratioi == -1)
3153         {
3154           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3155           if (uutype != common_type)
3156             delta = fold_convert (uutype, delta);
3157           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158         }
3159       else
3160         {
3161           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3162           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3163           if (uutype != common_type)
3164             delta = fold_convert (uutype, delta);
3165           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3166           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3167         }
3168
3169       aff->type = uutype;
3170       aff->n = 0;
3171       aff->offset = 0;
3172       aff->mask = 0;
3173       aff->rest = expr;
3174       return true;
3175     }
3176
3177   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3178      possible to compute ratioi.  */
3179   gcc_assert (ratioi);
3180
3181   tree_to_aff_combination (ubase, common_type, aff);
3182   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3183   tree_to_aff_combination (expr, uutype, &expr_aff);
3184   aff_combination_scale (&cbase_aff, -ratioi);
3185   aff_combination_scale (&expr_aff, ratioi);
3186   aff_combination_add (aff, &cbase_aff);
3187   if (common_type != uutype)
3188     aff_combination_convert (uutype, aff);
3189   aff_combination_add (aff, &expr_aff);
3190
3191   return true;
3192 }
3193
3194 /* Determines the expression by that USE is expressed from induction variable
3195    CAND at statement AT in LOOP.  The computation is unshared.  */
3196
3197 static tree
3198 get_computation_at (struct loop *loop,
3199                     struct iv_use *use, struct iv_cand *cand, tree at)
3200 {
3201   struct affine_tree_combination aff;
3202   tree type = TREE_TYPE (use->iv->base);
3203
3204   if (!get_computation_aff (loop, use, cand, at, &aff))
3205     return NULL_TREE;
3206   unshare_aff_combination (&aff);
3207   return fold_convert (type, aff_combination_to_tree (&aff));
3208 }
3209
3210 /* Determines the expression by that USE is expressed from induction variable
3211    CAND in LOOP.  The computation is unshared.  */
3212
3213 static tree
3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 {
3216   return get_computation_at (loop, use, cand, use->stmt);
3217 }
3218
3219 /* Returns cost of addition in MODE.  */
3220
3221 static unsigned
3222 add_cost (enum machine_mode mode)
3223 {
3224   static unsigned costs[NUM_MACHINE_MODES];
3225   rtx seq;
3226   unsigned cost;
3227
3228   if (costs[mode])
3229     return costs[mode];
3230
3231   start_sequence ();
3232   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3233                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3235                  NULL_RTX);
3236   seq = get_insns ();
3237   end_sequence ();
3238
3239   cost = seq_cost (seq);
3240   if (!cost)
3241     cost = 1;
3242
3243   costs[mode] = cost;
3244
3245   if (dump_file && (dump_flags & TDF_DETAILS))
3246     fprintf (dump_file, "Addition in %s costs %d\n",
3247              GET_MODE_NAME (mode), cost);
3248   return cost;
3249 }
3250
3251 /* Entry in a hashtable of already known costs for multiplication.  */
3252 struct mbc_entry
3253 {
3254   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3255   enum machine_mode mode;       /* In mode.  */
3256   unsigned cost;                /* The cost.  */
3257 };
3258
3259 /* Counts hash value for the ENTRY.  */
3260
3261 static hashval_t
3262 mbc_entry_hash (const void *entry)
3263 {
3264   const struct mbc_entry *e = entry;
3265
3266   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3267 }
3268
3269 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3270
3271 static int
3272 mbc_entry_eq (const void *entry1, const void *entry2)
3273 {
3274   const struct mbc_entry *e1 = entry1;
3275   const struct mbc_entry *e2 = entry2;
3276
3277   return (e1->mode == e2->mode
3278           && e1->cst == e2->cst);
3279 }
3280
3281 /* Returns cost of multiplication by constant CST in MODE.  */
3282
3283 unsigned
3284 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285 {
3286   static htab_t costs;
3287   struct mbc_entry **cached, act;
3288   rtx seq;
3289   unsigned cost;
3290
3291   if (!costs)
3292     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293
3294   act.mode = mode;
3295   act.cst = cst;
3296   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3297   if (*cached)
3298     return (*cached)->cost;
3299
3300   *cached = XNEW (struct mbc_entry);
3301   (*cached)->mode = mode;
3302   (*cached)->cst = cst;
3303
3304   start_sequence ();
3305   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3306                gen_int_mode (cst, mode), NULL_RTX, 0);
3307   seq = get_insns ();
3308   end_sequence ();
3309
3310   cost = seq_cost (seq);
3311
3312   if (dump_file && (dump_flags & TDF_DETAILS))
3313     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3314              (int) cst, GET_MODE_NAME (mode), cost);
3315
3316   (*cached)->cost = cost;
3317
3318   return cost;
3319 }
3320
3321 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3322    validity for a memory reference accessing memory of mode MODE.  */
3323
3324 bool
3325 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3326 {
3327 #define MAX_RATIO 128
3328   static sbitmap valid_mult[MAX_MACHINE_MODE];
3329
3330   if (!valid_mult[mode])
3331     {
3332       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3333       rtx addr;
3334       HOST_WIDE_INT i;
3335
3336       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3337       sbitmap_zero (valid_mult[mode]);
3338       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3339       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3340         {
3341           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3342           if (memory_address_p (mode, addr))
3343             SET_BIT (valid_mult[mode], i + MAX_RATIO);
3344         }
3345
3346       if (dump_file && (dump_flags & TDF_DETAILS))
3347         {
3348           fprintf (dump_file, "  allowed multipliers:");
3349           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3350             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3351               fprintf (dump_file, " %d", (int) i);
3352           fprintf (dump_file, "\n");
3353           fprintf (dump_file, "\n");
3354         }
3355     }
3356
3357   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3358     return false;
3359
3360   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
3361 }
3362
3363 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3364    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3365    variable is omitted.  Compute the cost for a memory reference that accesses
3366    a memory location of mode MEM_MODE.
3367
3368    TODO -- there must be some better way.  This all is quite crude.  */
3369
3370 static unsigned
3371 get_address_cost (bool symbol_present, bool var_present,
3372                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3373                   enum machine_mode mem_mode)
3374 {
3375   static bool initialized[MAX_MACHINE_MODE];
3376   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3377   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3378   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3379   unsigned cost, acost;
3380   bool offset_p, ratio_p;
3381   HOST_WIDE_INT s_offset;
3382   unsigned HOST_WIDE_INT mask;
3383   unsigned bits;
3384
3385   if (!initialized[mem_mode])
3386     {
3387       HOST_WIDE_INT i;
3388       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3389       int old_cse_not_expected;
3390       unsigned sym_p, var_p, off_p, rat_p, add_c;
3391       rtx seq, addr, base;
3392       rtx reg0, reg1;
3393
3394       initialized[mem_mode] = true;
3395
3396       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3397
3398       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3399       for (i = start; i <= 1 << 20; i <<= 1)
3400         {
3401           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3402           if (!memory_address_p (mem_mode, addr))
3403             break;
3404         }
3405       max_offset[mem_mode] = i == start ? 0 : i >> 1;
3406       off[mem_mode] = max_offset[mem_mode];
3407
3408       for (i = start; i <= 1 << 20; i <<= 1)
3409         {
3410           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3411           if (!memory_address_p (mem_mode, addr))
3412             break;
3413         }
3414       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3415
3416       if (dump_file && (dump_flags & TDF_DETAILS))
3417         {
3418           fprintf (dump_file, "get_address_cost:\n");
3419           fprintf (dump_file, "  min offset %s %d\n",
3420                    GET_MODE_NAME (mem_mode),
3421                    (int) min_offset[mem_mode]);
3422           fprintf (dump_file, "  max offset %s %d\n",
3423                    GET_MODE_NAME (mem_mode),
3424                    (int) max_offset[mem_mode]);
3425         }
3426
3427       rat[mem_mode] = 1;
3428       for (i = 2; i <= MAX_RATIO; i++)
3429         if (multiplier_allowed_in_address_p (i, mem_mode))
3430           {
3431             rat[mem_mode] = i;
3432             break;
3433           }
3434
3435       /* Compute the cost of various addressing modes.  */
3436       acost = 0;
3437       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3438       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3439
3440       for (i = 0; i < 16; i++)
3441         {
3442           sym_p = i & 1;
3443           var_p = (i >> 1) & 1;
3444           off_p = (i >> 2) & 1;
3445           rat_p = (i >> 3) & 1;
3446
3447           addr = reg0;
3448           if (rat_p)
3449             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3450                                    gen_int_mode (rat[mem_mode], Pmode));
3451
3452           if (var_p)
3453             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3454
3455           if (sym_p)
3456             {
3457               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3458               if (off_p)
3459                 base = gen_rtx_fmt_e (CONST, Pmode,
3460                                       gen_rtx_fmt_ee (PLUS, Pmode,
3461                                                       base,
3462                                                       gen_int_mode (off[mem_mode],
3463                                                                     Pmode)));
3464             }
3465           else if (off_p)
3466             base = gen_int_mode (off[mem_mode], Pmode);
3467           else
3468             base = NULL_RTX;
3469
3470           if (base)
3471             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3472
3473           start_sequence ();
3474           /* To avoid splitting addressing modes, pretend that no cse will
3475              follow.  */
3476           old_cse_not_expected = cse_not_expected;
3477           cse_not_expected = true;
3478           addr = memory_address (mem_mode, addr);
3479           cse_not_expected = old_cse_not_expected;
3480           seq = get_insns ();
3481           end_sequence ();
3482
3483           acost = seq_cost (seq);
3484           acost += address_cost (addr, mem_mode);
3485
3486           if (!acost)
3487             acost = 1;
3488           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3489         }
3490
3491       /* On some targets, it is quite expensive to load symbol to a register,
3492          which makes addresses that contain symbols look much more expensive.
3493          However, the symbol will have to be loaded in any case before the
3494          loop (and quite likely we have it in register already), so it does not
3495          make much sense to penalize them too heavily.  So make some final
3496          tweaks for the SYMBOL_PRESENT modes:
3497
3498          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3499          var is cheaper, use this mode with small penalty.
3500          If VAR_PRESENT is true, try whether the mode with
3501          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3502          if this is the case, use it.  */
3503       add_c = add_cost (Pmode);
3504       for (i = 0; i < 8; i++)
3505         {
3506           var_p = i & 1;
3507           off_p = (i >> 1) & 1;
3508           rat_p = (i >> 2) & 1;
3509
3510           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3511           if (var_p)
3512             acost += add_c;
3513
3514           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3515             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3516         }
3517
3518       if (dump_file && (dump_flags & TDF_DETAILS))
3519         {
3520           fprintf (dump_file, "Address costs:\n");
3521
3522           for (i = 0; i < 16; i++)
3523             {
3524               sym_p = i & 1;
3525               var_p = (i >> 1) & 1;
3526               off_p = (i >> 2) & 1;
3527               rat_p = (i >> 3) & 1;
3528
3529               fprintf (dump_file, "  ");
3530               if (sym_p)
3531                 fprintf (dump_file, "sym + ");
3532               if (var_p)
3533                 fprintf (dump_file, "var + ");
3534               if (off_p)
3535                 fprintf (dump_file, "cst + ");
3536               if (rat_p)
3537                 fprintf (dump_file, "rat * ");
3538
3539               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3540               fprintf (dump_file, "index costs %d\n", acost);
3541             }
3542           fprintf (dump_file, "\n");
3543         }
3544     }
3545
3546   bits = GET_MODE_BITSIZE (Pmode);
3547   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3548   offset &= mask;
3549   if ((offset >> (bits - 1) & 1))
3550     offset |= ~mask;
3551   s_offset = offset;
3552
3553   cost = 0;
3554   offset_p = (s_offset != 0
3555               && min_offset[mem_mode] <= s_offset
3556               && s_offset <= max_offset[mem_mode]);
3557   ratio_p = (ratio != 1
3558              && multiplier_allowed_in_address_p (ratio, mem_mode));
3559
3560   if (ratio != 1 && !ratio_p)
3561     cost += multiply_by_cost (ratio, Pmode);
3562
3563   if (s_offset && !offset_p && !symbol_present)
3564     cost += add_cost (Pmode);
3565
3566   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3567   return cost + acost;
3568 }
3569
3570 /* Estimates cost of forcing expression EXPR into a variable.  */
3571
3572 unsigned
3573 force_expr_to_var_cost (tree expr)
3574 {
3575   static bool costs_initialized = false;
3576   static unsigned integer_cost;
3577   static unsigned symbol_cost;
3578   static unsigned address_cost;
3579   tree op0, op1;
3580   unsigned cost0, cost1, cost;
3581   enum machine_mode mode;
3582
3583   if (!costs_initialized)
3584     {
3585       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3586       rtx x = gen_rtx_MEM (DECL_MODE (var),
3587                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3588       tree addr;
3589       tree type = build_pointer_type (integer_type_node);
3590
3591       integer_cost = computation_cost (build_int_cst (integer_type_node,
3592                                                       2000));
3593
3594       SET_DECL_RTL (var, x);
3595       TREE_STATIC (var) = 1;
3596       addr = build1 (ADDR_EXPR, type, var);
3597       symbol_cost = computation_cost (addr) + 1;
3598
3599       address_cost
3600         = computation_cost (build2 (PLUS_EXPR, type,
3601                                     addr,
3602                                     build_int_cst (type, 2000))) + 1;
3603       if (dump_file && (dump_flags & TDF_DETAILS))
3604         {
3605           fprintf (dump_file, "force_expr_to_var_cost:\n");
3606           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3607           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3608           fprintf (dump_file, "  address %d\n", (int) address_cost);
3609           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3610           fprintf (dump_file, "\n");
3611         }
3612
3613       costs_initialized = true;
3614     }
3615
3616   STRIP_NOPS (expr);
3617
3618   if (SSA_VAR_P (expr))
3619     return 0;
3620
3621   if (TREE_INVARIANT (expr))
3622     {
3623       if (TREE_CODE (expr) == INTEGER_CST)
3624         return integer_cost;
3625
3626       if (TREE_CODE (expr) == ADDR_EXPR)
3627         {
3628           tree obj = TREE_OPERAND (expr, 0);
3629
3630           if (TREE_CODE (obj) == VAR_DECL
3631               || TREE_CODE (obj) == PARM_DECL
3632               || TREE_CODE (obj) == RESULT_DECL)
3633             return symbol_cost;
3634         }
3635
3636       return address_cost;
3637     }
3638
3639   switch (TREE_CODE (expr))
3640     {
3641     case PLUS_EXPR:
3642     case MINUS_EXPR:
3643     case MULT_EXPR:
3644       op0 = TREE_OPERAND (expr, 0);
3645       op1 = TREE_OPERAND (expr, 1);
3646       STRIP_NOPS (op0);
3647       STRIP_NOPS (op1);
3648
3649       if (is_gimple_val (op0))
3650         cost0 = 0;
3651       else
3652         cost0 = force_expr_to_var_cost (op0);
3653
3654       if (is_gimple_val (op1))
3655         cost1 = 0;
3656       else
3657         cost1 = force_expr_to_var_cost (op1);
3658
3659       break;
3660
3661     default:
3662       /* Just an arbitrary value, FIXME.  */
3663       return target_spill_cost;
3664     }
3665
3666   mode = TYPE_MODE (TREE_TYPE (expr));
3667   switch (TREE_CODE (expr))
3668     {
3669     case PLUS_EXPR:
3670     case MINUS_EXPR:
3671       cost = add_cost (mode);
3672       break;
3673
3674     case MULT_EXPR:
3675       if (cst_and_fits_in_hwi (op0))
3676         cost = multiply_by_cost (int_cst_value (op0), mode);
3677       else if (cst_and_fits_in_hwi (op1))
3678         cost = multiply_by_cost (int_cst_value (op1), mode);
3679       else
3680         return target_spill_cost;
3681       break;
3682
3683     default:
3684       gcc_unreachable ();
3685     }
3686
3687   cost += cost0;
3688   cost += cost1;
3689
3690   /* Bound the cost by target_spill_cost.  The parts of complicated
3691      computations often are either loop invariant or at least can
3692      be shared between several iv uses, so letting this grow without
3693      limits would not give reasonable results.  */
3694   return cost < target_spill_cost ? cost : target_spill_cost;
3695 }
3696
3697 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3698    invariants the computation depends on.  */
3699
3700 static unsigned
3701 force_var_cost (struct ivopts_data *data,
3702                 tree expr, bitmap *depends_on)
3703 {
3704   if (depends_on)
3705     {
3706       fd_ivopts_data = data;
3707       walk_tree (&expr, find_depends, depends_on, NULL);
3708     }
3709
3710   return force_expr_to_var_cost (expr);
3711 }
3712
3713 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3714    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3715    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3716    invariants the computation depends on.  */
3717
3718 static unsigned
3719 split_address_cost (struct ivopts_data *data,
3720                     tree addr, bool *symbol_present, bool *var_present,
3721                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3722 {
3723   tree core;
3724   HOST_WIDE_INT bitsize;
3725   HOST_WIDE_INT bitpos;
3726   tree toffset;
3727   enum machine_mode mode;
3728   int unsignedp, volatilep;
3729
3730   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3731                               &unsignedp, &volatilep, false);
3732
3733   if (toffset != 0
3734       || bitpos % BITS_PER_UNIT != 0
3735       || TREE_CODE (core) != VAR_DECL)
3736     {
3737       *symbol_present = false;
3738       *var_present = true;
3739       fd_ivopts_data = data;
3740       walk_tree (&addr, find_depends, depends_on, NULL);
3741       return target_spill_cost;
3742     }
3743
3744   *offset += bitpos / BITS_PER_UNIT;
3745   if (TREE_STATIC (core)
3746       || DECL_EXTERNAL (core))
3747     {
3748       *symbol_present = true;
3749       *var_present = false;
3750       return 0;
3751     }
3752
3753   *symbol_present = false;
3754   *var_present = true;
3755   return 0;
3756 }
3757
3758 /* Estimates cost of expressing difference of addresses E1 - E2 as
3759    var + symbol + offset.  The value of offset is added to OFFSET,
3760    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3761    part is missing.  DEPENDS_ON is a set of the invariants the computation
3762    depends on.  */
3763
3764 static unsigned
3765 ptr_difference_cost (struct ivopts_data *data,
3766                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3767                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3768 {
3769   HOST_WIDE_INT diff = 0;
3770   unsigned cost;
3771
3772   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3773
3774   if (ptr_difference_const (e1, e2, &diff))
3775     {
3776       *offset += diff;
3777       *symbol_present = false;
3778       *var_present = false;
3779       return 0;
3780     }
3781
3782   if (e2 == integer_zero_node)
3783     return split_address_cost (data, TREE_OPERAND (e1, 0),
3784                                symbol_present, var_present, offset, depends_on);
3785
3786   *symbol_present = false;
3787   *var_present = true;
3788
3789   cost = force_var_cost (data, e1, depends_on);
3790   cost += force_var_cost (data, e2, depends_on);
3791   cost += add_cost (Pmode);
3792
3793   return cost;
3794 }
3795
3796 /* Estimates cost of expressing difference E1 - E2 as
3797    var + symbol + offset.  The value of offset is added to OFFSET,
3798    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3799    part is missing.  DEPENDS_ON is a set of the invariants the computation
3800    depends on.  */
3801
3802 static unsigned
3803 difference_cost (struct ivopts_data *data,
3804                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3805                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3806 {
3807   unsigned cost;
3808   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3809   unsigned HOST_WIDE_INT off1, off2;
3810
3811   e1 = strip_offset (e1, &off1);
3812   e2 = strip_offset (e2, &off2);
3813   *offset += off1 - off2;
3814
3815   STRIP_NOPS (e1);
3816   STRIP_NOPS (e2);
3817
3818   if (TREE_CODE (e1) == ADDR_EXPR)
3819     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3820                                 depends_on);
3821   *symbol_present = false;
3822
3823   if (operand_equal_p (e1, e2, 0))
3824     {
3825       *var_present = false;
3826       return 0;
3827     }
3828   *var_present = true;
3829   if (zero_p (e2))
3830     return force_var_cost (data, e1, depends_on);
3831
3832   if (zero_p (e1))
3833     {
3834       cost = force_var_cost (data, e2, depends_on);
3835       cost += multiply_by_cost (-1, mode);
3836
3837       return cost;
3838     }
3839
3840   cost = force_var_cost (data, e1, depends_on);
3841   cost += force_var_cost (data, e2, depends_on);
3842   cost += add_cost (mode);
3843
3844   return cost;
3845 }
3846
3847 /* Determines the cost of the computation by that USE is expressed
3848    from induction variable CAND.  If ADDRESS_P is true, we just need
3849    to create an address from it, otherwise we want to get it into
3850    register.  A set of invariants we depend on is stored in
3851    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3852
3853 static unsigned
3854 get_computation_cost_at (struct ivopts_data *data,
3855                          struct iv_use *use, struct iv_cand *cand,
3856                          bool address_p, bitmap *depends_on, tree at)
3857 {
3858   tree ubase = use->iv->base, ustep = use->iv->step;
3859   tree cbase, cstep;
3860   tree utype = TREE_TYPE (ubase), ctype;
3861   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3862   HOST_WIDE_INT ratio, aratio;
3863   bool var_present, symbol_present;
3864   unsigned cost = 0, n_sums;
3865
3866   *depends_on = NULL;
3867
3868   /* Only consider real candidates.  */
3869   if (!cand->iv)
3870     return INFTY;
3871
3872   cbase = cand->iv->base;
3873   cstep = cand->iv->step;
3874   ctype = TREE_TYPE (cbase);
3875
3876   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3877     {
3878       /* We do not have a precision to express the values of use.  */
3879       return INFTY;
3880     }
3881
3882   if (address_p)
3883     {
3884       /* Do not try to express address of an object with computation based
3885          on address of a different object.  This may cause problems in rtl
3886          level alias analysis (that does not expect this to be happening,
3887          as this is illegal in C), and would be unlikely to be useful
3888          anyway.  */
3889       if (use->iv->base_object
3890           && cand->iv->base_object
3891           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3892         return INFTY;
3893     }
3894
3895   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3896     {
3897       /* TODO -- add direct handling of this case.  */
3898       goto fallback;
3899     }
3900
3901   /* CSTEPI is removed from the offset in case statement is after the
3902      increment.  If the step is not constant, we use zero instead.
3903      This is a bit imprecise (there is the extra addition), but
3904      redundancy elimination is likely to transform the code so that
3905      it uses value of the variable before increment anyway,
3906      so it is not that much unrealistic.  */
3907   if (cst_and_fits_in_hwi (cstep))
3908     cstepi = int_cst_value (cstep);
3909   else
3910     cstepi = 0;
3911
3912   if (cst_and_fits_in_hwi (ustep)
3913       && cst_and_fits_in_hwi (cstep))
3914     {
3915       ustepi = int_cst_value (ustep);
3916
3917       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3918         return INFTY;
3919     }
3920   else
3921     {
3922       double_int rat;
3923
3924       if (!constant_multiple_of (ustep, cstep, &rat))
3925         return INFTY;
3926
3927       if (double_int_fits_in_shwi_p (rat))
3928         ratio = double_int_to_shwi (rat);
3929       else
3930         return INFTY;
3931     }
3932
3933   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3934      or ratio == 1, it is better to handle this like
3935
3936      ubase - ratio * cbase + ratio * var
3937
3938      (also holds in the case ratio == -1, TODO.  */
3939
3940   if (cst_and_fits_in_hwi (cbase))
3941     {
3942       offset = - ratio * int_cst_value (cbase);
3943       cost += difference_cost (data,
3944                                ubase, integer_zero_node,
3945                                &symbol_present, &var_present, &offset,
3946                                depends_on);
3947     }
3948   else if (ratio == 1)
3949     {
3950       cost += difference_cost (data,
3951                                ubase, cbase,
3952                                &symbol_present, &var_present, &offset,
3953                                depends_on);
3954     }
3955   else
3956     {
3957       cost += force_var_cost (data, cbase, depends_on);
3958       cost += add_cost (TYPE_MODE (ctype));
3959       cost += difference_cost (data,
3960                                ubase, integer_zero_node,
3961                                &symbol_present, &var_present, &offset,
3962                                depends_on);
3963     }
3964
3965   /* If we are after the increment, the value of the candidate is higher by
3966      one iteration.  */
3967   if (stmt_after_increment (data->current_loop, cand, at))
3968     offset -= ratio * cstepi;
3969
3970   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3971      (symbol/var/const parts may be omitted).  If we are looking for an address,
3972      find the cost of addressing this.  */
3973   if (address_p)
3974     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3975                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3976
3977   /* Otherwise estimate the costs for computing the expression.  */
3978   aratio = ratio > 0 ? ratio : -ratio;
3979   if (!symbol_present && !var_present && !offset)
3980     {
3981       if (ratio != 1)
3982         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3983
3984       return cost;
3985     }
3986
3987   if (aratio != 1)
3988     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3989
3990   n_sums = 1;
3991   if (var_present
3992       /* Symbol + offset should be compile-time computable.  */
3993       && (symbol_present || offset))
3994     n_sums++;
3995
3996   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3997
3998 fallback:
3999   {
4000     /* Just get the expression, expand it and measure the cost.  */
4001     tree comp = get_computation_at (data->current_loop, use, cand, at);
4002
4003     if (!comp)
4004       return INFTY;
4005
4006     if (address_p)
4007       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
4008
4009     return computation_cost (comp);
4010   }
4011 }
4012
4013 /* Determines the cost of the computation by that USE is expressed
4014    from induction variable CAND.  If ADDRESS_P is true, we just need
4015    to create an address from it, otherwise we want to get it into
4016    register.  A set of invariants we depend on is stored in
4017    DEPENDS_ON.  */
4018
4019 static unsigned
4020 get_computation_cost (struct ivopts_data *data,
4021                       struct iv_use *use, struct iv_cand *cand,
4022                       bool address_p, bitmap *depends_on)
4023 {
4024   return get_computation_cost_at (data,
4025                                   use, cand, address_p, depends_on, use->stmt);
4026 }
4027
4028 /* Determines cost of basing replacement of USE on CAND in a generic
4029    expression.  */
4030
4031 static bool
4032 determine_use_iv_cost_generic (struct ivopts_data *data,
4033                                struct iv_use *use, struct iv_cand *cand)
4034 {
4035   bitmap depends_on;
4036   unsigned cost;
4037
4038   /* The simple case first -- if we need to express value of the preserved
4039      original biv, the cost is 0.  This also prevents us from counting the
4040      cost of increment twice -- once at this use and once in the cost of
4041      the candidate.  */
4042   if (cand->pos == IP_ORIGINAL
4043       && cand->incremented_at == use->stmt)
4044     {
4045       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4046       return true;
4047     }
4048
4049   cost = get_computation_cost (data, use, cand, false, &depends_on);
4050   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4051
4052   return cost != INFTY;
4053 }
4054
4055 /* Determines cost of basing replacement of USE on CAND in an address.  */
4056
4057 static bool
4058 determine_use_iv_cost_address (struct ivopts_data *data,
4059                                struct iv_use *use, struct iv_cand *cand)
4060 {
4061   bitmap depends_on;
4062   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4063
4064   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4065
4066   return cost != INFTY;
4067 }
4068
4069 /* Computes value of induction variable IV in iteration NITER.  */
4070
4071 static tree
4072 iv_value (struct iv *iv, tree niter)
4073 {
4074   tree val;
4075   tree type = TREE_TYPE (iv->base);
4076
4077   niter = fold_convert (type, niter);
4078   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4079
4080   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4081 }
4082
4083 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4084
4085 static tree
4086 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4087 {
4088   tree val = iv_value (cand->iv, niter);
4089   tree type = TREE_TYPE (cand->iv->base);
4090
4091   if (stmt_after_increment (loop, cand, at))
4092     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4093
4094   return val;
4095 }
4096
4097 /* Returns period of induction variable iv.  */
4098
4099 static tree
4100 iv_period (struct iv *iv)
4101 {
4102   tree step = iv->step, period, type;
4103   tree pow2div;
4104
4105   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4106
4107   /* Period of the iv is gcd (step, type range).  Since type range is power
4108      of two, it suffices to determine the maximum power of two that divides
4109      step.  */
4110   pow2div = num_ending_zeros (step);
4111   type = unsigned_type_for (TREE_TYPE (step));
4112
4113   period = build_low_bits_mask (type,
4114                                 (TYPE_PRECISION (type)
4115                                  - tree_low_cst (pow2div, 1)));
4116
4117   return period;
4118 }
4119
4120 /* Returns the comparison operator used when eliminating the iv USE.  */
4121
4122 static enum tree_code
4123 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4124 {
4125   struct loop *loop = data->current_loop;
4126   basic_block ex_bb;
4127   edge exit;
4128
4129   ex_bb = bb_for_stmt (use->stmt);
4130   exit = EDGE_SUCC (ex_bb, 0);
4131   if (flow_bb_inside_loop_p (loop, exit->dest))
4132     exit = EDGE_SUCC (ex_bb, 1);
4133
4134   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4135 }
4136
4137 /* Check whether it is possible to express the condition in USE by comparison
4138    of candidate CAND.  If so, store the value compared with to BOUND.  */
4139
4140 static bool
4141 may_eliminate_iv (struct ivopts_data *data,
4142                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4143 {
4144   basic_block ex_bb;
4145   edge exit;
4146   tree nit, nit_type;
4147   tree wider_type, period, per_type;
4148   struct loop *loop = data->current_loop;
4149
4150   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4151     return false;
4152
4153   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4154      for other conditions inside loop body.  */
4155   ex_bb = bb_for_stmt (use->stmt);
4156   if (use->stmt != last_stmt (ex_bb)
4157       || TREE_CODE (use->stmt) != COND_EXPR)
4158     return false;
4159   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4160     return false;
4161
4162   exit = EDGE_SUCC (ex_bb, 0);
4163   if (flow_bb_inside_loop_p (loop, exit->dest))
4164     exit = EDGE_SUCC (ex_bb, 1);
4165   if (flow_bb_inside_loop_p (loop, exit->dest))
4166     return false;
4167
4168   nit = niter_for_exit (data, exit);
4169   if (!nit)
4170     return false;
4171
4172   nit_type = TREE_TYPE (nit);
4173
4174   /* Determine whether we may use the variable to test whether niter iterations
4175      elapsed.  This is the case iff the period of the induction variable is
4176      greater than the number of iterations.  */
4177   period = iv_period (cand->iv);
4178   if (!period)
4179     return false;
4180   per_type = TREE_TYPE (period);
4181
4182   wider_type = TREE_TYPE (period);
4183   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4184     wider_type = per_type;
4185   else
4186     wider_type = nit_type;
4187
4188   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4189                                       fold_convert (wider_type, period),
4190                                       fold_convert (wider_type, nit))))
4191     return false;
4192
4193   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4194   return true;
4195 }
4196
4197 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4198
4199 static bool
4200 determine_use_iv_cost_condition (struct ivopts_data *data,
4201                                  struct iv_use *use, struct iv_cand *cand)
4202 {
4203   tree bound = NULL_TREE, op, cond;
4204   bitmap depends_on = NULL;
4205   unsigned cost;
4206
4207   /* Only consider real candidates.  */
4208   if (!cand->iv)
4209     {
4210       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4211       return false;
4212     }
4213
4214   if (may_eliminate_iv (data, use, cand, &bound))
4215     {
4216       cost = force_var_cost (data, bound, &depends_on);
4217
4218       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4219       return cost != INFTY;
4220     }
4221
4222   /* The induction variable elimination failed; just express the original
4223      giv.  If it is compared with an invariant, note that we cannot get
4224      rid of it.  */
4225   cost = get_computation_cost (data, use, cand, false, &depends_on);
4226
4227   cond = *use->op_p;
4228   if (TREE_CODE (cond) != SSA_NAME)
4229     {
4230       op = TREE_OPERAND (cond, 0);
4231       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4232         op = TREE_OPERAND (cond, 1);
4233       if (TREE_CODE (op) == SSA_NAME)
4234         {
4235           op = get_iv (data, op)->base;
4236           fd_ivopts_data = data;
4237           walk_tree (&op, find_depends, &depends_on, NULL);
4238         }
4239     }
4240
4241   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4242   return cost != INFTY;
4243 }
4244
4245 /* Determines cost of basing replacement of USE on CAND.  Returns false
4246    if USE cannot be based on CAND.  */
4247
4248 static bool
4249 determine_use_iv_cost (struct ivopts_data *data,
4250                        struct iv_use *use, struct iv_cand *cand)
4251 {
4252   switch (use->type)
4253     {
4254     case USE_NONLINEAR_EXPR:
4255       return determine_use_iv_cost_generic (data, use, cand);
4256
4257     case USE_ADDRESS:
4258       return determine_use_iv_cost_address (data, use, cand);
4259
4260     case USE_COMPARE:
4261       return determine_use_iv_cost_condition (data, use, cand);
4262
4263     default:
4264       gcc_unreachable ();
4265     }
4266 }
4267
4268 /* Determines costs of basing the use of the iv on an iv candidate.  */
4269
4270 static void
4271 determine_use_iv_costs (struct ivopts_data *data)
4272 {
4273   unsigned i, j;
4274   struct iv_use *use;
4275   struct iv_cand *cand;
4276   bitmap to_clear = BITMAP_ALLOC (NULL);
4277
4278   alloc_use_cost_map (data);
4279
4280   for (i = 0; i < n_iv_uses (data); i++)
4281     {
4282       use = iv_use (data, i);
4283
4284       if (data->consider_all_candidates)
4285         {
4286           for (j = 0; j < n_iv_cands (data); j++)
4287             {
4288               cand = iv_cand (data, j);
4289               determine_use_iv_cost (data, use, cand);
4290             }
4291         }
4292       else
4293         {
4294           bitmap_iterator bi;
4295
4296           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4297             {
4298               cand = iv_cand (data, j);
4299               if (!determine_use_iv_cost (data, use, cand))
4300                 bitmap_set_bit (to_clear, j);
4301             }
4302
4303           /* Remove the candidates for that the cost is infinite from
4304              the list of related candidates.  */
4305           bitmap_and_compl_into (use->related_cands, to_clear);
4306           bitmap_clear (to_clear);
4307         }
4308     }
4309
4310   BITMAP_FREE (to_clear);
4311
4312   if (dump_file && (dump_flags & TDF_DETAILS))
4313     {
4314       fprintf (dump_file, "Use-candidate costs:\n");
4315
4316       for (i = 0; i < n_iv_uses (data); i++)
4317         {
4318           use = iv_use (data, i);
4319
4320           fprintf (dump_file, "Use %d:\n", i);
4321           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4322           for (j = 0; j < use->n_map_members; j++)
4323             {
4324               if (!use->cost_map[j].cand
4325                   || use->cost_map[j].cost == INFTY)
4326                 continue;
4327
4328               fprintf (dump_file, "  %d\t%d\t",
4329                        use->cost_map[j].cand->id,
4330                        use->cost_map[j].cost);
4331               if (use->cost_map[j].depends_on)
4332                 bitmap_print (dump_file,
4333                               use->cost_map[j].depends_on, "","");
4334               fprintf (dump_file, "\n");
4335             }
4336
4337           fprintf (dump_file, "\n");
4338         }
4339       fprintf (dump_file, "\n");
4340     }
4341 }
4342
4343 /* Determines cost of the candidate CAND.  */
4344
4345 static void
4346 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4347 {
4348   unsigned cost_base, cost_step;
4349   tree base;
4350
4351   if (!cand->iv)
4352     {
4353       cand->cost = 0;
4354       return;
4355     }
4356
4357   /* There are two costs associated with the candidate -- its increment
4358      and its initialization.  The second is almost negligible for any loop
4359      that rolls enough, so we take it just very little into account.  */
4360
4361   base = cand->iv->base;
4362   cost_base = force_var_cost (data, base, NULL);
4363   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4364
4365   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4366
4367   /* Prefer the original iv unless we may gain something by replacing it;
4368      this is not really relevant for artificial ivs created by other
4369      passes.  */
4370   if (cand->pos == IP_ORIGINAL
4371       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4372     cand->cost--;
4373
4374   /* Prefer not to insert statements into latch unless there are some
4375      already (so that we do not create unnecessary jumps).  */
4376   if (cand->pos == IP_END
4377       && empty_block_p (ip_end_pos (data->current_loop)))
4378     cand->cost++;
4379 }
4380
4381 /* Determines costs of computation of the candidates.  */
4382
4383 static void
4384 determine_iv_costs (struct ivopts_data *data)
4385 {
4386   unsigned i;
4387
4388   if (dump_file && (dump_flags & TDF_DETAILS))
4389     {
4390       fprintf (dump_file, "Candidate costs:\n");
4391       fprintf (dump_file, "  cand\tcost\n");
4392     }
4393
4394   for (i = 0; i < n_iv_cands (data); i++)
4395     {
4396       struct iv_cand *cand = iv_cand (data, i);
4397
4398       determine_iv_cost (data, cand);
4399
4400       if (dump_file && (dump_flags & TDF_DETAILS))
4401         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4402     }
4403
4404   if (dump_file && (dump_flags & TDF_DETAILS))
4405     fprintf (dump_file, "\n");
4406 }
4407
4408 /* Calculates cost for having SIZE induction variables.  */
4409
4410 static unsigned
4411 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4412 {
4413   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4414 }
4415
4416 /* For each size of the induction variable set determine the penalty.  */
4417
4418 static void
4419 determine_set_costs (struct ivopts_data *data)
4420 {
4421   unsigned j, n;
4422   tree phi, op;
4423   struct loop *loop = data->current_loop;
4424   bitmap_iterator bi;
4425
4426   /* We use the following model (definitely improvable, especially the
4427      cost function -- TODO):
4428
4429      We estimate the number of registers available (using MD data), name it A.
4430
4431      We estimate the number of registers used by the loop, name it U.  This
4432      number is obtained as the number of loop phi nodes (not counting virtual
4433      registers and bivs) + the number of variables from outside of the loop.
4434
4435      We set a reserve R (free regs that are used for temporary computations,
4436      etc.).  For now the reserve is a constant 3.
4437
4438      Let I be the number of induction variables.
4439
4440      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4441         make a lot of ivs without a reason).
4442      -- if A - R < U + I <= A, the cost is I * PRES_COST
4443      -- if U + I > A, the cost is I * PRES_COST and
4444         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4445
4446   if (dump_file && (dump_flags & TDF_DETAILS))
4447     {
4448       fprintf (dump_file, "Global costs:\n");
4449       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4450       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4451       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4452       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4453     }
4454
4455   n = 0;
4456   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4457     {
4458       op = PHI_RESULT (phi);
4459
4460       if (!is_gimple_reg (op))
4461         continue;
4462
4463       if (get_iv (data, op))
4464         continue;
4465
4466       n++;
4467     }
4468
4469   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4470     {
4471       struct version_info *info = ver_info (data, j);
4472
4473       if (info->inv_id && info->has_nonlin_use)
4474         n++;
4475     }
4476
4477   data->regs_used = n;
4478   if (dump_file && (dump_flags & TDF_DETAILS))
4479     fprintf (dump_file, "  regs_used %d\n", n);
4480
4481   if (dump_file && (dump_flags & TDF_DETAILS))
4482     {
4483       fprintf (dump_file, "  cost for size:\n");
4484       fprintf (dump_file, "  ivs\tcost\n");
4485       for (j = 0; j <= 2 * target_avail_regs; j++)
4486         fprintf (dump_file, "  %d\t%d\n", j,
4487                  ivopts_global_cost_for_size (data, j));
4488       fprintf (dump_file, "\n");
4489     }
4490 }
4491
4492 /* Returns true if A is a cheaper cost pair than B.  */
4493
4494 static bool
4495 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4496 {
4497   if (!a)
4498     return false;
4499
4500   if (!b)
4501     return true;
4502
4503   if (a->cost < b->cost)
4504     return true;
4505
4506   if (a->cost > b->cost)
4507     return false;
4508
4509   /* In case the costs are the same, prefer the cheaper candidate.  */
4510   if (a->cand->cost < b->cand->cost)
4511     return true;
4512
4513   return false;
4514 }
4515
4516 /* Computes the cost field of IVS structure.  */
4517
4518 static void
4519 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4520 {
4521   unsigned cost = 0;
4522
4523   cost += ivs->cand_use_cost;
4524   cost += ivs->cand_cost;
4525   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4526
4527   ivs->cost = cost;
4528 }
4529
4530 /* Remove invariants in set INVS to set IVS.  */
4531
4532 static void
4533 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4534 {
4535   bitmap_iterator bi;
4536   unsigned iid;
4537
4538   if (!invs)
4539     return;
4540
4541   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4542     {
4543       ivs->n_invariant_uses[iid]--;
4544       if (ivs->n_invariant_uses[iid] == 0)
4545         ivs->n_regs--;
4546     }
4547 }
4548
4549 /* Set USE not to be expressed by any candidate in IVS.  */
4550
4551 static void
4552 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4553                  struct iv_use *use)
4554 {
4555   unsigned uid = use->id, cid;
4556   struct cost_pair *cp;
4557
4558   cp = ivs->cand_for_use[uid];
4559   if (!cp)
4560     return;
4561   cid = cp->cand->id;
4562
4563   ivs->bad_uses++;
4564   ivs->cand_for_use[uid] = NULL;
4565   ivs->n_cand_uses[cid]--;
4566
4567   if (ivs->n_cand_uses[cid] == 0)
4568     {
4569       bitmap_clear_bit (ivs->cands, cid);
4570       /* Do not count the pseudocandidates.  */
4571       if (cp->cand->iv)
4572         ivs->n_regs--;
4573       ivs->n_cands--;
4574       ivs->cand_cost -= cp->cand->cost;
4575
4576       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4577     }
4578
4579   ivs->cand_use_cost -= cp->cost;
4580
4581   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4582   iv_ca_recount_cost (data, ivs);
4583 }
4584
4585 /* Add invariants in set INVS to set IVS.  */
4586
4587 static void
4588 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4589 {
4590   bitmap_iterator bi;
4591   unsigned iid;
4592
4593   if (!invs)
4594     return;
4595
4596   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4597     {
4598       ivs->n_invariant_uses[iid]++;
4599       if (ivs->n_invariant_uses[iid] == 1)
4600         ivs->n_regs++;
4601     }
4602 }
4603
4604 /* Set cost pair for USE in set IVS to CP.  */
4605
4606 static void
4607 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4608               struct iv_use *use, struct cost_pair *cp)
4609 {
4610   unsigned uid = use->id, cid;
4611
4612   if (ivs->cand_for_use[uid] == cp)
4613     return;
4614
4615   if (ivs->cand_for_use[uid])
4616     iv_ca_set_no_cp (data, ivs, use);
4617
4618   if (cp)
4619     {
4620       cid = cp->cand->id;
4621
4622       ivs->bad_uses--;
4623       ivs->cand_for_use[uid] = cp;
4624       ivs->n_cand_uses[cid]++;
4625       if (ivs->n_cand_uses[cid] == 1)
4626         {
4627           bitmap_set_bit (ivs->cands, cid);
4628           /* Do not count the pseudocandidates.  */
4629           if (cp->cand->iv)
4630             ivs->n_regs++;
4631           ivs->n_cands++;
4632           ivs->cand_cost += cp->cand->cost;
4633
4634           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4635         }
4636
4637       ivs->cand_use_cost += cp->cost;
4638       iv_ca_set_add_invariants (ivs, cp->depends_on);
4639       iv_ca_recount_cost (data, ivs);
4640     }
4641 }
4642
4643 /* Extend set IVS by expressing USE by some of the candidates in it
4644    if possible.  */
4645
4646 static void
4647 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4648                struct iv_use *use)
4649 {
4650   struct cost_pair *best_cp = NULL, *cp;
4651   bitmap_iterator bi;
4652   unsigned i;
4653
4654   gcc_assert (ivs->upto >= use->id);
4655
4656   if (ivs->upto == use->id)
4657     {
4658       ivs->upto++;
4659       ivs->bad_uses++;
4660     }
4661
4662   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4663     {
4664       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4665
4666       if (cheaper_cost_pair (cp, best_cp))
4667         best_cp = cp;
4668     }
4669
4670   iv_ca_set_cp (data, ivs, use, best_cp);
4671 }
4672
4673 /* Get cost for assignment IVS.  */
4674
4675 static unsigned
4676 iv_ca_cost (struct iv_ca *ivs)
4677 {
4678   return (ivs->bad_uses ? INFTY : ivs->cost);
4679 }
4680
4681 /* Returns true if all dependences of CP are among invariants in IVS.  */
4682
4683 static bool
4684 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4685 {
4686   unsigned i;
4687   bitmap_iterator bi;
4688
4689   if (!cp->depends_on)
4690     return true;
4691
4692   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4693     {
4694       if (ivs->n_invariant_uses[i] == 0)
4695         return false;
4696     }
4697
4698   return true;
4699 }
4700
4701 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4702    it before NEXT_CHANGE.  */
4703
4704 static struct iv_ca_delta *
4705 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4706                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4707 {
4708   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4709
4710   change->use = use;
4711   change->old_cp = old_cp;
4712   change->new_cp = new_cp;
4713   change->next_change = next_change;
4714
4715   return change;
4716 }
4717
4718 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4719    are rewritten.  */
4720
4721 static struct iv_ca_delta *
4722 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4723 {
4724   struct iv_ca_delta *last;
4725
4726   if (!l2)
4727     return l1;
4728
4729   if (!l1)
4730     return l2;
4731
4732   for (last = l1; last->next_change; last = last->next_change)
4733     continue;
4734   last->next_change = l2;
4735
4736   return l1;
4737 }
4738
4739 /* Returns candidate by that USE is expressed in IVS.  */
4740
4741 static struct cost_pair *
4742 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4743 {
4744   return ivs->cand_for_use[use->id];
4745 }
4746
4747 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4748
4749 static struct iv_ca_delta *
4750 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4751 {
4752   struct iv_ca_delta *act, *next, *prev = NULL;
4753   struct cost_pair *tmp;
4754
4755   for (act = delta; act; act = next)
4756     {
4757       next = act->next_change;
4758       act->next_change = prev;
4759       prev = act;
4760
4761       tmp = act->old_cp;
4762       act->old_cp = act->new_cp;
4763       act->new_cp = tmp;
4764     }
4765
4766   return prev;
4767 }
4768
4769 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4770    reverted instead.  */
4771
4772 static void
4773 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4774                     struct iv_ca_delta *delta, bool forward)
4775 {
4776   struct cost_pair *from, *to;
4777   struct iv_ca_delta *act;
4778
4779   if (!forward)
4780     delta = iv_ca_delta_reverse (delta);
4781
4782   for (act = delta; act; act = act->next_change)
4783     {
4784       from = act->old_cp;
4785       to = act->new_cp;
4786       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4787       iv_ca_set_cp (data, ivs, act->use, to);
4788     }
4789
4790   if (!forward)
4791     iv_ca_delta_reverse (delta);
4792 }
4793
4794 /* Returns true if CAND is used in IVS.  */
4795
4796 static bool
4797 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4798 {
4799   return ivs->n_cand_uses[cand->id] > 0;
4800 }
4801
4802 /* Returns number of induction variable candidates in the set IVS.  */
4803
4804 static unsigned
4805 iv_ca_n_cands (struct iv_ca *ivs)
4806 {
4807   return ivs->n_cands;
4808 }
4809
4810 /* Free the list of changes DELTA.  */
4811
4812 static void
4813 iv_ca_delta_free (struct iv_ca_delta **delta)
4814 {
4815   struct iv_ca_delta *act, *next;
4816
4817   for (act = *delta; act; act = next)
4818     {
4819       next = act->next_change;
4820       free (act);
4821     }
4822
4823   *delta = NULL;
4824 }
4825
4826 /* Allocates new iv candidates assignment.  */
4827
4828 static struct iv_ca *
4829 iv_ca_new (struct ivopts_data *data)
4830 {
4831   struct iv_ca *nw = XNEW (struct iv_ca);
4832
4833   nw->upto = 0;
4834   nw->bad_uses = 0;
4835   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4836   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4837   nw->cands = BITMAP_ALLOC (NULL);
4838   nw->n_cands = 0;
4839   nw->n_regs = 0;
4840   nw->cand_use_cost = 0;
4841   nw->cand_cost = 0;
4842   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4843   nw->cost = 0;
4844
4845   return nw;
4846 }
4847
4848 /* Free memory occupied by the set IVS.  */
4849
4850 static void
4851 iv_ca_free (struct iv_ca **ivs)
4852 {
4853   free ((*ivs)->cand_for_use);
4854   free ((*ivs)->n_cand_uses);
4855   BITMAP_FREE ((*ivs)->cands);
4856   free ((*ivs)->n_invariant_uses);
4857   free (*ivs);
4858   *ivs = NULL;
4859 }
4860
4861 /* Dumps IVS to FILE.  */
4862
4863 static void
4864 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4865 {
4866   const char *pref = "  invariants ";
4867   unsigned i;
4868
4869   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4870   bitmap_print (file, ivs->cands, "  candidates ","\n");
4871
4872   for (i = 1; i <= data->max_inv_id; i++)
4873     if (ivs->n_invariant_uses[i])
4874       {
4875         fprintf (file, "%s%d", pref, i);
4876         pref = ", ";
4877       }
4878   fprintf (file, "\n");
4879 }
4880
4881 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4882    new set, and store differences in DELTA.  Number of induction variables
4883    in the new set is stored to N_IVS.  */
4884
4885 static unsigned
4886 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4887               struct iv_cand *cand, struct iv_ca_delta **delta,
4888               unsigned *n_ivs)
4889 {
4890   unsigned i, cost;
4891   struct iv_use *use;
4892   struct cost_pair *old_cp, *new_cp;
4893
4894   *delta = NULL;
4895   for (i = 0; i < ivs->upto; i++)
4896     {
4897       use = iv_use (data, i);
4898       old_cp = iv_ca_cand_for_use (ivs, use);
4899
4900       if (old_cp
4901           && old_cp->cand == cand)
4902         continue;
4903
4904       new_cp = get_use_iv_cost (data, use, cand);
4905       if (!new_cp)
4906         continue;
4907
4908       if (!iv_ca_has_deps (ivs, new_cp))
4909         continue;
4910
4911       if (!cheaper_cost_pair (new_cp, old_cp))
4912         continue;
4913
4914       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4915     }
4916
4917   iv_ca_delta_commit (data, ivs, *delta, true);
4918   cost = iv_ca_cost (ivs);
4919   if (n_ivs)
4920     *n_ivs = iv_ca_n_cands (ivs);
4921   iv_ca_delta_commit (data, ivs, *delta, false);
4922
4923   return cost;
4924 }
4925
4926 /* Try narrowing set IVS by removing CAND.  Return the cost of
4927    the new set and store the differences in DELTA.  */
4928
4929 static unsigned
4930 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4931               struct iv_cand *cand, struct iv_ca_delta **delta)
4932 {
4933   unsigned i, ci;
4934   struct iv_use *use;
4935   struct cost_pair *old_cp, *new_cp, *cp;
4936   bitmap_iterator bi;
4937   struct iv_cand *cnd;
4938   unsigned cost;
4939
4940   *delta = NULL;
4941   for (i = 0; i < n_iv_uses (data); i++)
4942     {
4943       use = iv_use (data, i);
4944
4945       old_cp = iv_ca_cand_for_use (ivs, use);
4946       if (old_cp->cand != cand)
4947         continue;
4948
4949       new_cp = NULL;
4950
4951       if (data->consider_all_candidates)
4952         {
4953           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4954             {
4955               if (ci == cand->id)
4956                 continue;
4957
4958               cnd = iv_cand (data, ci);
4959
4960               cp = get_use_iv_cost (data, use, cnd);
4961               if (!cp)
4962                 continue;
4963               if (!iv_ca_has_deps (ivs, cp))
4964                 continue;
4965
4966               if (!cheaper_cost_pair (cp, new_cp))
4967                 continue;
4968
4969               new_cp = cp;
4970             }
4971         }
4972       else
4973         {
4974           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4975             {
4976               if (ci == cand->id)
4977                 continue;
4978
4979               cnd = iv_cand (data, ci);
4980
4981               cp = get_use_iv_cost (data, use, cnd);
4982               if (!cp)
4983                 continue;
4984               if (!iv_ca_has_deps (ivs, cp))
4985                 continue;
4986
4987               if (!cheaper_cost_pair (cp, new_cp))
4988                 continue;
4989
4990               new_cp = cp;
4991             }
4992         }
4993
4994       if (!new_cp)
4995         {
4996           iv_ca_delta_free (delta);
4997           return INFTY;
4998         }
4999
5000       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5001     }
5002
5003   iv_ca_delta_commit (data, ivs, *delta, true);
5004   cost = iv_ca_cost (ivs);
5005   iv_ca_delta_commit (data, ivs, *delta, false);
5006
5007   return cost;
5008 }
5009
5010 /* Try optimizing the set of candidates IVS by removing candidates different
5011    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5012    differences in DELTA.  */
5013
5014 static unsigned
5015 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5016              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5017 {
5018   bitmap_iterator bi;
5019   struct iv_ca_delta *act_delta, *best_delta;
5020   unsigned i, best_cost, acost;
5021   struct iv_cand *cand;
5022
5023   best_delta = NULL;
5024   best_cost = iv_ca_cost (ivs);
5025
5026   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5027     {
5028       cand = iv_cand (data, i);
5029
5030       if (cand == except_cand)
5031         continue;
5032
5033       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5034
5035       if (acost < best_cost)
5036         {
5037           best_cost = acost;
5038           iv_ca_delta_free (&best_delta);
5039           best_delta = act_delta;
5040         }
5041       else
5042         iv_ca_delta_free (&act_delta);
5043     }
5044
5045   if (!best_delta)
5046     {
5047       *delta = NULL;
5048       return best_cost;
5049     }
5050
5051   /* Recurse to possibly remove other unnecessary ivs.  */
5052   iv_ca_delta_commit (data, ivs, best_delta, true);
5053   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5054   iv_ca_delta_commit (data, ivs, best_delta, false);
5055   *delta = iv_ca_delta_join (best_delta, *delta);
5056   return best_cost;
5057 }
5058
5059 /* Tries to extend the sets IVS in the best possible way in order
5060    to express the USE.  */
5061
5062 static bool
5063 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5064                   struct iv_use *use)
5065 {
5066   unsigned best_cost, act_cost;
5067   unsigned i;
5068   bitmap_iterator bi;
5069   struct iv_cand *cand;
5070   struct iv_ca_delta *best_delta = NULL, *act_delta;
5071   struct cost_pair *cp;
5072
5073   iv_ca_add_use (data, ivs, use);
5074   best_cost = iv_ca_cost (ivs);
5075
5076   cp = iv_ca_cand_for_use (ivs, use);
5077   if (cp)
5078     {
5079       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5080       iv_ca_set_no_cp (data, ivs, use);
5081     }
5082
5083   /* First try important candidates.  Only if it fails, try the specific ones.
5084      Rationale -- in loops with many variables the best choice often is to use
5085      just one generic biv.  If we added here many ivs specific to the uses,
5086      the optimization algorithm later would be likely to get stuck in a local
5087      minimum, thus causing us to create too many ivs.  The approach from
5088      few ivs to more seems more likely to be successful -- starting from few
5089      ivs, replacing an expensive use by a specific iv should always be a
5090      win.  */
5091   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5092     {
5093       cand = iv_cand (data, i);
5094
5095       if (iv_ca_cand_used_p (ivs, cand))
5096         continue;
5097
5098       cp = get_use_iv_cost (data, use, cand);
5099       if (!cp)
5100         continue;
5101
5102       iv_ca_set_cp (data, ivs, use, cp);
5103       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5104       iv_ca_set_no_cp (data, ivs, use);
5105       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5106
5107       if (act_cost < best_cost)
5108         {
5109           best_cost = act_cost;
5110
5111           iv_ca_delta_free (&best_delta);
5112           best_delta = act_delta;
5113         }
5114       else
5115         iv_ca_delta_free (&act_delta);
5116     }
5117
5118   if (best_cost == INFTY)
5119     {
5120       for (i = 0; i < use->n_map_members; i++)
5121         {
5122           cp = use->cost_map + i;
5123           cand = cp->cand;
5124           if (!cand)
5125             continue;
5126
5127           /* Already tried this.  */
5128           if (cand->important)
5129             continue;
5130
5131           if (iv_ca_cand_used_p (ivs, cand))
5132             continue;
5133
5134           act_delta = NULL;
5135           iv_ca_set_cp (data, ivs, use, cp);
5136           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5137           iv_ca_set_no_cp (data, ivs, use);
5138           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5139                                        cp, act_delta);
5140
5141           if (act_cost < best_cost)
5142             {
5143               best_cost = act_cost;
5144
5145               if (best_delta)
5146                 iv_ca_delta_free (&best_delta);
5147               best_delta = act_delta;
5148             }
5149           else
5150             iv_ca_delta_free (&act_delta);
5151         }
5152     }
5153
5154   iv_ca_delta_commit (data, ivs, best_delta, true);
5155   iv_ca_delta_free (&best_delta);
5156
5157   return (best_cost != INFTY);
5158 }
5159
5160 /* Finds an initial assignment of candidates to uses.  */
5161
5162 static struct iv_ca *
5163 get_initial_solution (struct ivopts_data *data)
5164 {
5165   struct iv_ca *ivs = iv_ca_new (data);
5166   unsigned i;
5167
5168   for (i = 0; i < n_iv_uses (data); i++)
5169     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5170       {
5171         iv_ca_free (&ivs);
5172         return NULL;
5173       }
5174
5175   return ivs;
5176 }
5177
5178 /* Tries to improve set of induction variables IVS.  */
5179
5180 static bool
5181 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5182 {
5183   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5184   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5185   struct iv_cand *cand;
5186
5187   /* Try extending the set of induction variables by one.  */
5188   for (i = 0; i < n_iv_cands (data); i++)
5189     {
5190       cand = iv_cand (data, i);
5191
5192       if (iv_ca_cand_used_p (ivs, cand))
5193         continue;
5194
5195       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5196       if (!act_delta)
5197         continue;
5198
5199       /* If we successfully added the candidate and the set is small enough,
5200          try optimizing it by removing other candidates.  */
5201       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5202         {
5203           iv_ca_delta_commit (data, ivs, act_delta, true);
5204           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5205           iv_ca_delta_commit (data, ivs, act_delta, false);
5206           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5207         }
5208
5209       if (acost < best_cost)
5210         {
5211           best_cost = acost;
5212           iv_ca_delta_free (&best_delta);
5213           best_delta = act_delta;
5214         }
5215       else
5216         iv_ca_delta_free (&act_delta);
5217     }
5218
5219   if (!best_delta)
5220     {
5221       /* Try removing the candidates from the set instead.  */
5222       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5223
5224       /* Nothing more we can do.  */
5225       if (!best_delta)
5226         return false;
5227     }
5228
5229   iv_ca_delta_commit (data, ivs, best_delta, true);
5230   gcc_assert (best_cost == iv_ca_cost (ivs));
5231   iv_ca_delta_free (&best_delta);
5232   return true;
5233 }
5234
5235 /* Attempts to find the optimal set of induction variables.  We do simple
5236    greedy heuristic -- we try to replace at most one candidate in the selected
5237    solution and remove the unused ivs while this improves the cost.  */
5238
5239 static struct iv_ca *
5240 find_optimal_iv_set (struct ivopts_data *data)
5241 {
5242   unsigned i;
5243   struct iv_ca *set;
5244   struct iv_use *use;
5245
5246   /* Get the initial solution.  */
5247   set = get_initial_solution (data);
5248   if (!set)
5249     {
5250       if (dump_file && (dump_flags & TDF_DETAILS))
5251         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5252       return NULL;
5253     }
5254
5255   if (dump_file && (dump_flags & TDF_DETAILS))
5256     {
5257       fprintf (dump_file, "Initial set of candidates:\n");
5258       iv_ca_dump (data, dump_file, set);
5259     }
5260
5261   while (try_improve_iv_set (data, set))
5262     {
5263       if (dump_file && (dump_flags & TDF_DETAILS))
5264         {
5265           fprintf (dump_file, "Improved to:\n");
5266           iv_ca_dump (data, dump_file, set);
5267         }
5268     }
5269
5270   if (dump_file && (dump_flags & TDF_DETAILS))
5271     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5272
5273   for (i = 0; i < n_iv_uses (data); i++)
5274     {
5275       use = iv_use (data, i);
5276       use->selected = iv_ca_cand_for_use (set, use)->cand;
5277     }
5278
5279   return set;
5280 }
5281
5282 /* Creates a new induction variable corresponding to CAND.  */
5283
5284 static void
5285 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5286 {
5287   block_stmt_iterator incr_pos;
5288   tree base;
5289   bool after = false;
5290
5291   if (!cand->iv)
5292     return;
5293
5294   switch (cand->pos)
5295     {
5296     case IP_NORMAL:
5297       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5298       break;
5299
5300     case IP_END:
5301       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5302       after = true;
5303       break;
5304
5305     case IP_ORIGINAL:
5306       /* Mark that the iv is preserved.  */
5307       name_info (data, cand->var_before)->preserve_biv = true;
5308       name_info (data, cand->var_after)->preserve_biv = true;
5309
5310       /* Rewrite the increment so that it uses var_before directly.  */
5311       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5312
5313       return;
5314     }
5315
5316   gimple_add_tmp_var (cand->var_before);
5317   add_referenced_var (cand->var_before);
5318
5319   base = unshare_expr (cand->iv->base);
5320
5321   create_iv (base, unshare_expr (cand->iv->step),
5322              cand->var_before, data->current_loop,
5323              &incr_pos, after, &cand->var_before, &cand->var_after);
5324 }
5325
5326 /* Creates new induction variables described in SET.  */
5327
5328 static void
5329 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5330 {
5331   unsigned i;
5332   struct iv_cand *cand;
5333   bitmap_iterator bi;
5334
5335   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5336     {
5337       cand = iv_cand (data, i);
5338       create_new_iv (data, cand);
5339     }
5340 }
5341
5342 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5343    is true, remove also the ssa name defined by the statement.  */
5344
5345 static void
5346 remove_statement (tree stmt, bool including_defined_name)
5347 {
5348   if (TREE_CODE (stmt) == PHI_NODE)
5349     {
5350       if (!including_defined_name)
5351         {
5352           /* Prevent the ssa name defined by the statement from being removed.  */
5353           SET_PHI_RESULT (stmt, NULL);
5354         }
5355       remove_phi_node (stmt, NULL_TREE);
5356     }
5357   else
5358     {
5359       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5360
5361       bsi_remove (&bsi, true);
5362     }
5363 }
5364
5365 /* Rewrites USE (definition of iv used in a nonlinear expression)
5366    using candidate CAND.  */
5367
5368 static void
5369 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5370                             struct iv_use *use, struct iv_cand *cand)
5371 {
5372   tree comp;
5373   tree op, stmts, tgt, ass;
5374   block_stmt_iterator bsi, pbsi;
5375
5376   /* An important special case -- if we are asked to express value of
5377      the original iv by itself, just exit; there is no need to
5378      introduce a new computation (that might also need casting the
5379      variable to unsigned and back).  */
5380   if (cand->pos == IP_ORIGINAL
5381       && cand->incremented_at == use->stmt)
5382     {
5383       tree step, ctype, utype;
5384       enum tree_code incr_code = PLUS_EXPR;
5385
5386       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5387       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5388
5389       step = cand->iv->step;
5390       ctype = TREE_TYPE (step);
5391       utype = TREE_TYPE (cand->var_after);
5392       if (TREE_CODE (step) == NEGATE_EXPR)
5393         {
5394           incr_code = MINUS_EXPR;
5395           step = TREE_OPERAND (step, 0);
5396         }
5397
5398       /* Check whether we may leave the computation unchanged.
5399          This is the case only if it does not rely on other
5400          computations in the loop -- otherwise, the computation
5401          we rely upon may be removed in remove_unused_ivs,
5402          thus leading to ICE.  */
5403       op = TREE_OPERAND (use->stmt, 1);
5404       if (TREE_CODE (op) == PLUS_EXPR
5405           || TREE_CODE (op) == MINUS_EXPR)
5406         {
5407           if (TREE_OPERAND (op, 0) == cand->var_before)
5408             op = TREE_OPERAND (op, 1);
5409           else if (TREE_CODE (op) == PLUS_EXPR
5410                    && TREE_OPERAND (op, 1) == cand->var_before)
5411             op = TREE_OPERAND (op, 0);
5412           else
5413             op = NULL_TREE;
5414         }
5415       else
5416         op = NULL_TREE;
5417
5418       if (op
5419           && (TREE_CODE (op) == INTEGER_CST
5420               || operand_equal_p (op, step, 0)))
5421         return;
5422
5423       /* Otherwise, add the necessary computations to express
5424          the iv.  */
5425       op = fold_convert (ctype, cand->var_before);
5426       comp = fold_convert (utype,
5427                            build2 (incr_code, ctype, op,
5428                                    unshare_expr (step)));
5429     }
5430   else
5431     comp = get_computation (data->current_loop, use, cand);
5432
5433   switch (TREE_CODE (use->stmt))
5434     {
5435     case PHI_NODE:
5436       tgt = PHI_RESULT (use->stmt);
5437
5438       /* If we should keep the biv, do not replace it.  */
5439       if (name_info (data, tgt)->preserve_biv)
5440         return;
5441
5442       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5443       while (!bsi_end_p (pbsi)
5444              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5445         {
5446           bsi = pbsi;
5447           bsi_next (&pbsi);
5448         }
5449       break;
5450
5451     case MODIFY_EXPR:
5452       tgt = TREE_OPERAND (use->stmt, 0);
5453       bsi = bsi_for_stmt (use->stmt);
5454       break;
5455
5456     default:
5457       gcc_unreachable ();
5458     }
5459
5460   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5461
5462   if (TREE_CODE (use->stmt) == PHI_NODE)
5463     {
5464       if (stmts)
5465         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5466       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5467       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5468       remove_statement (use->stmt, false);
5469       SSA_NAME_DEF_STMT (tgt) = ass;
5470     }
5471   else
5472     {
5473       if (stmts)
5474         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5475       TREE_OPERAND (use->stmt, 1) = op;
5476     }
5477 }
5478
5479 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5480    for_each_index.  */
5481
5482 static bool
5483 idx_remove_ssa_names (tree base, tree *idx,
5484                       void *data ATTRIBUTE_UNUSED)
5485 {
5486   tree *op;
5487
5488   if (TREE_CODE (*idx) == SSA_NAME)
5489     *idx = SSA_NAME_VAR (*idx);
5490
5491   if (TREE_CODE (base) == ARRAY_REF)
5492     {
5493       op = &TREE_OPERAND (base, 2);
5494       if (*op
5495           && TREE_CODE (*op) == SSA_NAME)
5496         *op = SSA_NAME_VAR (*op);
5497       op = &TREE_OPERAND (base, 3);
5498       if (*op
5499           && TREE_CODE (*op) == SSA_NAME)
5500         *op = SSA_NAME_VAR (*op);
5501     }
5502
5503   return true;
5504 }
5505
5506 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5507
5508 static tree
5509 unshare_and_remove_ssa_names (tree ref)
5510 {
5511   ref = unshare_expr (ref);
5512   for_each_index (&ref, idx_remove_ssa_names, NULL);
5513
5514   return ref;
5515 }
5516
5517 /* Extract the alias analysis info for the memory reference REF.  There are
5518    several ways how this information may be stored and what precisely is
5519    its semantics depending on the type of the reference, but there always is
5520    somewhere hidden one _DECL node that is used to determine the set of
5521    virtual operands for the reference.  The code below deciphers this jungle
5522    and extracts this single useful piece of information.  */
5523
5524 static tree
5525 get_ref_tag (tree ref, tree orig)
5526 {
5527   tree var = get_base_address (ref);
5528   tree aref = NULL_TREE, tag, sv;
5529   HOST_WIDE_INT offset, size, maxsize;
5530
5531   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5532     {
5533       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5534       if (ref)
5535         break;
5536     }
5537
5538   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5539     return unshare_expr (sv);
5540
5541   if (!var)
5542     return NULL_TREE;
5543
5544   if (TREE_CODE (var) == INDIRECT_REF)
5545     {
5546       /* If the base is a dereference of a pointer, first check its name memory
5547          tag.  If it does not have one, use its symbol memory tag.  */
5548       var = TREE_OPERAND (var, 0);
5549       if (TREE_CODE (var) != SSA_NAME)
5550         return NULL_TREE;
5551
5552       if (SSA_NAME_PTR_INFO (var))
5553         {
5554           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5555           if (tag)
5556             return tag;
5557         }
5558
5559       var = SSA_NAME_VAR (var);
5560       tag = var_ann (var)->symbol_mem_tag;
5561       gcc_assert (tag != NULL_TREE);
5562       return tag;
5563     }
5564   else
5565     {
5566       if (!DECL_P (var))
5567         return NULL_TREE;
5568
5569       tag = var_ann (var)->symbol_mem_tag;
5570       if (tag)
5571         return tag;
5572
5573       return var;
5574     }
5575 }
5576
5577 /* Copies the reference information from OLD_REF to NEW_REF.  */
5578
5579 static void
5580 copy_ref_info (tree new_ref, tree old_ref)
5581 {
5582   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5583     copy_mem_ref_info (new_ref, old_ref);
5584   else
5585     {
5586       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5587       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5588     }
5589 }
5590
5591 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5592
5593 static void
5594 rewrite_use_address (struct ivopts_data *data,
5595                      struct iv_use *use, struct iv_cand *cand)
5596 {
5597   struct affine_tree_combination aff;
5598   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5599   tree ref;
5600
5601   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5602   unshare_aff_combination (&aff);
5603
5604   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5605   copy_ref_info (ref, *use->op_p);
5606   *use->op_p = ref;
5607 }
5608
5609 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5610    candidate CAND.  */
5611
5612 static void
5613 rewrite_use_compare (struct ivopts_data *data,
5614                      struct iv_use *use, struct iv_cand *cand)
5615 {
5616   tree comp;
5617   tree *op_p, cond, op, stmts, bound;
5618   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5619   enum tree_code compare;
5620   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5621
5622   bound = cp->value;
5623   if (bound)
5624     {
5625       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5626       tree var_type = TREE_TYPE (var);
5627
5628       compare = iv_elimination_compare (data, use);
5629       bound = fold_convert (var_type, bound);
5630       op = force_gimple_operand (unshare_expr (bound), &stmts,
5631                                  true, NULL_TREE);
5632
5633       if (stmts)
5634         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5635
5636       *use->op_p = build2 (compare, boolean_type_node, var, op);
5637       update_stmt (use->stmt);
5638       return;
5639     }
5640
5641   /* The induction variable elimination failed; just express the original
5642      giv.  */
5643   comp = get_computation (data->current_loop, use, cand);
5644
5645   cond = *use->op_p;
5646   op_p = &TREE_OPERAND (cond, 0);
5647   if (TREE_CODE (*op_p) != SSA_NAME
5648       || zero_p (get_iv (data, *op_p)->step))
5649     op_p = &TREE_OPERAND (cond, 1);
5650
5651   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5652   if (stmts)
5653     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5654
5655   *op_p = op;
5656 }
5657
5658 /* Rewrites USE using candidate CAND.  */
5659
5660 static void
5661 rewrite_use (struct ivopts_data *data,
5662              struct iv_use *use, struct iv_cand *cand)
5663 {
5664   switch (use->type)
5665     {
5666       case USE_NONLINEAR_EXPR:
5667         rewrite_use_nonlinear_expr (data, use, cand);
5668         break;
5669
5670       case USE_ADDRESS:
5671         rewrite_use_address (data, use, cand);
5672         break;
5673
5674       case USE_COMPARE:
5675         rewrite_use_compare (data, use, cand);
5676         break;
5677
5678       default:
5679         gcc_unreachable ();
5680     }
5681   mark_new_vars_to_rename (use->stmt);
5682 }
5683
5684 /* Rewrite the uses using the selected induction variables.  */
5685
5686 static void
5687 rewrite_uses (struct ivopts_data *data)
5688 {
5689   unsigned i;
5690   struct iv_cand *cand;
5691   struct iv_use *use;
5692
5693   for (i = 0; i < n_iv_uses (data); i++)
5694     {
5695       use = iv_use (data, i);
5696       cand = use->selected;
5697       gcc_assert (cand);
5698
5699       rewrite_use (data, use, cand);
5700     }
5701 }
5702
5703 /* Removes the ivs that are not used after rewriting.  */
5704
5705 static void
5706 remove_unused_ivs (struct ivopts_data *data)
5707 {
5708   unsigned j;
5709   bitmap_iterator bi;
5710
5711   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5712     {
5713       struct version_info *info;
5714
5715       info = ver_info (data, j);
5716       if (info->iv
5717           && !zero_p (info->iv->step)
5718           && !info->inv_id
5719           && !info->iv->have_use_for
5720           && !info->preserve_biv)
5721         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5722     }
5723 }
5724
5725 /* Frees data allocated by the optimization of a single loop.  */
5726
5727 static void
5728 free_loop_data (struct ivopts_data *data)
5729 {
5730   unsigned i, j;
5731   bitmap_iterator bi;
5732   tree obj;
5733
5734   htab_empty (data->niters);
5735
5736   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5737     {
5738       struct version_info *info;
5739
5740       info = ver_info (data, i);
5741       if (info->iv)
5742         free (info->iv);
5743       info->iv = NULL;
5744       info->has_nonlin_use = false;
5745       info->preserve_biv = false;
5746       info->inv_id = 0;
5747     }
5748   bitmap_clear (data->relevant);
5749   bitmap_clear (data->important_candidates);
5750
5751   for (i = 0; i < n_iv_uses (data); i++)
5752     {
5753       struct iv_use *use = iv_use (data, i);
5754
5755       free (use->iv);
5756       BITMAP_FREE (use->related_cands);
5757       for (j = 0; j < use->n_map_members; j++)
5758         if (use->cost_map[j].depends_on)
5759           BITMAP_FREE (use->cost_map[j].depends_on);
5760       free (use->cost_map);
5761       free (use);
5762     }
5763   VEC_truncate (iv_use_p, data->iv_uses, 0);
5764
5765   for (i = 0; i < n_iv_cands (data); i++)
5766     {
5767       struct iv_cand *cand = iv_cand (data, i);
5768
5769       if (cand->iv)
5770         free (cand->iv);
5771       if (cand->depends_on)
5772         BITMAP_FREE (cand->depends_on);
5773       free (cand);
5774     }
5775   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5776
5777   if (data->version_info_size < num_ssa_names)
5778     {
5779       data->version_info_size = 2 * num_ssa_names;
5780       free (data->version_info);
5781       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5782     }
5783
5784   data->max_inv_id = 0;
5785
5786   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5787     SET_DECL_RTL (obj, NULL_RTX);
5788
5789   VEC_truncate (tree, decl_rtl_to_reset, 0);
5790 }
5791
5792 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5793    loop tree.  */
5794
5795 static void
5796 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5797 {
5798   free_loop_data (data);
5799   free (data->version_info);
5800   BITMAP_FREE (data->relevant);
5801   BITMAP_FREE (data->important_candidates);
5802   htab_delete (data->niters);
5803
5804   VEC_free (tree, heap, decl_rtl_to_reset);
5805   VEC_free (iv_use_p, heap, data->iv_uses);
5806   VEC_free (iv_cand_p, heap, data->iv_candidates);
5807 }
5808
5809 /* Optimizes the LOOP.  Returns true if anything changed.  */
5810
5811 static bool
5812 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5813 {
5814   bool changed = false;
5815   struct iv_ca *iv_ca;
5816   edge exit;
5817
5818   data->current_loop = loop;
5819
5820   if (dump_file && (dump_flags & TDF_DETAILS))
5821     {
5822       fprintf (dump_file, "Processing loop %d\n", loop->num);
5823
5824       exit = single_dom_exit (loop);
5825       if (exit)
5826         {
5827           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5828                    exit->src->index, exit->dest->index);
5829           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5830           fprintf (dump_file, "\n");
5831         }
5832
5833       fprintf (dump_file, "\n");
5834     }
5835
5836   /* For each ssa name determines whether it behaves as an induction variable
5837      in some loop.  */
5838   if (!find_induction_variables (data))
5839     goto finish;
5840
5841   /* Finds interesting uses (item 1).  */
5842   find_interesting_uses (data);
5843   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5844     goto finish;
5845
5846   /* Finds candidates for the induction variables (item 2).  */
5847   find_iv_candidates (data);
5848
5849   /* Calculates the costs (item 3, part 1).  */
5850   determine_use_iv_costs (data);
5851   determine_iv_costs (data);
5852   determine_set_costs (data);
5853
5854   /* Find the optimal set of induction variables (item 3, part 2).  */
5855   iv_ca = find_optimal_iv_set (data);
5856   if (!iv_ca)
5857     goto finish;
5858   changed = true;
5859
5860   /* Create the new induction variables (item 4, part 1).  */
5861   create_new_ivs (data, iv_ca);
5862   iv_ca_free (&iv_ca);
5863
5864   /* Rewrite the uses (item 4, part 2).  */
5865   rewrite_uses (data);
5866
5867   /* Remove the ivs that are unused after rewriting.  */
5868   remove_unused_ivs (data);
5869
5870   /* We have changed the structure of induction variables; it might happen
5871      that definitions in the scev database refer to some of them that were
5872      eliminated.  */
5873   scev_reset ();
5874
5875 finish:
5876   free_loop_data (data);
5877
5878   return changed;
5879 }
5880
5881 /* Main entry point.  Optimizes induction variables in loops.  */
5882
5883 void
5884 tree_ssa_iv_optimize (void)
5885 {
5886   struct loop *loop;
5887   struct ivopts_data data;
5888
5889   tree_ssa_iv_optimize_init (&data);
5890
5891   /* Optimize the loops starting with the innermost ones.  */
5892   loop = current_loops->tree_root;
5893   while (loop->inner)
5894     loop = loop->inner;
5895
5896   /* Scan the loops, inner ones first.  */
5897   while (loop != current_loops->tree_root)
5898     {
5899       if (dump_file && (dump_flags & TDF_DETAILS))
5900         flow_loop_dump (loop, dump_file, NULL, 1);
5901
5902       tree_ssa_iv_optimize_loop (&data, loop);
5903
5904       if (loop->next)
5905         {
5906           loop = loop->next;
5907           while (loop->inner)
5908             loop = loop->inner;
5909         }
5910       else
5911         loop = loop->outer;
5912     }
5913
5914   tree_ssa_iv_optimize_finalize (&data);
5915 }