gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = single_exit (loop);
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   /* If this is a pointer casted to any type, we need to determine
 839      the base object for the pointer; so handle conversions before
 840      throwing away non-pointer expressions.  */
 841   if (TREE_CODE (expr) == NOP_EXPR
 842       || TREE_CODE (expr) == CONVERT_EXPR)
 843     return determine_base_object (TREE_OPERAND (expr, 0));
 844
 845   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 846     return NULL_TREE;
 847
 848   switch (code)
 849     {
 850     case INTEGER_CST:
 851       return NULL_TREE;
 852
 853     case ADDR_EXPR:
 854       obj = TREE_OPERAND (expr, 0);
 855       base = get_base_address (obj);
 856
 857       if (!base)
 858         return expr;
 859
 860       if (TREE_CODE (base) == INDIRECT_REF)
 861         return determine_base_object (TREE_OPERAND (base, 0));
 862
 863       return fold_convert (ptr_type_node,
 864                            build_fold_addr_expr (base));
 865
 866     case PLUS_EXPR:
 867     case MINUS_EXPR:
 868       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 869       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 870
 871       if (!op1)
 872         return op0;
 873
 874       if (!op0)
 875         return (code == PLUS_EXPR
 876                 ? op1
 877                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 878
 879       return fold_build2 (code, ptr_type_node, op0, op1);
 880
 881     default:
 882       return fold_convert (ptr_type_node, expr);
 883     }
 884 }
 885
 886 /* Allocates an induction variable with given initial value BASE and step STEP
 887    for loop LOOP.  */
 888
 889 static struct iv *
 890 alloc_iv (tree base, tree step)
 891 {
 892   struct iv *iv = XCNEW (struct iv);
 893
 894   if (step && integer_zerop (step))
 895     step = NULL_TREE;
 896
 897   iv->base = base;
 898   iv->base_object = determine_base_object (base);
 899   iv->step = step;
 900   iv->biv_p = false;
 901   iv->have_use_for = false;
 902   iv->use_id = 0;
 903   iv->ssa_name = NULL_TREE;
 904
 905   return iv;
 906 }
 907
 908 /* Sets STEP and BASE for induction variable IV.  */
 909
 910 static void
 911 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 912 {
 913   struct version_info *info = name_info (data, iv);
 914
 915   gcc_assert (!info->iv);
 916
 917   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 918   info->iv = alloc_iv (base, step);
 919   info->iv->ssa_name = iv;
 920 }
 921
 922 /* Finds induction variable declaration for VAR.  */
 923
 924 static struct iv *
 925 get_iv (struct ivopts_data *data, tree var)
 926 {
 927   basic_block bb;
 928
 929   if (!name_info (data, var)->iv)
 930     {
 931       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 932
 933       if (!bb
 934           || !flow_bb_inside_loop_p (data->current_loop, bb))
 935         set_iv (data, var, var, NULL_TREE);
 936     }
 937
 938   return name_info (data, var)->iv;
 939 }
 940
 941 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 942    not define a simple affine biv with nonzero step.  */
 943
 944 static tree
 945 determine_biv_step (tree phi)
 946 {
 947   struct loop *loop = bb_for_stmt (phi)->loop_father;
 948   tree name = PHI_RESULT (phi);
 949   affine_iv iv;
 950
 951   if (!is_gimple_reg (name))
 952     return NULL_TREE;
 953
 954   if (!simple_iv (loop, phi, name, &iv, true))
 955     return NULL_TREE;
 956
 957   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 958 }
 959
 960 /* Finds basic ivs.  */
 961
 962 static bool
 963 find_bivs (struct ivopts_data *data)
 964 {
 965   tree phi, step, type, base;
 966   bool found = false;
 967   struct loop *loop = data->current_loop;
 968
 969   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 970     {
 971       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 972         continue;
 973
 974       step = determine_biv_step (phi);
 975       if (!step)
 976         continue;
 977
 978       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 979       base = expand_simple_operations (base);
 980       if (contains_abnormal_ssa_name_p (base)
 981           || contains_abnormal_ssa_name_p (step))
 982         continue;
 983
 984       type = TREE_TYPE (PHI_RESULT (phi));
 985       base = fold_convert (type, base);
 986       if (step)
 987         step = fold_convert (type, step);
 988
 989       set_iv (data, PHI_RESULT (phi), base, step);
 990       found = true;
 991     }
 992
 993   return found;
 994 }
 995
 996 /* Marks basic ivs.  */
 997
 998 static void
 999 mark_bivs (struct ivopts_data *data)
1000 {
1001   tree phi, var;
1002   struct iv *iv, *incr_iv;
1003   struct loop *loop = data->current_loop;
1004   basic_block incr_bb;
1005
1006   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1007     {
1008       iv = get_iv (data, PHI_RESULT (phi));
1009       if (!iv)
1010         continue;
1011
1012       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1013       incr_iv = get_iv (data, var);
1014       if (!incr_iv)
1015         continue;
1016
1017       /* If the increment is in the subloop, ignore it.  */
1018       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1019       if (incr_bb->loop_father != data->current_loop
1020           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1021         continue;
1022
1023       iv->biv_p = true;
1024       incr_iv->biv_p = true;
1025     }
1026 }
1027
1028 /* Checks whether STMT defines a linear induction variable and stores its
1029    parameters to IV.  */
1030
1031 static bool
1032 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1033 {
1034   tree lhs;
1035   struct loop *loop = data->current_loop;
1036
1037   iv->base = NULL_TREE;
1038   iv->step = NULL_TREE;
1039
1040   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1041     return false;
1042
1043   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1044   if (TREE_CODE (lhs) != SSA_NAME)
1045     return false;
1046
1047   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1048     return false;
1049   iv->base = expand_simple_operations (iv->base);
1050
1051   if (contains_abnormal_ssa_name_p (iv->base)
1052       || contains_abnormal_ssa_name_p (iv->step))
1053     return false;
1054
1055   return true;
1056 }
1057
1058 /* Finds general ivs in statement STMT.  */
1059
1060 static void
1061 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1062 {
1063   affine_iv iv;
1064
1065   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1066     return;
1067
1068   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1069 }
1070
1071 /* Finds general ivs in basic block BB.  */
1072
1073 static void
1074 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1075 {
1076   block_stmt_iterator bsi;
1077
1078   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1079     find_givs_in_stmt (data, bsi_stmt (bsi));
1080 }
1081
1082 /* Finds general ivs.  */
1083
1084 static void
1085 find_givs (struct ivopts_data *data)
1086 {
1087   struct loop *loop = data->current_loop;
1088   basic_block *body = get_loop_body_in_dom_order (loop);
1089   unsigned i;
1090
1091   for (i = 0; i < loop->num_nodes; i++)
1092     find_givs_in_bb (data, body[i]);
1093   free (body);
1094 }
1095
1096 /* For each ssa name defined in LOOP determines whether it is an induction
1097    variable and if so, its initial value and step.  */
1098
1099 static bool
1100 find_induction_variables (struct ivopts_data *data)
1101 {
1102   unsigned i;
1103   bitmap_iterator bi;
1104
1105   if (!find_bivs (data))
1106     return false;
1107
1108   find_givs (data);
1109   mark_bivs (data);
1110
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       tree niter = niter_for_single_dom_exit (data);
1114
1115       if (niter)
1116         {
1117           fprintf (dump_file, "  number of iterations ");
1118           print_generic_expr (dump_file, niter, TDF_SLIM);
1119           fprintf (dump_file, "\n\n");
1120         };
1121
1122       fprintf (dump_file, "Induction variables:\n\n");
1123
1124       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1125         {
1126           if (ver_info (data, i)->iv)
1127             dump_iv (dump_file, ver_info (data, i)->iv);
1128         }
1129     }
1130
1131   return true;
1132 }
1133
1134 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1135
1136 static struct iv_use *
1137 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1138             tree stmt, enum use_type use_type)
1139 {
1140   struct iv_use *use = XCNEW (struct iv_use);
1141
1142   use->id = n_iv_uses (data);
1143   use->type = use_type;
1144   use->iv = iv;
1145   use->stmt = stmt;
1146   use->op_p = use_p;
1147   use->related_cands = BITMAP_ALLOC (NULL);
1148
1149   /* To avoid showing ssa name in the dumps, if it was not reset by the
1150      caller.  */
1151   iv->ssa_name = NULL_TREE;
1152
1153   if (dump_file && (dump_flags & TDF_DETAILS))
1154     dump_use (dump_file, use);
1155
1156   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1157
1158   return use;
1159 }
1160
1161 /* Checks whether OP is a loop-level invariant and if so, records it.
1162    NONLINEAR_USE is true if the invariant is used in a way we do not
1163    handle specially.  */
1164
1165 static void
1166 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1167 {
1168   basic_block bb;
1169   struct version_info *info;
1170
1171   if (TREE_CODE (op) != SSA_NAME
1172       || !is_gimple_reg (op))
1173     return;
1174
1175   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1176   if (bb
1177       && flow_bb_inside_loop_p (data->current_loop, bb))
1178     return;
1179
1180   info = name_info (data, op);
1181   info->name = op;
1182   info->has_nonlin_use |= nonlinear_use;
1183   if (!info->inv_id)
1184     info->inv_id = ++data->max_inv_id;
1185   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1186 }
1187
1188 /* Checks whether the use OP is interesting and if so, records it.  */
1189
1190 static struct iv_use *
1191 find_interesting_uses_op (struct ivopts_data *data, tree op)
1192 {
1193   struct iv *iv;
1194   struct iv *civ;
1195   tree stmt;
1196   struct iv_use *use;
1197
1198   if (TREE_CODE (op) != SSA_NAME)
1199     return NULL;
1200
1201   iv = get_iv (data, op);
1202   if (!iv)
1203     return NULL;
1204
1205   if (iv->have_use_for)
1206     {
1207       use = iv_use (data, iv->use_id);
1208
1209       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1210       return use;
1211     }
1212
1213   if (zero_p (iv->step))
1214     {
1215       record_invariant (data, op, true);
1216       return NULL;
1217     }
1218   iv->have_use_for = true;
1219
1220   civ = XNEW (struct iv);
1221   *civ = *iv;
1222
1223   stmt = SSA_NAME_DEF_STMT (op);
1224   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1225               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1226
1227   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1228   iv->use_id = use->id;
1229
1230   return use;
1231 }
1232
1233 /* Checks whether the condition *COND_P in STMT is interesting
1234    and if so, records it.  */
1235
1236 static void
1237 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1238 {
1239   tree *op0_p;
1240   tree *op1_p;
1241   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1242   struct iv const_iv;
1243   tree zero = integer_zero_node;
1244
1245   const_iv.step = NULL_TREE;
1246
1247   if (TREE_CODE (*cond_p) != SSA_NAME
1248       && !COMPARISON_CLASS_P (*cond_p))
1249     return;
1250
1251   if (TREE_CODE (*cond_p) == SSA_NAME)
1252     {
1253       op0_p = cond_p;
1254       op1_p = &zero;
1255     }
1256   else
1257     {
1258       op0_p = &TREE_OPERAND (*cond_p, 0);
1259       op1_p = &TREE_OPERAND (*cond_p, 1);
1260     }
1261
1262   if (TREE_CODE (*op0_p) == SSA_NAME)
1263     iv0 = get_iv (data, *op0_p);
1264   else
1265     iv0 = &const_iv;
1266
1267   if (TREE_CODE (*op1_p) == SSA_NAME)
1268     iv1 = get_iv (data, *op1_p);
1269   else
1270     iv1 = &const_iv;
1271
1272   if (/* When comparing with non-invariant value, we may not do any senseful
1273          induction variable elimination.  */
1274       (!iv0 || !iv1)
1275       /* Eliminating condition based on two ivs would be nontrivial.
1276          ??? TODO -- it is not really important to handle this case.  */
1277       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1278     {
1279       find_interesting_uses_op (data, *op0_p);
1280       find_interesting_uses_op (data, *op1_p);
1281       return;
1282     }
1283
1284   if (zero_p (iv0->step) && zero_p (iv1->step))
1285     {
1286       /* If both are invariants, this is a work for unswitching.  */
1287       return;
1288     }
1289
1290   civ = XNEW (struct iv);
1291   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1292   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1293 }
1294
1295 /* Returns true if expression EXPR is obviously invariant in LOOP,
1296    i.e. if all its operands are defined outside of the LOOP.  */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301   basic_block def_bb;
1302   unsigned i, len;
1303
1304   if (is_gimple_min_invariant (expr))
1305     return true;
1306
1307   if (TREE_CODE (expr) == SSA_NAME)
1308     {
1309       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1310       if (def_bb
1311           && flow_bb_inside_loop_p (loop, def_bb))
1312         return false;
1313
1314       return true;
1315     }
1316
1317   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1318     return false;
1319
1320   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1321   for (i = 0; i < len; i++)
1322     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1323       return false;
1324
1325   return true;
1326 }
1327
1328 /* Cumulates the steps of indices into DATA and replaces their values with the
1329    initial ones.  Returns false when the value of the index cannot be determined.
1330    Callback for for_each_index.  */
1331
1332 struct ifs_ivopts_data
1333 {
1334   struct ivopts_data *ivopts_data;
1335   tree stmt;
1336   tree *step_p;
1337 };
1338
1339 static bool
1340 idx_find_step (tree base, tree *idx, void *data)
1341 {
1342   struct ifs_ivopts_data *dta = data;
1343   struct iv *iv;
1344   tree step, iv_base, iv_step, lbound, off;
1345   struct loop *loop = dta->ivopts_data->current_loop;
1346
1347   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1348       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1349     return false;
1350
1351   /* If base is a component ref, require that the offset of the reference
1352      be invariant.  */
1353   if (TREE_CODE (base) == COMPONENT_REF)
1354     {
1355       off = component_ref_field_offset (base);
1356       return expr_invariant_in_loop_p (loop, off);
1357     }
1358
1359   /* If base is array, first check whether we will be able to move the
1360      reference out of the loop (in order to take its address in strength
1361      reduction).  In order for this to work we need both lower bound
1362      and step to be loop invariants.  */
1363   if (TREE_CODE (base) == ARRAY_REF)
1364     {
1365       step = array_ref_element_size (base);
1366       lbound = array_ref_low_bound (base);
1367
1368       if (!expr_invariant_in_loop_p (loop, step)
1369           || !expr_invariant_in_loop_p (loop, lbound))
1370         return false;
1371     }
1372
1373   if (TREE_CODE (*idx) != SSA_NAME)
1374     return true;
1375
1376   iv = get_iv (dta->ivopts_data, *idx);
1377   if (!iv)
1378     return false;
1379
1380   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1381           *&x[0], which is not folded and does not trigger the
1382           ARRAY_REF path below.  */
1383   *idx = iv->base;
1384
1385   if (!iv->step)
1386     return true;
1387
1388   if (TREE_CODE (base) == ARRAY_REF)
1389     {
1390       step = array_ref_element_size (base);
1391
1392       /* We only handle addresses whose step is an integer constant.  */
1393       if (TREE_CODE (step) != INTEGER_CST)
1394         return false;
1395     }
1396   else
1397     /* The step for pointer arithmetics already is 1 byte.  */
1398     step = build_int_cst (sizetype, 1);
1399
1400   iv_base = iv->base;
1401   iv_step = iv->step;
1402   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1403                             sizetype, &iv_base, &iv_step, dta->stmt,
1404                             false))
1405     {
1406       /* The index might wrap.  */
1407       return false;
1408     }
1409
1410   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1411
1412   if (!*dta->step_p)
1413     *dta->step_p = step;
1414   else
1415     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1416
1417   return true;
1418 }
1419
1420 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1421    object is passed to it in DATA.  */
1422
1423 static bool
1424 idx_record_use (tree base, tree *idx,
1425                 void *data)
1426 {
1427   find_interesting_uses_op (data, *idx);
1428   if (TREE_CODE (base) == ARRAY_REF)
1429     {
1430       find_interesting_uses_op (data, array_ref_element_size (base));
1431       find_interesting_uses_op (data, array_ref_low_bound (base));
1432     }
1433   return true;
1434 }
1435
1436 /* Returns true if memory reference REF may be unaligned.  */
1437
1438 static bool
1439 may_be_unaligned_p (tree ref)
1440 {
1441   tree base;
1442   tree base_type;
1443   HOST_WIDE_INT bitsize;
1444   HOST_WIDE_INT bitpos;
1445   tree toffset;
1446   enum machine_mode mode;
1447   int unsignedp, volatilep;
1448   unsigned base_align;
1449
1450   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451      thus they are not misaligned.  */
1452   if (TREE_CODE (ref) == TARGET_MEM_REF)
1453     return false;
1454
1455   /* The test below is basically copy of what expr.c:normal_inner_ref
1456      does to check whether the object must be loaded by parts when
1457      STRICT_ALIGNMENT is true.  */
1458   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1459                               &unsignedp, &volatilep, true);
1460   base_type = TREE_TYPE (base);
1461   base_align = TYPE_ALIGN (base_type);
1462
1463   if (mode != BLKmode
1464       && (base_align < GET_MODE_ALIGNMENT (mode)
1465           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1466           || bitpos % BITS_PER_UNIT != 0))
1467     return true;
1468
1469   return false;
1470 }
1471
1472 /* Return true if EXPR may be non-addressable.   */
1473
1474 static bool
1475 may_be_nonaddressable_p (tree expr)
1476 {
1477   switch (TREE_CODE (expr))
1478     {
1479     case COMPONENT_REF:
1480       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1481              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1482
1483     case ARRAY_REF:
1484     case ARRAY_RANGE_REF:
1485       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1486
1487     case VIEW_CONVERT_EXPR:
1488       /* This kind of view-conversions may wrap non-addressable objects
1489          and make them look addressable.  After some processing the
1490          non-addressability may be uncovered again, causing ADDR_EXPRs
1491          of inappropriate objects to be built.  */
1492       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1493              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1494
1495     default:
1496       break;
1497     }
1498
1499   return false;
1500 }
1501
1502 /* Finds addresses in *OP_P inside STMT.  */
1503
1504 static void
1505 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 {
1507   tree base = *op_p, step = NULL;
1508   struct iv *civ;
1509   struct ifs_ivopts_data ifs_ivopts_data;
1510
1511   /* Do not play with volatile memory references.  A bit too conservative,
1512      perhaps, but safe.  */
1513   if (stmt_ann (stmt)->has_volatile_ops)
1514     goto fail;
1515
1516   /* Ignore bitfields for now.  Not really something terribly complicated
1517      to handle.  TODO.  */
1518   if (TREE_CODE (base) == BIT_FIELD_REF)
1519     goto fail;
1520
1521   if (may_be_nonaddressable_p (base))
1522     goto fail;
1523
1524   if (STRICT_ALIGNMENT
1525       && may_be_unaligned_p (base))
1526     goto fail;
1527
1528   base = unshare_expr (base);
1529
1530   if (TREE_CODE (base) == TARGET_MEM_REF)
1531     {
1532       tree type = build_pointer_type (TREE_TYPE (base));
1533       tree astep;
1534
1535       if (TMR_BASE (base)
1536           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1537         {
1538           civ = get_iv (data, TMR_BASE (base));
1539           if (!civ)
1540             goto fail;
1541
1542           TMR_BASE (base) = civ->base;
1543           step = civ->step;
1544         }
1545       if (TMR_INDEX (base)
1546           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1547         {
1548           civ = get_iv (data, TMR_INDEX (base));
1549           if (!civ)
1550             goto fail;
1551
1552           TMR_INDEX (base) = civ->base;
1553           astep = civ->step;
1554
1555           if (astep)
1556             {
1557               if (TMR_STEP (base))
1558                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1559
1560               if (step)
1561                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1562               else
1563                 step = astep;
1564             }
1565         }
1566
1567       if (zero_p (step))
1568         goto fail;
1569       base = tree_mem_ref_addr (type, base);
1570     }
1571   else
1572     {
1573       ifs_ivopts_data.ivopts_data = data;
1574       ifs_ivopts_data.stmt = stmt;
1575       ifs_ivopts_data.step_p = &step;
1576       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1577           || zero_p (step))
1578         goto fail;
1579
1580       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1581       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1582
1583       base = build_fold_addr_expr (base);
1584
1585       /* Substituting bases of IVs into the base expression might
1586          have caused folding opportunities.  */
1587       if (TREE_CODE (base) == ADDR_EXPR)
1588         {
1589           tree *ref = &TREE_OPERAND (base, 0);
1590           while (handled_component_p (*ref))
1591             ref = &TREE_OPERAND (*ref, 0);
1592           if (TREE_CODE (*ref) == INDIRECT_REF)
1593             *ref = fold_indirect_ref (*ref);
1594         }
1595     }
1596
1597   civ = alloc_iv (base, step);
1598   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599   return;
1600
1601 fail:
1602   for_each_index (op_p, idx_record_use, data);
1603 }
1604
1605 /* Finds and records invariants used in STMT.  */
1606
1607 static void
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 {
1610   ssa_op_iter iter;
1611   use_operand_p use_p;
1612   tree op;
1613
1614   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615     {
1616       op = USE_FROM_PTR (use_p);
1617       record_invariant (data, op, false);
1618     }
1619 }
1620
1621 /* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623 static void
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 {
1626   struct iv *iv;
1627   tree op, lhs, rhs;
1628   ssa_op_iter iter;
1629   use_operand_p use_p;
1630
1631   find_invariants_stmt (data, stmt);
1632
1633   if (TREE_CODE (stmt) == COND_EXPR)
1634     {
1635       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636       return;
1637     }
1638
1639   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1640     {
1641       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1642       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1643
1644       if (TREE_CODE (lhs) == SSA_NAME)
1645         {
1646           /* If the statement defines an induction variable, the uses are not
1647              interesting by themselves.  */
1648
1649           iv = get_iv (data, lhs);
1650
1651           if (iv && !zero_p (iv->step))
1652             return;
1653         }
1654
1655       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656         {
1657         case tcc_comparison:
1658           find_interesting_uses_cond (data, stmt,
1659                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1660           return;
1661
1662         case tcc_reference:
1663           find_interesting_uses_address (data, stmt,
1664                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1665           if (REFERENCE_CLASS_P (lhs))
1666             find_interesting_uses_address (data, stmt,
1667                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1668           return;
1669
1670         default: ;
1671         }
1672
1673       if (REFERENCE_CLASS_P (lhs)
1674           && is_gimple_val (rhs))
1675         {
1676           find_interesting_uses_address (data, stmt,
1677                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1678           find_interesting_uses_op (data, rhs);
1679           return;
1680         }
1681
1682       /* TODO -- we should also handle address uses of type
1683
1684          memory = call (whatever);
1685
1686          and
1687
1688          call (memory).  */
1689     }
1690
1691   if (TREE_CODE (stmt) == PHI_NODE
1692       && bb_for_stmt (stmt) == data->current_loop->header)
1693     {
1694       lhs = PHI_RESULT (stmt);
1695       iv = get_iv (data, lhs);
1696
1697       if (iv && !zero_p (iv->step))
1698         return;
1699     }
1700
1701   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1702     {
1703       op = USE_FROM_PTR (use_p);
1704
1705       if (TREE_CODE (op) != SSA_NAME)
1706         continue;
1707
1708       iv = get_iv (data, op);
1709       if (!iv)
1710         continue;
1711
1712       find_interesting_uses_op (data, op);
1713     }
1714 }
1715
1716 /* Finds interesting uses of induction variables outside of loops
1717    on loop exit edge EXIT.  */
1718
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1721 {
1722   tree phi, def;
1723
1724   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1725     {
1726       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727       find_interesting_uses_op (data, def);
1728     }
1729 }
1730
1731 /* Finds uses of the induction variables that are interesting.  */
1732
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1735 {
1736   basic_block bb;
1737   block_stmt_iterator bsi;
1738   tree phi;
1739   basic_block *body = get_loop_body (data->current_loop);
1740   unsigned i;
1741   struct version_info *info;
1742   edge e;
1743
1744   if (dump_file && (dump_flags & TDF_DETAILS))
1745     fprintf (dump_file, "Uses:\n\n");
1746
1747   for (i = 0; i < data->current_loop->num_nodes; i++)
1748     {
1749       edge_iterator ei;
1750       bb = body[i];
1751
1752       FOR_EACH_EDGE (e, ei, bb->succs)
1753         if (e->dest != EXIT_BLOCK_PTR
1754             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755           find_interesting_uses_outside (data, e);
1756
1757       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758         find_interesting_uses_stmt (data, phi);
1759       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1761     }
1762
1763   if (dump_file && (dump_flags & TDF_DETAILS))
1764     {
1765       bitmap_iterator bi;
1766
1767       fprintf (dump_file, "\n");
1768
1769       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1770         {
1771           info = ver_info (data, i);
1772           if (info->inv_id)
1773             {
1774               fprintf (dump_file, "  ");
1775               print_generic_expr (dump_file, info->name, TDF_SLIM);
1776               fprintf (dump_file, " is invariant (%d)%s\n",
1777                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778             }
1779         }
1780
1781       fprintf (dump_file, "\n");
1782     }
1783
1784   free (body);
1785 }
1786
1787 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1788    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1789    we are at the top-level of the processed address.  */
1790
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793                 unsigned HOST_WIDE_INT *offset)
1794 {
1795   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796   enum tree_code code;
1797   tree type, orig_type = TREE_TYPE (expr);
1798   unsigned HOST_WIDE_INT off0, off1, st;
1799   tree orig_expr = expr;
1800
1801   STRIP_NOPS (expr);
1802
1803   type = TREE_TYPE (expr);
1804   code = TREE_CODE (expr);
1805   *offset = 0;
1806
1807   switch (code)
1808     {
1809     case INTEGER_CST:
1810       if (!cst_and_fits_in_hwi (expr)
1811           || zero_p (expr))
1812         return orig_expr;
1813
1814       *offset = int_cst_value (expr);
1815       return build_int_cst (orig_type, 0);
1816
1817     case PLUS_EXPR:
1818     case MINUS_EXPR:
1819       op0 = TREE_OPERAND (expr, 0);
1820       op1 = TREE_OPERAND (expr, 1);
1821
1822       op0 = strip_offset_1 (op0, false, false, &off0);
1823       op1 = strip_offset_1 (op1, false, false, &off1);
1824
1825       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826       if (op0 == TREE_OPERAND (expr, 0)
1827           && op1 == TREE_OPERAND (expr, 1))
1828         return orig_expr;
1829
1830       if (zero_p (op1))
1831         expr = op0;
1832       else if (zero_p (op0))
1833         {
1834           if (code == PLUS_EXPR)
1835             expr = op1;
1836           else
1837             expr = fold_build1 (NEGATE_EXPR, type, op1);
1838         }
1839       else
1840         expr = fold_build2 (code, type, op0, op1);
1841
1842       return fold_convert (orig_type, expr);
1843
1844     case ARRAY_REF:
1845       if (!inside_addr)
1846         return orig_expr;
1847
1848       step = array_ref_element_size (expr);
1849       if (!cst_and_fits_in_hwi (step))
1850         break;
1851
1852       st = int_cst_value (step);
1853       op1 = TREE_OPERAND (expr, 1);
1854       op1 = strip_offset_1 (op1, false, false, &off1);
1855       *offset = off1 * st;
1856
1857       if (top_compref
1858           && zero_p (op1))
1859         {
1860           /* Strip the component reference completely.  */
1861           op0 = TREE_OPERAND (expr, 0);
1862           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863           *offset += off0;
1864           return op0;
1865         }
1866       break;
1867
1868     case COMPONENT_REF:
1869       if (!inside_addr)
1870         return orig_expr;
1871
1872       tmp = component_ref_field_offset (expr);
1873       if (top_compref
1874           && cst_and_fits_in_hwi (tmp))
1875         {
1876           /* Strip the component reference completely.  */
1877           op0 = TREE_OPERAND (expr, 0);
1878           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879           *offset = off0 + int_cst_value (tmp);
1880           return op0;
1881         }
1882       break;
1883
1884     case ADDR_EXPR:
1885       op0 = TREE_OPERAND (expr, 0);
1886       op0 = strip_offset_1 (op0, true, true, &off0);
1887       *offset += off0;
1888
1889       if (op0 == TREE_OPERAND (expr, 0))
1890         return orig_expr;
1891
1892       expr = build_fold_addr_expr (op0);
1893       return fold_convert (orig_type, expr);
1894
1895     case INDIRECT_REF:
1896       inside_addr = false;
1897       break;
1898
1899     default:
1900       return orig_expr;
1901     }
1902
1903   /* Default handling of expressions for that we want to recurse into
1904      the first operand.  */
1905   op0 = TREE_OPERAND (expr, 0);
1906   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907   *offset += off0;
1908
1909   if (op0 == TREE_OPERAND (expr, 0)
1910       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911     return orig_expr;
1912
1913   expr = copy_node (expr);
1914   TREE_OPERAND (expr, 0) = op0;
1915   if (op1)
1916     TREE_OPERAND (expr, 1) = op1;
1917
1918   /* Inside address, we might strip the top level component references,
1919      thus changing type of the expression.  Handling of ADDR_EXPR
1920      will fix that.  */
1921   expr = fold_convert (orig_type, expr);
1922
1923   return expr;
1924 }
1925
1926 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1927
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1930 {
1931   return strip_offset_1 (expr, false, false, offset);
1932 }
1933
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935    We return unsigned type with the same precision, which avoids problems
1936    with overflows.  */
1937
1938 static tree
1939 generic_type_for (tree type)
1940 {
1941   if (POINTER_TYPE_P (type))
1942     return unsigned_type_for (type);
1943
1944   if (TYPE_UNSIGNED (type))
1945     return type;
1946
1947   return unsigned_type_for (type);
1948 }
1949
1950 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1951    the bitmap to that we should store it.  */
1952
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1956 {
1957   bitmap *depends_on = data;
1958   struct version_info *info;
1959
1960   if (TREE_CODE (*expr_p) != SSA_NAME)
1961     return NULL_TREE;
1962   info = name_info (fd_ivopts_data, *expr_p);
1963
1964   if (!info->inv_id || info->has_nonlin_use)
1965     return NULL_TREE;
1966
1967   if (!*depends_on)
1968     *depends_on = BITMAP_ALLOC (NULL);
1969   bitmap_set_bit (*depends_on, info->inv_id);
1970
1971   return NULL_TREE;
1972 }
1973
1974 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1975    position to POS.  If USE is not NULL, the candidate is set as related to
1976    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1977    replacement of the final value of the iv by a direct computation.  */
1978
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981                  tree base, tree step, bool important, enum iv_position pos,
1982                  struct iv_use *use, tree incremented_at)
1983 {
1984   unsigned i;
1985   struct iv_cand *cand = NULL;
1986   tree type, orig_type;
1987
1988   if (base)
1989     {
1990       orig_type = TREE_TYPE (base);
1991       type = generic_type_for (orig_type);
1992       if (type != orig_type)
1993         {
1994           base = fold_convert (type, base);
1995           if (step)
1996             step = fold_convert (type, step);
1997         }
1998     }
1999
2000   for (i = 0; i < n_iv_cands (data); i++)
2001     {
2002       cand = iv_cand (data, i);
2003
2004       if (cand->pos != pos)
2005         continue;
2006
2007       if (cand->incremented_at != incremented_at)
2008         continue;
2009
2010       if (!cand->iv)
2011         {
2012           if (!base && !step)
2013             break;
2014
2015           continue;
2016         }
2017
2018       if (!base && !step)
2019         continue;
2020
2021       if (!operand_equal_p (base, cand->iv->base, 0))
2022         continue;
2023
2024       if (zero_p (cand->iv->step))
2025         {
2026           if (zero_p (step))
2027             break;
2028         }
2029       else
2030         {
2031           if (step && operand_equal_p (step, cand->iv->step, 0))
2032             break;
2033         }
2034     }
2035
2036   if (i == n_iv_cands (data))
2037     {
2038       cand = XCNEW (struct iv_cand);
2039       cand->id = i;
2040
2041       if (!base && !step)
2042         cand->iv = NULL;
2043       else
2044         cand->iv = alloc_iv (base, step);
2045
2046       cand->pos = pos;
2047       if (pos != IP_ORIGINAL && cand->iv)
2048         {
2049           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050           cand->var_after = cand->var_before;
2051         }
2052       cand->important = important;
2053       cand->incremented_at = incremented_at;
2054       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2055
2056       if (step
2057           && TREE_CODE (step) != INTEGER_CST)
2058         {
2059           fd_ivopts_data = data;
2060           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2061         }
2062
2063       if (dump_file && (dump_flags & TDF_DETAILS))
2064         dump_cand (dump_file, cand);
2065     }
2066
2067   if (important && !cand->important)
2068     {
2069       cand->important = true;
2070       if (dump_file && (dump_flags & TDF_DETAILS))
2071         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2072     }
2073
2074   if (use)
2075     {
2076       bitmap_set_bit (use->related_cands, i);
2077       if (dump_file && (dump_flags & TDF_DETAILS))
2078         fprintf (dump_file, "Candidate %d is related to use %d\n",
2079                  cand->id, use->id);
2080     }
2081
2082   return cand;
2083 }
2084
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086    is allowed.
2087
2088    The purpose is to avoid splitting latch edge with a biv increment, thus
2089    creating a jump, possibly confusing other optimization passes and leaving
2090    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2091    is not available (so we do not have a better alternative), or if the latch
2092    edge is already nonempty.  */
2093
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2096 {
2097   if (!ip_normal_pos (loop))
2098     return true;
2099
2100   if (!empty_block_p (ip_end_pos (loop)))
2101     return true;
2102
2103   return false;
2104 }
2105
2106 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2107    position to POS.  If USE is not NULL, the candidate is set as related to
2108    it.  The candidate computation is scheduled on all available positions.  */
2109
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112                tree base, tree step, bool important, struct iv_use *use)
2113 {
2114   if (ip_normal_pos (data->current_loop))
2115     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116   if (ip_end_pos (data->current_loop)
2117       && allow_ip_end_pos_p (data->current_loop))
2118     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2119 }
2120
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122    type with SIZE bits.  */
2123
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126                                      unsigned int size)
2127 {
2128   tree type = lang_hooks.types.type_for_size (size, true);
2129   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130                  true, NULL);
2131 }
2132
2133 /* Adds standard iv candidates.  */
2134
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2137 {
2138   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2139
2140   /* The same for a double-integer type if it is still fast enough.  */
2141   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 }
2144
2145
2146 /* Adds candidates bases on the old induction variable IV.  */
2147
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2150 {
2151   tree phi, def;
2152   struct iv_cand *cand;
2153
2154   add_candidate (data, iv->base, iv->step, true, NULL);
2155
2156   /* The same, but with initial value zero.  */
2157   add_candidate (data,
2158                  build_int_cst (TREE_TYPE (iv->base), 0),
2159                  iv->step, true, NULL);
2160
2161   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162   if (TREE_CODE (phi) == PHI_NODE)
2163     {
2164       /* Additionally record the possibility of leaving the original iv
2165          untouched.  */
2166       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167       cand = add_candidate_1 (data,
2168                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169                               SSA_NAME_DEF_STMT (def));
2170       cand->var_before = iv->ssa_name;
2171       cand->var_after = def;
2172     }
2173 }
2174
2175 /* Adds candidates based on the old induction variables.  */
2176
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2179 {
2180   unsigned i;
2181   struct iv *iv;
2182   bitmap_iterator bi;
2183
2184   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2185     {
2186       iv = ver_info (data, i)->iv;
2187       if (iv && iv->biv_p && !zero_p (iv->step))
2188         add_old_iv_candidates (data, iv);
2189     }
2190 }
2191
2192 /* Adds candidates based on the value of the induction variable IV and USE.  */
2193
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196                          struct iv *iv, struct iv_use *use)
2197 {
2198   unsigned HOST_WIDE_INT offset;
2199   tree base;
2200
2201   add_candidate (data, iv->base, iv->step, false, use);
2202
2203   /* The same, but with initial value zero.  Make such variable important,
2204      since it is generic enough so that possibly many uses may be based
2205      on it.  */
2206   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207                  iv->step, true, use);
2208
2209   /* Third, try removing the constant offset.  */
2210   base = strip_offset (iv->base, &offset);
2211   if (offset)
2212     add_candidate (data, base, iv->step, false, use);
2213 }
2214
2215 /* Adds candidates based on the uses.  */
2216
2217 static void
2218 add_derived_ivs_candidates (struct ivopts_data *data)
2219 {
2220   unsigned i;
2221
2222   for (i = 0; i < n_iv_uses (data); i++)
2223     {
2224       struct iv_use *use = iv_use (data, i);
2225
2226       if (!use)
2227         continue;
2228
2229       switch (use->type)
2230         {
2231         case USE_NONLINEAR_EXPR:
2232         case USE_COMPARE:
2233         case USE_ADDRESS:
2234           /* Just add the ivs based on the value of the iv used here.  */
2235           add_iv_value_candidates (data, use->iv, use);
2236           break;
2237
2238         default:
2239           gcc_unreachable ();
2240         }
2241     }
2242 }
2243
2244 /* Record important candidates and add them to related_cands bitmaps
2245    if needed.  */
2246
2247 static void
2248 record_important_candidates (struct ivopts_data *data)
2249 {
2250   unsigned i;
2251   struct iv_use *use;
2252
2253   for (i = 0; i < n_iv_cands (data); i++)
2254     {
2255       struct iv_cand *cand = iv_cand (data, i);
2256
2257       if (cand->important)
2258         bitmap_set_bit (data->important_candidates, i);
2259     }
2260
2261   data->consider_all_candidates = (n_iv_cands (data)
2262                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2263
2264   if (data->consider_all_candidates)
2265     {
2266       /* We will not need "related_cands" bitmaps in this case,
2267          so release them to decrease peak memory consumption.  */
2268       for (i = 0; i < n_iv_uses (data); i++)
2269         {
2270           use = iv_use (data, i);
2271           BITMAP_FREE (use->related_cands);
2272         }
2273     }
2274   else
2275     {
2276       /* Add important candidates to the related_cands bitmaps.  */
2277       for (i = 0; i < n_iv_uses (data); i++)
2278         bitmap_ior_into (iv_use (data, i)->related_cands,
2279                          data->important_candidates);
2280     }
2281 }
2282
2283 /* Finds the candidates for the induction variables.  */
2284
2285 static void
2286 find_iv_candidates (struct ivopts_data *data)
2287 {
2288   /* Add commonly used ivs.  */
2289   add_standard_iv_candidates (data);
2290
2291   /* Add old induction variables.  */
2292   add_old_ivs_candidates (data);
2293
2294   /* Add induction variables derived from uses.  */
2295   add_derived_ivs_candidates (data);
2296
2297   /* Record the important candidates.  */
2298   record_important_candidates (data);
2299 }
2300
2301 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2302    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2303    we allocate a simple list to every use.  */
2304
2305 static void
2306 alloc_use_cost_map (struct ivopts_data *data)
2307 {
2308   unsigned i, size, s, j;
2309
2310   for (i = 0; i < n_iv_uses (data); i++)
2311     {
2312       struct iv_use *use = iv_use (data, i);
2313       bitmap_iterator bi;
2314
2315       if (data->consider_all_candidates)
2316         size = n_iv_cands (data);
2317       else
2318         {
2319           s = 0;
2320           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2321             {
2322               s++;
2323             }
2324
2325           /* Round up to the power of two, so that moduling by it is fast.  */
2326           for (size = 1; size < s; size <<= 1)
2327             continue;
2328         }
2329
2330       use->n_map_members = size;
2331       use->cost_map = XCNEWVEC (struct cost_pair, size);
2332     }
2333 }
2334
2335 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2336    on invariants DEPENDS_ON and that the value used in expressing it
2337    is VALUE.*/
2338
2339 static void
2340 set_use_iv_cost (struct ivopts_data *data,
2341                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2342                  bitmap depends_on, tree value)
2343 {
2344   unsigned i, s;
2345
2346   if (cost == INFTY)
2347     {
2348       BITMAP_FREE (depends_on);
2349       return;
2350     }
2351
2352   if (data->consider_all_candidates)
2353     {
2354       use->cost_map[cand->id].cand = cand;
2355       use->cost_map[cand->id].cost = cost;
2356       use->cost_map[cand->id].depends_on = depends_on;
2357       use->cost_map[cand->id].value = value;
2358       return;
2359     }
2360
2361   /* n_map_members is a power of two, so this computes modulo.  */
2362   s = cand->id & (use->n_map_members - 1);
2363   for (i = s; i < use->n_map_members; i++)
2364     if (!use->cost_map[i].cand)
2365       goto found;
2366   for (i = 0; i < s; i++)
2367     if (!use->cost_map[i].cand)
2368       goto found;
2369
2370   gcc_unreachable ();
2371
2372 found:
2373   use->cost_map[i].cand = cand;
2374   use->cost_map[i].cost = cost;
2375   use->cost_map[i].depends_on = depends_on;
2376   use->cost_map[i].value = value;
2377 }
2378
2379 /* Gets cost of (USE, CANDIDATE) pair.  */
2380
2381 static struct cost_pair *
2382 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2383                  struct iv_cand *cand)
2384 {
2385   unsigned i, s;
2386   struct cost_pair *ret;
2387
2388   if (!cand)
2389     return NULL;
2390
2391   if (data->consider_all_candidates)
2392     {
2393       ret = use->cost_map + cand->id;
2394       if (!ret->cand)
2395         return NULL;
2396
2397       return ret;
2398     }
2399
2400   /* n_map_members is a power of two, so this computes modulo.  */
2401   s = cand->id & (use->n_map_members - 1);
2402   for (i = s; i < use->n_map_members; i++)
2403     if (use->cost_map[i].cand == cand)
2404       return use->cost_map + i;
2405
2406   for (i = 0; i < s; i++)
2407     if (use->cost_map[i].cand == cand)
2408       return use->cost_map + i;
2409
2410   return NULL;
2411 }
2412
2413 /* Returns estimate on cost of computing SEQ.  */
2414
2415 static unsigned
2416 seq_cost (rtx seq)
2417 {
2418   unsigned cost = 0;
2419   rtx set;
2420
2421   for (; seq; seq = NEXT_INSN (seq))
2422     {
2423       set = single_set (seq);
2424       if (set)
2425         cost += rtx_cost (set, SET);
2426       else
2427         cost++;
2428     }
2429
2430   return cost;
2431 }
2432
2433 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2434 static rtx
2435 produce_memory_decl_rtl (tree obj, int *regno)
2436 {
2437   rtx x;
2438
2439   gcc_assert (obj);
2440   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2441     {
2442       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2443       x = gen_rtx_SYMBOL_REF (Pmode, name);
2444     }
2445   else
2446     x = gen_raw_REG (Pmode, (*regno)++);
2447
2448   return gen_rtx_MEM (DECL_MODE (obj), x);
2449 }
2450
2451 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2452    walk_tree.  DATA contains the actual fake register number.  */
2453
2454 static tree
2455 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2456 {
2457   tree obj = NULL_TREE;
2458   rtx x = NULL_RTX;
2459   int *regno = data;
2460
2461   switch (TREE_CODE (*expr_p))
2462     {
2463     case ADDR_EXPR:
2464       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2465            handled_component_p (*expr_p);
2466            expr_p = &TREE_OPERAND (*expr_p, 0))
2467         continue;
2468       obj = *expr_p;
2469       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2470         x = produce_memory_decl_rtl (obj, regno);
2471       break;
2472
2473     case SSA_NAME:
2474       *ws = 0;
2475       obj = SSA_NAME_VAR (*expr_p);
2476       if (!DECL_RTL_SET_P (obj))
2477         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2478       break;
2479
2480     case VAR_DECL:
2481     case PARM_DECL:
2482     case RESULT_DECL:
2483       *ws = 0;
2484       obj = *expr_p;
2485
2486       if (DECL_RTL_SET_P (obj))
2487         break;
2488
2489       if (DECL_MODE (obj) == BLKmode)
2490         x = produce_memory_decl_rtl (obj, regno);
2491       else
2492         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2493
2494       break;
2495
2496     default:
2497       break;
2498     }
2499
2500   if (x)
2501     {
2502       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2503       SET_DECL_RTL (obj, x);
2504     }
2505
2506   return NULL_TREE;
2507 }
2508
2509 /* Determines cost of the computation of EXPR.  */
2510
2511 static unsigned
2512 computation_cost (tree expr)
2513 {
2514   rtx seq, rslt;
2515   tree type = TREE_TYPE (expr);
2516   unsigned cost;
2517   /* Avoid using hard regs in ways which may be unsupported.  */
2518   int regno = LAST_VIRTUAL_REGISTER + 1;
2519
2520   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2521   start_sequence ();
2522   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2523   seq = get_insns ();
2524   end_sequence ();
2525
2526   cost = seq_cost (seq);
2527   if (MEM_P (rslt))
2528     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2529
2530   return cost;
2531 }
2532
2533 /* Returns variable containing the value of candidate CAND at statement AT.  */
2534
2535 static tree
2536 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2537 {
2538   if (stmt_after_increment (loop, cand, stmt))
2539     return cand->var_after;
2540   else
2541     return cand->var_before;
2542 }
2543
2544 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2545    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2546
2547 int
2548 tree_int_cst_sign_bit (tree t)
2549 {
2550   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2551   unsigned HOST_WIDE_INT w;
2552
2553   if (bitno < HOST_BITS_PER_WIDE_INT)
2554     w = TREE_INT_CST_LOW (t);
2555   else
2556     {
2557       w = TREE_INT_CST_HIGH (t);
2558       bitno -= HOST_BITS_PER_WIDE_INT;
2559     }
2560
2561   return (w >> bitno) & 1;
2562 }
2563
2564 /* If we can prove that TOP = cst * BOT for some constant cst,
2565    store cst to MUL and return true.  Otherwise return false.
2566    The returned value is always sign-extended, regardless of the
2567    signedness of TOP and BOT.  */
2568
2569 static bool
2570 constant_multiple_of (tree top, tree bot, double_int *mul)
2571 {
2572   tree mby;
2573   enum tree_code code;
2574   double_int res, p0, p1;
2575   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2576
2577   STRIP_NOPS (top);
2578   STRIP_NOPS (bot);
2579
2580   if (operand_equal_p (top, bot, 0))
2581     {
2582       *mul = double_int_one;
2583       return true;
2584     }
2585
2586   code = TREE_CODE (top);
2587   switch (code)
2588     {
2589     case MULT_EXPR:
2590       mby = TREE_OPERAND (top, 1);
2591       if (TREE_CODE (mby) != INTEGER_CST)
2592         return false;
2593
2594       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2595         return false;
2596
2597       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2598                               precision);
2599       return true;
2600
2601     case PLUS_EXPR:
2602     case MINUS_EXPR:
2603       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2604           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2605         return false;
2606
2607       if (code == MINUS_EXPR)
2608         p1 = double_int_neg (p1);
2609       *mul = double_int_sext (double_int_add (p0, p1), precision);
2610       return true;
2611
2612     case INTEGER_CST:
2613       if (TREE_CODE (bot) != INTEGER_CST)
2614         return false;
2615
2616       p0 = double_int_sext (tree_to_double_int (bot), precision);
2617       p1 = double_int_sext (tree_to_double_int (top), precision);
2618       if (double_int_zero_p (p1))
2619         return false;
2620       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2621                               precision);
2622       return double_int_zero_p (res);
2623
2624     default:
2625       return false;
2626     }
2627 }
2628
2629 /* Sets COMB to CST.  */
2630
2631 static void
2632 aff_combination_const (struct affine_tree_combination *comb, tree type,
2633                        unsigned HOST_WIDE_INT cst)
2634 {
2635   unsigned prec = TYPE_PRECISION (type);
2636
2637   comb->type = type;
2638   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2639
2640   comb->n = 0;
2641   comb->rest = NULL_TREE;
2642   comb->offset = cst & comb->mask;
2643 }
2644
2645 /* Sets COMB to single element ELT.  */
2646
2647 static void
2648 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2649 {
2650   unsigned prec = TYPE_PRECISION (type);
2651
2652   comb->type = type;
2653   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2654
2655   comb->n = 1;
2656   comb->elts[0] = elt;
2657   comb->coefs[0] = 1;
2658   comb->rest = NULL_TREE;
2659   comb->offset = 0;
2660 }
2661
2662 /* Scales COMB by SCALE.  */
2663
2664 static void
2665 aff_combination_scale (struct affine_tree_combination *comb,
2666                        unsigned HOST_WIDE_INT scale)
2667 {
2668   unsigned i, j;
2669
2670   if (scale == 1)
2671     return;
2672
2673   if (scale == 0)
2674     {
2675       aff_combination_const (comb, comb->type, 0);
2676       return;
2677     }
2678
2679   comb->offset = (scale * comb->offset) & comb->mask;
2680   for (i = 0, j = 0; i < comb->n; i++)
2681     {
2682       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2683       comb->elts[j] = comb->elts[i];
2684       if (comb->coefs[j] != 0)
2685         j++;
2686     }
2687   comb->n = j;
2688
2689   if (comb->rest)
2690     {
2691       if (comb->n < MAX_AFF_ELTS)
2692         {
2693           comb->coefs[comb->n] = scale;
2694           comb->elts[comb->n] = comb->rest;
2695           comb->rest = NULL_TREE;
2696           comb->n++;
2697         }
2698       else
2699         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2700                                   build_int_cst_type (comb->type, scale));
2701     }
2702 }
2703
2704 /* Adds ELT * SCALE to COMB.  */
2705
2706 static void
2707 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2708                          unsigned HOST_WIDE_INT scale)
2709 {
2710   unsigned i;
2711
2712   if (scale == 0)
2713     return;
2714
2715   for (i = 0; i < comb->n; i++)
2716     if (operand_equal_p (comb->elts[i], elt, 0))
2717       {
2718         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2719         if (comb->coefs[i])
2720           return;
2721
2722         comb->n--;
2723         comb->coefs[i] = comb->coefs[comb->n];
2724         comb->elts[i] = comb->elts[comb->n];
2725
2726         if (comb->rest)
2727           {
2728             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2729             comb->coefs[comb->n] = 1;
2730             comb->elts[comb->n] = comb->rest;
2731             comb->rest = NULL_TREE;
2732             comb->n++;
2733           }
2734         return;
2735       }
2736   if (comb->n < MAX_AFF_ELTS)
2737     {
2738       comb->coefs[comb->n] = scale;
2739       comb->elts[comb->n] = elt;
2740       comb->n++;
2741       return;
2742     }
2743
2744   if (scale == 1)
2745     elt = fold_convert (comb->type, elt);
2746   else
2747     elt = fold_build2 (MULT_EXPR, comb->type,
2748                        fold_convert (comb->type, elt),
2749                        build_int_cst_type (comb->type, scale));
2750
2751   if (comb->rest)
2752     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2753   else
2754     comb->rest = elt;
2755 }
2756
2757 /* Adds COMB2 to COMB1.  */
2758
2759 static void
2760 aff_combination_add (struct affine_tree_combination *comb1,
2761                      struct affine_tree_combination *comb2)
2762 {
2763   unsigned i;
2764
2765   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2766   for (i = 0; i < comb2->n; i++)
2767     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2768   if (comb2->rest)
2769     aff_combination_add_elt (comb1, comb2->rest, 1);
2770 }
2771
2772 /* Convert COMB to TYPE.  */
2773
2774 static void
2775 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2776 {
2777   unsigned prec = TYPE_PRECISION (type);
2778   unsigned i;
2779
2780   /* If the precision of both types is the same, it suffices to change the type
2781      of the whole combination -- the elements are allowed to have another type
2782      equivalent wrto STRIP_NOPS.  */
2783   if (prec == TYPE_PRECISION (comb->type))
2784     {
2785       comb->type = type;
2786       return;
2787     }
2788
2789   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2790   comb->offset = comb->offset & comb->mask;
2791
2792   /* The type of the elements can be different from comb->type only as
2793      much as what STRIP_NOPS would remove.  We can just directly cast
2794      to TYPE.  */
2795   for (i = 0; i < comb->n; i++)
2796     comb->elts[i] = fold_convert (type, comb->elts[i]);
2797   if (comb->rest)
2798     comb->rest = fold_convert (type, comb->rest);
2799
2800   comb->type = type;
2801 }
2802
2803 /* Splits EXPR into an affine combination of parts.  */
2804
2805 static void
2806 tree_to_aff_combination (tree expr, tree type,
2807                          struct affine_tree_combination *comb)
2808 {
2809   struct affine_tree_combination tmp;
2810   enum tree_code code;
2811   tree cst, core, toffset;
2812   HOST_WIDE_INT bitpos, bitsize;
2813   enum machine_mode mode;
2814   int unsignedp, volatilep;
2815
2816   STRIP_NOPS (expr);
2817
2818   code = TREE_CODE (expr);
2819   switch (code)
2820     {
2821     case INTEGER_CST:
2822       aff_combination_const (comb, type, int_cst_value (expr));
2823       return;
2824
2825     case PLUS_EXPR:
2826     case MINUS_EXPR:
2827       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2828       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2829       if (code == MINUS_EXPR)
2830         aff_combination_scale (&tmp, -1);
2831       aff_combination_add (comb, &tmp);
2832       return;
2833
2834     case MULT_EXPR:
2835       cst = TREE_OPERAND (expr, 1);
2836       if (TREE_CODE (cst) != INTEGER_CST)
2837         break;
2838       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839       aff_combination_scale (comb, int_cst_value (cst));
2840       return;
2841
2842     case NEGATE_EXPR:
2843       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844       aff_combination_scale (comb, -1);
2845       return;
2846
2847     case ADDR_EXPR:
2848       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2849                                   &toffset, &mode, &unsignedp, &volatilep,
2850                                   false);
2851       if (bitpos % BITS_PER_UNIT != 0)
2852         break;
2853       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2854       core = build_fold_addr_expr (core);
2855       if (TREE_CODE (core) == ADDR_EXPR)
2856         aff_combination_add_elt (comb, core, 1);
2857       else
2858         {
2859           tree_to_aff_combination (core, type, &tmp);
2860           aff_combination_add (comb, &tmp);
2861         }
2862       if (toffset)
2863         {
2864           tree_to_aff_combination (toffset, type, &tmp);
2865           aff_combination_add (comb, &tmp);
2866         }
2867       return;
2868
2869     default:
2870       break;
2871     }
2872
2873   aff_combination_elt (comb, type, expr);
2874 }
2875
2876 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2877
2878 static tree
2879 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2880                  unsigned HOST_WIDE_INT mask)
2881 {
2882   enum tree_code code;
2883
2884   scale &= mask;
2885   elt = fold_convert (type, elt);
2886
2887   if (scale == 1)
2888     {
2889       if (!expr)
2890         return elt;
2891
2892       return fold_build2 (PLUS_EXPR, type, expr, elt);
2893     }
2894
2895   if (scale == mask)
2896     {
2897       if (!expr)
2898         return fold_build1 (NEGATE_EXPR, type, elt);
2899
2900       return fold_build2 (MINUS_EXPR, type, expr, elt);
2901     }
2902
2903   if (!expr)
2904     return fold_build2 (MULT_EXPR, type, elt,
2905                         build_int_cst_type (type, scale));
2906
2907   if ((scale | (mask >> 1)) == mask)
2908     {
2909       /* Scale is negative.  */
2910       code = MINUS_EXPR;
2911       scale = (-scale) & mask;
2912     }
2913   else
2914     code = PLUS_EXPR;
2915
2916   elt = fold_build2 (MULT_EXPR, type, elt,
2917                      build_int_cst_type (type, scale));
2918   return fold_build2 (code, type, expr, elt);
2919 }
2920
2921 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2922
2923 static void
2924 unshare_aff_combination (struct affine_tree_combination *comb)
2925 {
2926   unsigned i;
2927
2928   for (i = 0; i < comb->n; i++)
2929     comb->elts[i] = unshare_expr (comb->elts[i]);
2930   if (comb->rest)
2931     comb->rest = unshare_expr (comb->rest);
2932 }
2933
2934 /* Makes tree from the affine combination COMB.  */
2935
2936 static tree
2937 aff_combination_to_tree (struct affine_tree_combination *comb)
2938 {
2939   tree type = comb->type;
2940   tree expr = comb->rest;
2941   unsigned i;
2942   unsigned HOST_WIDE_INT off, sgn;
2943
2944   if (comb->n == 0 && comb->offset == 0)
2945     {
2946       if (expr)
2947         {
2948           /* Handle the special case produced by get_computation_aff when
2949              the type does not fit in HOST_WIDE_INT.  */
2950           return fold_convert (type, expr);
2951         }
2952       else
2953         return build_int_cst (type, 0);
2954     }
2955
2956   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2957
2958   for (i = 0; i < comb->n; i++)
2959     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2960                             comb->mask);
2961
2962   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2963     {
2964       /* Offset is negative.  */
2965       off = (-comb->offset) & comb->mask;
2966       sgn = comb->mask;
2967     }
2968   else
2969     {
2970       off = comb->offset;
2971       sgn = 1;
2972     }
2973   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2974                           comb->mask);
2975 }
2976
2977 /* Folds EXPR using the affine expressions framework.  */
2978
2979 static tree
2980 fold_affine_expr (tree expr)
2981 {
2982   tree type = TREE_TYPE (expr);
2983   struct affine_tree_combination comb;
2984
2985   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2986     return expr;
2987
2988   tree_to_aff_combination (expr, type, &comb);
2989   return aff_combination_to_tree (&comb);
2990 }
2991
2992 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2993    same precision that is at least as wide as the precision of TYPE, stores
2994    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2995    type of A and B.  */
2996
2997 static tree
2998 determine_common_wider_type (tree *a, tree *b)
2999 {
3000   tree wider_type = NULL;
3001   tree suba, subb;
3002   tree atype = TREE_TYPE (*a);
3003
3004   if ((TREE_CODE (*a) == NOP_EXPR
3005        || TREE_CODE (*a) == CONVERT_EXPR))
3006     {
3007       suba = TREE_OPERAND (*a, 0);
3008       wider_type = TREE_TYPE (suba);
3009       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010         return atype;
3011     }
3012   else
3013     return atype;
3014
3015   if ((TREE_CODE (*b) == NOP_EXPR
3016        || TREE_CODE (*b) == CONVERT_EXPR))
3017     {
3018       subb = TREE_OPERAND (*b, 0);
3019       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3020         return atype;
3021     }
3022   else
3023     return atype;
3024
3025   *a = suba;
3026   *b = subb;
3027   return wider_type;
3028 }
3029
3030 /* Determines the expression by that USE is expressed from induction variable
3031    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3032    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3033
3034 static bool
3035 get_computation_aff (struct loop *loop,
3036                      struct iv_use *use, struct iv_cand *cand, tree at,
3037                      struct affine_tree_combination *aff)
3038 {
3039   tree ubase = use->iv->base;
3040   tree ustep = use->iv->step;
3041   tree cbase = cand->iv->base;
3042   tree cstep = cand->iv->step;
3043   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3044   tree common_type;
3045   tree uutype;
3046   tree expr, delta;
3047   tree ratio;
3048   unsigned HOST_WIDE_INT ustepi, cstepi;
3049   HOST_WIDE_INT ratioi;
3050   struct affine_tree_combination cbase_aff, expr_aff;
3051   tree cstep_orig = cstep, ustep_orig = ustep;
3052   double_int rat;
3053
3054   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3055     {
3056       /* We do not have a precision to express the values of use.  */
3057       return false;
3058     }
3059
3060   expr = var_at_stmt (loop, cand, at);
3061
3062   if (TREE_TYPE (expr) != ctype)
3063     {
3064       /* This may happen with the original ivs.  */
3065       expr = fold_convert (ctype, expr);
3066     }
3067
3068   if (TYPE_UNSIGNED (utype))
3069     uutype = utype;
3070   else
3071     {
3072       uutype = unsigned_type_for (utype);
3073       ubase = fold_convert (uutype, ubase);
3074       ustep = fold_convert (uutype, ustep);
3075     }
3076
3077   if (uutype != ctype)
3078     {
3079       expr = fold_convert (uutype, expr);
3080       cbase = fold_convert (uutype, cbase);
3081       cstep = fold_convert (uutype, cstep);
3082
3083       /* If the conversion is not noop, we must take it into account when
3084          considering the value of the step.  */
3085       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3086         cstep_orig = cstep;
3087     }
3088
3089   if (cst_and_fits_in_hwi (cstep_orig)
3090       && cst_and_fits_in_hwi (ustep_orig))
3091     {
3092       ustepi = int_cst_value (ustep_orig);
3093       cstepi = int_cst_value (cstep_orig);
3094
3095       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3096         {
3097           /* TODO maybe consider case when ustep divides cstep and the ratio is
3098              a power of 2 (so that the division is fast to execute)?  We would
3099              need to be much more careful with overflows etc. then.  */
3100           return false;
3101         }
3102
3103       ratio = build_int_cst_type (uutype, ratioi);
3104     }
3105   else
3106     {
3107       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3108         return false;
3109       ratio = double_int_to_tree (uutype, rat);
3110
3111       /* Ratioi is only used to detect special cases when the multiplicative
3112          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3113          set it to 0.  */
3114       if (double_int_fits_in_shwi_p (rat))
3115         ratioi = double_int_to_shwi (rat);
3116       else
3117         ratioi = 0;
3118     }
3119
3120   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3121      type, we achieve better folding by computing their difference in this
3122      wider type, and cast the result to UUTYPE.  We do not need to worry about
3123      overflows, as all the arithmetics will in the end be performed in UUTYPE
3124      anyway.  */
3125   common_type = determine_common_wider_type (&ubase, &cbase);
3126
3127   /* We may need to shift the value if we are after the increment.  */
3128   if (stmt_after_increment (loop, cand, at))
3129     {
3130       if (uutype != common_type)
3131         cstep = fold_convert (common_type, cstep);
3132       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3133     }
3134
3135   /* use = ubase - ratio * cbase + ratio * var.
3136
3137      In general case ubase + ratio * (var - cbase) could be better (one less
3138      multiplication), but often it is possible to eliminate redundant parts
3139      of computations from (ubase - ratio * cbase) term, and if it does not
3140      happen, fold is able to apply the distributive law to obtain this form
3141      anyway.  */
3142
3143   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3144     {
3145       /* Let's compute in trees and just return the result in AFF.  This case
3146          should not be very common, and fold itself is not that bad either,
3147          so making the aff. functions more complicated to handle this case
3148          is not that urgent.  */
3149       if (ratioi == 1)
3150         {
3151           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3152           if (uutype != common_type)
3153             delta = fold_convert (uutype, delta);
3154           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3155         }
3156       else if (ratioi == -1)
3157         {
3158           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3159           if (uutype != common_type)
3160             delta = fold_convert (uutype, delta);
3161           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3162         }
3163       else
3164         {
3165           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3166           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3167           if (uutype != common_type)
3168             delta = fold_convert (uutype, delta);
3169           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3170           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3171         }
3172
3173       aff->type = uutype;
3174       aff->n = 0;
3175       aff->offset = 0;
3176       aff->mask = 0;
3177       aff->rest = expr;
3178       return true;
3179     }
3180
3181   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3182      possible to compute ratioi.  */
3183   gcc_assert (ratioi);
3184
3185   tree_to_aff_combination (ubase, common_type, aff);
3186   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3187   tree_to_aff_combination (expr, uutype, &expr_aff);
3188   aff_combination_scale (&cbase_aff, -ratioi);
3189   aff_combination_scale (&expr_aff, ratioi);
3190   aff_combination_add (aff, &cbase_aff);
3191   if (common_type != uutype)
3192     aff_combination_convert (uutype, aff);
3193   aff_combination_add (aff, &expr_aff);
3194
3195   return true;
3196 }
3197
3198 /* Determines the expression by that USE is expressed from induction variable
3199    CAND at statement AT in LOOP.  The computation is unshared.  */
3200
3201 static tree
3202 get_computation_at (struct loop *loop,
3203                     struct iv_use *use, struct iv_cand *cand, tree at)
3204 {
3205   struct affine_tree_combination aff;
3206   tree type = TREE_TYPE (use->iv->base);
3207
3208   if (!get_computation_aff (loop, use, cand, at, &aff))
3209     return NULL_TREE;
3210   unshare_aff_combination (&aff);
3211   return fold_convert (type, aff_combination_to_tree (&aff));
3212 }
3213
3214 /* Determines the expression by that USE is expressed from induction variable
3215    CAND in LOOP.  The computation is unshared.  */
3216
3217 static tree
3218 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3219 {
3220   return get_computation_at (loop, use, cand, use->stmt);
3221 }
3222
3223 /* Returns cost of addition in MODE.  */
3224
3225 static unsigned
3226 add_cost (enum machine_mode mode)
3227 {
3228   static unsigned costs[NUM_MACHINE_MODES];
3229   rtx seq;
3230   unsigned cost;
3231
3232   if (costs[mode])
3233     return costs[mode];
3234
3235   start_sequence ();
3236   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3237                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3238                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3239                  NULL_RTX);
3240   seq = get_insns ();
3241   end_sequence ();
3242
3243   cost = seq_cost (seq);
3244   if (!cost)
3245     cost = 1;
3246
3247   costs[mode] = cost;
3248
3249   if (dump_file && (dump_flags & TDF_DETAILS))
3250     fprintf (dump_file, "Addition in %s costs %d\n",
3251              GET_MODE_NAME (mode), cost);
3252   return cost;
3253 }
3254
3255 /* Entry in a hashtable of already known costs for multiplication.  */
3256 struct mbc_entry
3257 {
3258   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3259   enum machine_mode mode;       /* In mode.  */
3260   unsigned cost;                /* The cost.  */
3261 };
3262
3263 /* Counts hash value for the ENTRY.  */
3264
3265 static hashval_t
3266 mbc_entry_hash (const void *entry)
3267 {
3268   const struct mbc_entry *e = entry;
3269
3270   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3271 }
3272
3273 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3274
3275 static int
3276 mbc_entry_eq (const void *entry1, const void *entry2)
3277 {
3278   const struct mbc_entry *e1 = entry1;
3279   const struct mbc_entry *e2 = entry2;
3280
3281   return (e1->mode == e2->mode
3282           && e1->cst == e2->cst);
3283 }
3284
3285 /* Returns cost of multiplication by constant CST in MODE.  */
3286
3287 unsigned
3288 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3289 {
3290   static htab_t costs;
3291   struct mbc_entry **cached, act;
3292   rtx seq;
3293   unsigned cost;
3294
3295   if (!costs)
3296     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3297
3298   act.mode = mode;
3299   act.cst = cst;
3300   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3301   if (*cached)
3302     return (*cached)->cost;
3303
3304   *cached = XNEW (struct mbc_entry);
3305   (*cached)->mode = mode;
3306   (*cached)->cst = cst;
3307
3308   start_sequence ();
3309   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3310                gen_int_mode (cst, mode), NULL_RTX, 0);
3311   seq = get_insns ();
3312   end_sequence ();
3313
3314   cost = seq_cost (seq);
3315
3316   if (dump_file && (dump_flags & TDF_DETAILS))
3317     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3318              (int) cst, GET_MODE_NAME (mode), cost);
3319
3320   (*cached)->cost = cost;
3321
3322   return cost;
3323 }
3324
3325 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3326    validity for a memory reference accessing memory of mode MODE.  */
3327
3328 bool
3329 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3330 {
3331 #define MAX_RATIO 128
3332   static sbitmap valid_mult[MAX_MACHINE_MODE];
3333
3334   if (!valid_mult[mode])
3335     {
3336       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3337       rtx addr;
3338       HOST_WIDE_INT i;
3339
3340       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3341       sbitmap_zero (valid_mult[mode]);
3342       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3343       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3344         {
3345           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3346           if (memory_address_p (mode, addr))
3347             SET_BIT (valid_mult[mode], i + MAX_RATIO);
3348         }
3349
3350       if (dump_file && (dump_flags & TDF_DETAILS))
3351         {
3352           fprintf (dump_file, "  allowed multipliers:");
3353           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3354             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3355               fprintf (dump_file, " %d", (int) i);
3356           fprintf (dump_file, "\n");
3357           fprintf (dump_file, "\n");
3358         }
3359     }
3360
3361   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3362     return false;
3363
3364   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
3365 }
3366
3367 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3368    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3369    variable is omitted.  Compute the cost for a memory reference that accesses
3370    a memory location of mode MEM_MODE.
3371
3372    TODO -- there must be some better way.  This all is quite crude.  */
3373
3374 static unsigned
3375 get_address_cost (bool symbol_present, bool var_present,
3376                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3377                   enum machine_mode mem_mode)
3378 {
3379   static bool initialized[MAX_MACHINE_MODE];
3380   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3381   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3382   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3383   unsigned cost, acost;
3384   bool offset_p, ratio_p;
3385   HOST_WIDE_INT s_offset;
3386   unsigned HOST_WIDE_INT mask;
3387   unsigned bits;
3388
3389   if (!initialized[mem_mode])
3390     {
3391       HOST_WIDE_INT i;
3392       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3393       int old_cse_not_expected;
3394       unsigned sym_p, var_p, off_p, rat_p, add_c;
3395       rtx seq, addr, base;
3396       rtx reg0, reg1;
3397
3398       initialized[mem_mode] = true;
3399
3400       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3401
3402       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3403       for (i = start; i <= 1 << 20; i <<= 1)
3404         {
3405           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3406           if (!memory_address_p (mem_mode, addr))
3407             break;
3408         }
3409       max_offset[mem_mode] = i == start ? 0 : i >> 1;
3410       off[mem_mode] = max_offset[mem_mode];
3411
3412       for (i = start; i <= 1 << 20; i <<= 1)
3413         {
3414           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3415           if (!memory_address_p (mem_mode, addr))
3416             break;
3417         }
3418       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3419
3420       if (dump_file && (dump_flags & TDF_DETAILS))
3421         {
3422           fprintf (dump_file, "get_address_cost:\n");
3423           fprintf (dump_file, "  min offset %s %d\n",
3424                    GET_MODE_NAME (mem_mode),
3425                    (int) min_offset[mem_mode]);
3426           fprintf (dump_file, "  max offset %s %d\n",
3427                    GET_MODE_NAME (mem_mode),
3428                    (int) max_offset[mem_mode]);
3429         }
3430
3431       rat[mem_mode] = 1;
3432       for (i = 2; i <= MAX_RATIO; i++)
3433         if (multiplier_allowed_in_address_p (i, mem_mode))
3434           {
3435             rat[mem_mode] = i;
3436             break;
3437           }
3438
3439       /* Compute the cost of various addressing modes.  */
3440       acost = 0;
3441       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3442       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3443
3444       for (i = 0; i < 16; i++)
3445         {
3446           sym_p = i & 1;
3447           var_p = (i >> 1) & 1;
3448           off_p = (i >> 2) & 1;
3449           rat_p = (i >> 3) & 1;
3450
3451           addr = reg0;
3452           if (rat_p)
3453             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3454                                    gen_int_mode (rat[mem_mode], Pmode));
3455
3456           if (var_p)
3457             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3458
3459           if (sym_p)
3460             {
3461               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3462               if (off_p)
3463                 base = gen_rtx_fmt_e (CONST, Pmode,
3464                                       gen_rtx_fmt_ee (PLUS, Pmode,
3465                                                       base,
3466                                                       gen_int_mode (off[mem_mode],
3467                                                                     Pmode)));
3468             }
3469           else if (off_p)
3470             base = gen_int_mode (off[mem_mode], Pmode);
3471           else
3472             base = NULL_RTX;
3473
3474           if (base)
3475             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3476
3477           start_sequence ();
3478           /* To avoid splitting addressing modes, pretend that no cse will
3479              follow.  */
3480           old_cse_not_expected = cse_not_expected;
3481           cse_not_expected = true;
3482           addr = memory_address (mem_mode, addr);
3483           cse_not_expected = old_cse_not_expected;
3484           seq = get_insns ();
3485           end_sequence ();
3486
3487           acost = seq_cost (seq);
3488           acost += address_cost (addr, mem_mode);
3489
3490           if (!acost)
3491             acost = 1;
3492           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3493         }
3494
3495       /* On some targets, it is quite expensive to load symbol to a register,
3496          which makes addresses that contain symbols look much more expensive.
3497          However, the symbol will have to be loaded in any case before the
3498          loop (and quite likely we have it in register already), so it does not
3499          make much sense to penalize them too heavily.  So make some final
3500          tweaks for the SYMBOL_PRESENT modes:
3501
3502          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3503          var is cheaper, use this mode with small penalty.
3504          If VAR_PRESENT is true, try whether the mode with
3505          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3506          if this is the case, use it.  */
3507       add_c = add_cost (Pmode);
3508       for (i = 0; i < 8; i++)
3509         {
3510           var_p = i & 1;
3511           off_p = (i >> 1) & 1;
3512           rat_p = (i >> 2) & 1;
3513
3514           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3515           if (var_p)
3516             acost += add_c;
3517
3518           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3519             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3520         }
3521
3522       if (dump_file && (dump_flags & TDF_DETAILS))
3523         {
3524           fprintf (dump_file, "Address costs:\n");
3525
3526           for (i = 0; i < 16; i++)
3527             {
3528               sym_p = i & 1;
3529               var_p = (i >> 1) & 1;
3530               off_p = (i >> 2) & 1;
3531               rat_p = (i >> 3) & 1;
3532
3533               fprintf (dump_file, "  ");
3534               if (sym_p)
3535                 fprintf (dump_file, "sym + ");
3536               if (var_p)
3537                 fprintf (dump_file, "var + ");
3538               if (off_p)
3539                 fprintf (dump_file, "cst + ");
3540               if (rat_p)
3541                 fprintf (dump_file, "rat * ");
3542
3543               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3544               fprintf (dump_file, "index costs %d\n", acost);
3545             }
3546           fprintf (dump_file, "\n");
3547         }
3548     }
3549
3550   bits = GET_MODE_BITSIZE (Pmode);
3551   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3552   offset &= mask;
3553   if ((offset >> (bits - 1) & 1))
3554     offset |= ~mask;
3555   s_offset = offset;
3556
3557   cost = 0;
3558   offset_p = (s_offset != 0
3559               && min_offset[mem_mode] <= s_offset
3560               && s_offset <= max_offset[mem_mode]);
3561   ratio_p = (ratio != 1
3562              && multiplier_allowed_in_address_p (ratio, mem_mode));
3563
3564   if (ratio != 1 && !ratio_p)
3565     cost += multiply_by_cost (ratio, Pmode);
3566
3567   if (s_offset && !offset_p && !symbol_present)
3568     cost += add_cost (Pmode);
3569
3570   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3571   return cost + acost;
3572 }
3573
3574 /* Estimates cost of forcing expression EXPR into a variable.  */
3575
3576 unsigned
3577 force_expr_to_var_cost (tree expr)
3578 {
3579   static bool costs_initialized = false;
3580   static unsigned integer_cost;
3581   static unsigned symbol_cost;
3582   static unsigned address_cost;
3583   tree op0, op1;
3584   unsigned cost0, cost1, cost;
3585   enum machine_mode mode;
3586
3587   if (!costs_initialized)
3588     {
3589       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3590       rtx x = gen_rtx_MEM (DECL_MODE (var),
3591                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3592       tree addr;
3593       tree type = build_pointer_type (integer_type_node);
3594
3595       integer_cost = computation_cost (build_int_cst (integer_type_node,
3596                                                       2000));
3597
3598       SET_DECL_RTL (var, x);
3599       TREE_STATIC (var) = 1;
3600       addr = build1 (ADDR_EXPR, type, var);
3601       symbol_cost = computation_cost (addr) + 1;
3602
3603       address_cost
3604         = computation_cost (build2 (PLUS_EXPR, type,
3605                                     addr,
3606                                     build_int_cst (type, 2000))) + 1;
3607       if (dump_file && (dump_flags & TDF_DETAILS))
3608         {
3609           fprintf (dump_file, "force_expr_to_var_cost:\n");
3610           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3611           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3612           fprintf (dump_file, "  address %d\n", (int) address_cost);
3613           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3614           fprintf (dump_file, "\n");
3615         }
3616
3617       costs_initialized = true;
3618     }
3619
3620   STRIP_NOPS (expr);
3621
3622   if (SSA_VAR_P (expr))
3623     return 0;
3624
3625   if (TREE_INVARIANT (expr))
3626     {
3627       if (TREE_CODE (expr) == INTEGER_CST)
3628         return integer_cost;
3629
3630       if (TREE_CODE (expr) == ADDR_EXPR)
3631         {
3632           tree obj = TREE_OPERAND (expr, 0);
3633
3634           if (TREE_CODE (obj) == VAR_DECL
3635               || TREE_CODE (obj) == PARM_DECL
3636               || TREE_CODE (obj) == RESULT_DECL)
3637             return symbol_cost;
3638         }
3639
3640       return address_cost;
3641     }
3642
3643   switch (TREE_CODE (expr))
3644     {
3645     case PLUS_EXPR:
3646     case MINUS_EXPR:
3647     case MULT_EXPR:
3648       op0 = TREE_OPERAND (expr, 0);
3649       op1 = TREE_OPERAND (expr, 1);
3650       STRIP_NOPS (op0);
3651       STRIP_NOPS (op1);
3652
3653       if (is_gimple_val (op0))
3654         cost0 = 0;
3655       else
3656         cost0 = force_expr_to_var_cost (op0);
3657
3658       if (is_gimple_val (op1))
3659         cost1 = 0;
3660       else
3661         cost1 = force_expr_to_var_cost (op1);
3662
3663       break;
3664
3665     default:
3666       /* Just an arbitrary value, FIXME.  */
3667       return target_spill_cost;
3668     }
3669
3670   mode = TYPE_MODE (TREE_TYPE (expr));
3671   switch (TREE_CODE (expr))
3672     {
3673     case PLUS_EXPR:
3674     case MINUS_EXPR:
3675       cost = add_cost (mode);
3676       break;
3677
3678     case MULT_EXPR:
3679       if (cst_and_fits_in_hwi (op0))
3680         cost = multiply_by_cost (int_cst_value (op0), mode);
3681       else if (cst_and_fits_in_hwi (op1))
3682         cost = multiply_by_cost (int_cst_value (op1), mode);
3683       else
3684         return target_spill_cost;
3685       break;
3686
3687     default:
3688       gcc_unreachable ();
3689     }
3690
3691   cost += cost0;
3692   cost += cost1;
3693
3694   /* Bound the cost by target_spill_cost.  The parts of complicated
3695      computations often are either loop invariant or at least can
3696      be shared between several iv uses, so letting this grow without
3697      limits would not give reasonable results.  */
3698   return cost < target_spill_cost ? cost : target_spill_cost;
3699 }
3700
3701 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3702    invariants the computation depends on.  */
3703
3704 static unsigned
3705 force_var_cost (struct ivopts_data *data,
3706                 tree expr, bitmap *depends_on)
3707 {
3708   if (depends_on)
3709     {
3710       fd_ivopts_data = data;
3711       walk_tree (&expr, find_depends, depends_on, NULL);
3712     }
3713
3714   return force_expr_to_var_cost (expr);
3715 }
3716
3717 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3718    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3719    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3720    invariants the computation depends on.  */
3721
3722 static unsigned
3723 split_address_cost (struct ivopts_data *data,
3724                     tree addr, bool *symbol_present, bool *var_present,
3725                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3726 {
3727   tree core;
3728   HOST_WIDE_INT bitsize;
3729   HOST_WIDE_INT bitpos;
3730   tree toffset;
3731   enum machine_mode mode;
3732   int unsignedp, volatilep;
3733
3734   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3735                               &unsignedp, &volatilep, false);
3736
3737   if (toffset != 0
3738       || bitpos % BITS_PER_UNIT != 0
3739       || TREE_CODE (core) != VAR_DECL)
3740     {
3741       *symbol_present = false;
3742       *var_present = true;
3743       fd_ivopts_data = data;
3744       walk_tree (&addr, find_depends, depends_on, NULL);
3745       return target_spill_cost;
3746     }
3747
3748   *offset += bitpos / BITS_PER_UNIT;
3749   if (TREE_STATIC (core)
3750       || DECL_EXTERNAL (core))
3751     {
3752       *symbol_present = true;
3753       *var_present = false;
3754       return 0;
3755     }
3756
3757   *symbol_present = false;
3758   *var_present = true;
3759   return 0;
3760 }
3761
3762 /* Estimates cost of expressing difference of addresses E1 - E2 as
3763    var + symbol + offset.  The value of offset is added to OFFSET,
3764    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3765    part is missing.  DEPENDS_ON is a set of the invariants the computation
3766    depends on.  */
3767
3768 static unsigned
3769 ptr_difference_cost (struct ivopts_data *data,
3770                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3771                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3772 {
3773   HOST_WIDE_INT diff = 0;
3774   unsigned cost;
3775
3776   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3777
3778   if (ptr_difference_const (e1, e2, &diff))
3779     {
3780       *offset += diff;
3781       *symbol_present = false;
3782       *var_present = false;
3783       return 0;
3784     }
3785
3786   if (e2 == integer_zero_node)
3787     return split_address_cost (data, TREE_OPERAND (e1, 0),
3788                                symbol_present, var_present, offset, depends_on);
3789
3790   *symbol_present = false;
3791   *var_present = true;
3792
3793   cost = force_var_cost (data, e1, depends_on);
3794   cost += force_var_cost (data, e2, depends_on);
3795   cost += add_cost (Pmode);
3796
3797   return cost;
3798 }
3799
3800 /* Estimates cost of expressing difference E1 - E2 as
3801    var + symbol + offset.  The value of offset is added to OFFSET,
3802    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3803    part is missing.  DEPENDS_ON is a set of the invariants the computation
3804    depends on.  */
3805
3806 static unsigned
3807 difference_cost (struct ivopts_data *data,
3808                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3809                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3810 {
3811   unsigned cost;
3812   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3813   unsigned HOST_WIDE_INT off1, off2;
3814
3815   e1 = strip_offset (e1, &off1);
3816   e2 = strip_offset (e2, &off2);
3817   *offset += off1 - off2;
3818
3819   STRIP_NOPS (e1);
3820   STRIP_NOPS (e2);
3821
3822   if (TREE_CODE (e1) == ADDR_EXPR)
3823     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3824                                 depends_on);
3825   *symbol_present = false;
3826
3827   if (operand_equal_p (e1, e2, 0))
3828     {
3829       *var_present = false;
3830       return 0;
3831     }
3832   *var_present = true;
3833   if (zero_p (e2))
3834     return force_var_cost (data, e1, depends_on);
3835
3836   if (zero_p (e1))
3837     {
3838       cost = force_var_cost (data, e2, depends_on);
3839       cost += multiply_by_cost (-1, mode);
3840
3841       return cost;
3842     }
3843
3844   cost = force_var_cost (data, e1, depends_on);
3845   cost += force_var_cost (data, e2, depends_on);
3846   cost += add_cost (mode);
3847
3848   return cost;
3849 }
3850
3851 /* Determines the cost of the computation by that USE is expressed
3852    from induction variable CAND.  If ADDRESS_P is true, we just need
3853    to create an address from it, otherwise we want to get it into
3854    register.  A set of invariants we depend on is stored in
3855    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3856
3857 static unsigned
3858 get_computation_cost_at (struct ivopts_data *data,
3859                          struct iv_use *use, struct iv_cand *cand,
3860                          bool address_p, bitmap *depends_on, tree at)
3861 {
3862   tree ubase = use->iv->base, ustep = use->iv->step;
3863   tree cbase, cstep;
3864   tree utype = TREE_TYPE (ubase), ctype;
3865   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3866   HOST_WIDE_INT ratio, aratio;
3867   bool var_present, symbol_present;
3868   unsigned cost = 0, n_sums;
3869
3870   *depends_on = NULL;
3871
3872   /* Only consider real candidates.  */
3873   if (!cand->iv)
3874     return INFTY;
3875
3876   cbase = cand->iv->base;
3877   cstep = cand->iv->step;
3878   ctype = TREE_TYPE (cbase);
3879
3880   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3881     {
3882       /* We do not have a precision to express the values of use.  */
3883       return INFTY;
3884     }
3885
3886   if (address_p)
3887     {
3888       /* Do not try to express address of an object with computation based
3889          on address of a different object.  This may cause problems in rtl
3890          level alias analysis (that does not expect this to be happening,
3891          as this is illegal in C), and would be unlikely to be useful
3892          anyway.  */
3893       if (use->iv->base_object
3894           && cand->iv->base_object
3895           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3896         return INFTY;
3897     }
3898
3899   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3900     {
3901       /* TODO -- add direct handling of this case.  */
3902       goto fallback;
3903     }
3904
3905   /* CSTEPI is removed from the offset in case statement is after the
3906      increment.  If the step is not constant, we use zero instead.
3907      This is a bit imprecise (there is the extra addition), but
3908      redundancy elimination is likely to transform the code so that
3909      it uses value of the variable before increment anyway,
3910      so it is not that much unrealistic.  */
3911   if (cst_and_fits_in_hwi (cstep))
3912     cstepi = int_cst_value (cstep);
3913   else
3914     cstepi = 0;
3915
3916   if (cst_and_fits_in_hwi (ustep)
3917       && cst_and_fits_in_hwi (cstep))
3918     {
3919       ustepi = int_cst_value (ustep);
3920
3921       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3922         return INFTY;
3923     }
3924   else
3925     {
3926       double_int rat;
3927
3928       if (!constant_multiple_of (ustep, cstep, &rat))
3929         return INFTY;
3930
3931       if (double_int_fits_in_shwi_p (rat))
3932         ratio = double_int_to_shwi (rat);
3933       else
3934         return INFTY;
3935     }
3936
3937   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3938      or ratio == 1, it is better to handle this like
3939
3940      ubase - ratio * cbase + ratio * var
3941
3942      (also holds in the case ratio == -1, TODO.  */
3943
3944   if (cst_and_fits_in_hwi (cbase))
3945     {
3946       offset = - ratio * int_cst_value (cbase);
3947       cost += difference_cost (data,
3948                                ubase, integer_zero_node,
3949                                &symbol_present, &var_present, &offset,
3950                                depends_on);
3951     }
3952   else if (ratio == 1)
3953     {
3954       cost += difference_cost (data,
3955                                ubase, cbase,
3956                                &symbol_present, &var_present, &offset,
3957                                depends_on);
3958     }
3959   else
3960     {
3961       cost += force_var_cost (data, cbase, depends_on);
3962       cost += add_cost (TYPE_MODE (ctype));
3963       cost += difference_cost (data,
3964                                ubase, integer_zero_node,
3965                                &symbol_present, &var_present, &offset,
3966                                depends_on);
3967     }
3968
3969   /* If we are after the increment, the value of the candidate is higher by
3970      one iteration.  */
3971   if (stmt_after_increment (data->current_loop, cand, at))
3972     offset -= ratio * cstepi;
3973
3974   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3975      (symbol/var/const parts may be omitted).  If we are looking for an address,
3976      find the cost of addressing this.  */
3977   if (address_p)
3978     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3979                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3980
3981   /* Otherwise estimate the costs for computing the expression.  */
3982   aratio = ratio > 0 ? ratio : -ratio;
3983   if (!symbol_present && !var_present && !offset)
3984     {
3985       if (ratio != 1)
3986         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3987
3988       return cost;
3989     }
3990
3991   if (aratio != 1)
3992     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3993
3994   n_sums = 1;
3995   if (var_present
3996       /* Symbol + offset should be compile-time computable.  */
3997       && (symbol_present || offset))
3998     n_sums++;
3999
4000   return cost + n_sums * add_cost (TYPE_MODE (ctype));
4001
4002 fallback:
4003   {
4004     /* Just get the expression, expand it and measure the cost.  */
4005     tree comp = get_computation_at (data->current_loop, use, cand, at);
4006
4007     if (!comp)
4008       return INFTY;
4009
4010     if (address_p)
4011       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
4012
4013     return computation_cost (comp);
4014   }
4015 }
4016
4017 /* Determines the cost of the computation by that USE is expressed
4018    from induction variable CAND.  If ADDRESS_P is true, we just need
4019    to create an address from it, otherwise we want to get it into
4020    register.  A set of invariants we depend on is stored in
4021    DEPENDS_ON.  */
4022
4023 static unsigned
4024 get_computation_cost (struct ivopts_data *data,
4025                       struct iv_use *use, struct iv_cand *cand,
4026                       bool address_p, bitmap *depends_on)
4027 {
4028   return get_computation_cost_at (data,
4029                                   use, cand, address_p, depends_on, use->stmt);
4030 }
4031
4032 /* Determines cost of basing replacement of USE on CAND in a generic
4033    expression.  */
4034
4035 static bool
4036 determine_use_iv_cost_generic (struct ivopts_data *data,
4037                                struct iv_use *use, struct iv_cand *cand)
4038 {
4039   bitmap depends_on;
4040   unsigned cost;
4041
4042   /* The simple case first -- if we need to express value of the preserved
4043      original biv, the cost is 0.  This also prevents us from counting the
4044      cost of increment twice -- once at this use and once in the cost of
4045      the candidate.  */
4046   if (cand->pos == IP_ORIGINAL
4047       && cand->incremented_at == use->stmt)
4048     {
4049       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4050       return true;
4051     }
4052
4053   cost = get_computation_cost (data, use, cand, false, &depends_on);
4054   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4055
4056   return cost != INFTY;
4057 }
4058
4059 /* Determines cost of basing replacement of USE on CAND in an address.  */
4060
4061 static bool
4062 determine_use_iv_cost_address (struct ivopts_data *data,
4063                                struct iv_use *use, struct iv_cand *cand)
4064 {
4065   bitmap depends_on;
4066   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4067
4068   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4069
4070   return cost != INFTY;
4071 }
4072
4073 /* Computes value of induction variable IV in iteration NITER.  */
4074
4075 static tree
4076 iv_value (struct iv *iv, tree niter)
4077 {
4078   tree val;
4079   tree type = TREE_TYPE (iv->base);
4080
4081   niter = fold_convert (type, niter);
4082   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4083
4084   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4085 }
4086
4087 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4088
4089 static tree
4090 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4091 {
4092   tree val = iv_value (cand->iv, niter);
4093   tree type = TREE_TYPE (cand->iv->base);
4094
4095   if (stmt_after_increment (loop, cand, at))
4096     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4097
4098   return val;
4099 }
4100
4101 /* Returns period of induction variable iv.  */
4102
4103 static tree
4104 iv_period (struct iv *iv)
4105 {
4106   tree step = iv->step, period, type;
4107   tree pow2div;
4108
4109   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4110
4111   /* Period of the iv is gcd (step, type range).  Since type range is power
4112      of two, it suffices to determine the maximum power of two that divides
4113      step.  */
4114   pow2div = num_ending_zeros (step);
4115   type = unsigned_type_for (TREE_TYPE (step));
4116
4117   period = build_low_bits_mask (type,
4118                                 (TYPE_PRECISION (type)
4119                                  - tree_low_cst (pow2div, 1)));
4120
4121   return period;
4122 }
4123
4124 /* Returns the comparison operator used when eliminating the iv USE.  */
4125
4126 static enum tree_code
4127 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4128 {
4129   struct loop *loop = data->current_loop;
4130   basic_block ex_bb;
4131   edge exit;
4132
4133   ex_bb = bb_for_stmt (use->stmt);
4134   exit = EDGE_SUCC (ex_bb, 0);
4135   if (flow_bb_inside_loop_p (loop, exit->dest))
4136     exit = EDGE_SUCC (ex_bb, 1);
4137
4138   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4139 }
4140
4141 /* Check whether it is possible to express the condition in USE by comparison
4142    of candidate CAND.  If so, store the value compared with to BOUND.  */
4143
4144 static bool
4145 may_eliminate_iv (struct ivopts_data *data,
4146                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4147 {
4148   basic_block ex_bb;
4149   edge exit;
4150   tree nit, nit_type;
4151   tree wider_type, period, per_type;
4152   struct loop *loop = data->current_loop;
4153
4154   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4155     return false;
4156
4157   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4158      for other conditions inside loop body.  */
4159   ex_bb = bb_for_stmt (use->stmt);
4160   if (use->stmt != last_stmt (ex_bb)
4161       || TREE_CODE (use->stmt) != COND_EXPR)
4162     return false;
4163   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4164     return false;
4165
4166   exit = EDGE_SUCC (ex_bb, 0);
4167   if (flow_bb_inside_loop_p (loop, exit->dest))
4168     exit = EDGE_SUCC (ex_bb, 1);
4169   if (flow_bb_inside_loop_p (loop, exit->dest))
4170     return false;
4171
4172   nit = niter_for_exit (data, exit);
4173   if (!nit)
4174     return false;
4175
4176   nit_type = TREE_TYPE (nit);
4177
4178   /* Determine whether we may use the variable to test whether niter iterations
4179      elapsed.  This is the case iff the period of the induction variable is
4180      greater than the number of iterations.  */
4181   period = iv_period (cand->iv);
4182   if (!period)
4183     return false;
4184   per_type = TREE_TYPE (period);
4185
4186   wider_type = TREE_TYPE (period);
4187   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4188     wider_type = per_type;
4189   else
4190     wider_type = nit_type;
4191
4192   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4193                                       fold_convert (wider_type, period),
4194                                       fold_convert (wider_type, nit))))
4195     return false;
4196
4197   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4198   return true;
4199 }
4200
4201 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4202
4203 static bool
4204 determine_use_iv_cost_condition (struct ivopts_data *data,
4205                                  struct iv_use *use, struct iv_cand *cand)
4206 {
4207   tree bound = NULL_TREE, op, cond;
4208   bitmap depends_on = NULL;
4209   unsigned cost;
4210
4211   /* Only consider real candidates.  */
4212   if (!cand->iv)
4213     {
4214       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4215       return false;
4216     }
4217
4218   if (may_eliminate_iv (data, use, cand, &bound))
4219     {
4220       cost = force_var_cost (data, bound, &depends_on);
4221
4222       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4223       return cost != INFTY;
4224     }
4225
4226   /* The induction variable elimination failed; just express the original
4227      giv.  If it is compared with an invariant, note that we cannot get
4228      rid of it.  */
4229   cost = get_computation_cost (data, use, cand, false, &depends_on);
4230
4231   cond = *use->op_p;
4232   if (TREE_CODE (cond) != SSA_NAME)
4233     {
4234       op = TREE_OPERAND (cond, 0);
4235       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4236         op = TREE_OPERAND (cond, 1);
4237       if (TREE_CODE (op) == SSA_NAME)
4238         {
4239           op = get_iv (data, op)->base;
4240           fd_ivopts_data = data;
4241           walk_tree (&op, find_depends, &depends_on, NULL);
4242         }
4243     }
4244
4245   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4246   return cost != INFTY;
4247 }
4248
4249 /* Determines cost of basing replacement of USE on CAND.  Returns false
4250    if USE cannot be based on CAND.  */
4251
4252 static bool
4253 determine_use_iv_cost (struct ivopts_data *data,
4254                        struct iv_use *use, struct iv_cand *cand)
4255 {
4256   switch (use->type)
4257     {
4258     case USE_NONLINEAR_EXPR:
4259       return determine_use_iv_cost_generic (data, use, cand);
4260
4261     case USE_ADDRESS:
4262       return determine_use_iv_cost_address (data, use, cand);
4263
4264     case USE_COMPARE:
4265       return determine_use_iv_cost_condition (data, use, cand);
4266
4267     default:
4268       gcc_unreachable ();
4269     }
4270 }
4271
4272 /* Determines costs of basing the use of the iv on an iv candidate.  */
4273
4274 static void
4275 determine_use_iv_costs (struct ivopts_data *data)
4276 {
4277   unsigned i, j;
4278   struct iv_use *use;
4279   struct iv_cand *cand;
4280   bitmap to_clear = BITMAP_ALLOC (NULL);
4281
4282   alloc_use_cost_map (data);
4283
4284   for (i = 0; i < n_iv_uses (data); i++)
4285     {
4286       use = iv_use (data, i);
4287
4288       if (data->consider_all_candidates)
4289         {
4290           for (j = 0; j < n_iv_cands (data); j++)
4291             {
4292               cand = iv_cand (data, j);
4293               determine_use_iv_cost (data, use, cand);
4294             }
4295         }
4296       else
4297         {
4298           bitmap_iterator bi;
4299
4300           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4301             {
4302               cand = iv_cand (data, j);
4303               if (!determine_use_iv_cost (data, use, cand))
4304                 bitmap_set_bit (to_clear, j);
4305             }
4306
4307           /* Remove the candidates for that the cost is infinite from
4308              the list of related candidates.  */
4309           bitmap_and_compl_into (use->related_cands, to_clear);
4310           bitmap_clear (to_clear);
4311         }
4312     }
4313
4314   BITMAP_FREE (to_clear);
4315
4316   if (dump_file && (dump_flags & TDF_DETAILS))
4317     {
4318       fprintf (dump_file, "Use-candidate costs:\n");
4319
4320       for (i = 0; i < n_iv_uses (data); i++)
4321         {
4322           use = iv_use (data, i);
4323
4324           fprintf (dump_file, "Use %d:\n", i);
4325           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4326           for (j = 0; j < use->n_map_members; j++)
4327             {
4328               if (!use->cost_map[j].cand
4329                   || use->cost_map[j].cost == INFTY)
4330                 continue;
4331
4332               fprintf (dump_file, "  %d\t%d\t",
4333                        use->cost_map[j].cand->id,
4334                        use->cost_map[j].cost);
4335               if (use->cost_map[j].depends_on)
4336                 bitmap_print (dump_file,
4337                               use->cost_map[j].depends_on, "","");
4338               fprintf (dump_file, "\n");
4339             }
4340
4341           fprintf (dump_file, "\n");
4342         }
4343       fprintf (dump_file, "\n");
4344     }
4345 }
4346
4347 /* Determines cost of the candidate CAND.  */
4348
4349 static void
4350 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4351 {
4352   unsigned cost_base, cost_step;
4353   tree base;
4354
4355   if (!cand->iv)
4356     {
4357       cand->cost = 0;
4358       return;
4359     }
4360
4361   /* There are two costs associated with the candidate -- its increment
4362      and its initialization.  The second is almost negligible for any loop
4363      that rolls enough, so we take it just very little into account.  */
4364
4365   base = cand->iv->base;
4366   cost_base = force_var_cost (data, base, NULL);
4367   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4368
4369   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4370
4371   /* Prefer the original iv unless we may gain something by replacing it;
4372      this is not really relevant for artificial ivs created by other
4373      passes.  */
4374   if (cand->pos == IP_ORIGINAL
4375       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4376     cand->cost--;
4377
4378   /* Prefer not to insert statements into latch unless there are some
4379      already (so that we do not create unnecessary jumps).  */
4380   if (cand->pos == IP_END
4381       && empty_block_p (ip_end_pos (data->current_loop)))
4382     cand->cost++;
4383 }
4384
4385 /* Determines costs of computation of the candidates.  */
4386
4387 static void
4388 determine_iv_costs (struct ivopts_data *data)
4389 {
4390   unsigned i;
4391
4392   if (dump_file && (dump_flags & TDF_DETAILS))
4393     {
4394       fprintf (dump_file, "Candidate costs:\n");
4395       fprintf (dump_file, "  cand\tcost\n");
4396     }
4397
4398   for (i = 0; i < n_iv_cands (data); i++)
4399     {
4400       struct iv_cand *cand = iv_cand (data, i);
4401
4402       determine_iv_cost (data, cand);
4403
4404       if (dump_file && (dump_flags & TDF_DETAILS))
4405         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4406     }
4407
4408   if (dump_file && (dump_flags & TDF_DETAILS))
4409     fprintf (dump_file, "\n");
4410 }
4411
4412 /* Calculates cost for having SIZE induction variables.  */
4413
4414 static unsigned
4415 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4416 {
4417   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4418 }
4419
4420 /* For each size of the induction variable set determine the penalty.  */
4421
4422 static void
4423 determine_set_costs (struct ivopts_data *data)
4424 {
4425   unsigned j, n;
4426   tree phi, op;
4427   struct loop *loop = data->current_loop;
4428   bitmap_iterator bi;
4429
4430   /* We use the following model (definitely improvable, especially the
4431      cost function -- TODO):
4432
4433      We estimate the number of registers available (using MD data), name it A.
4434
4435      We estimate the number of registers used by the loop, name it U.  This
4436      number is obtained as the number of loop phi nodes (not counting virtual
4437      registers and bivs) + the number of variables from outside of the loop.
4438
4439      We set a reserve R (free regs that are used for temporary computations,
4440      etc.).  For now the reserve is a constant 3.
4441
4442      Let I be the number of induction variables.
4443
4444      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4445         make a lot of ivs without a reason).
4446      -- if A - R < U + I <= A, the cost is I * PRES_COST
4447      -- if U + I > A, the cost is I * PRES_COST and
4448         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4449
4450   if (dump_file && (dump_flags & TDF_DETAILS))
4451     {
4452       fprintf (dump_file, "Global costs:\n");
4453       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4454       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4455       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4456       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4457     }
4458
4459   n = 0;
4460   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4461     {
4462       op = PHI_RESULT (phi);
4463
4464       if (!is_gimple_reg (op))
4465         continue;
4466
4467       if (get_iv (data, op))
4468         continue;
4469
4470       n++;
4471     }
4472
4473   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4474     {
4475       struct version_info *info = ver_info (data, j);
4476
4477       if (info->inv_id && info->has_nonlin_use)
4478         n++;
4479     }
4480
4481   data->regs_used = n;
4482   if (dump_file && (dump_flags & TDF_DETAILS))
4483     fprintf (dump_file, "  regs_used %d\n", n);
4484
4485   if (dump_file && (dump_flags & TDF_DETAILS))
4486     {
4487       fprintf (dump_file, "  cost for size:\n");
4488       fprintf (dump_file, "  ivs\tcost\n");
4489       for (j = 0; j <= 2 * target_avail_regs; j++)
4490         fprintf (dump_file, "  %d\t%d\n", j,
4491                  ivopts_global_cost_for_size (data, j));
4492       fprintf (dump_file, "\n");
4493     }
4494 }
4495
4496 /* Returns true if A is a cheaper cost pair than B.  */
4497
4498 static bool
4499 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4500 {
4501   if (!a)
4502     return false;
4503
4504   if (!b)
4505     return true;
4506
4507   if (a->cost < b->cost)
4508     return true;
4509
4510   if (a->cost > b->cost)
4511     return false;
4512
4513   /* In case the costs are the same, prefer the cheaper candidate.  */
4514   if (a->cand->cost < b->cand->cost)
4515     return true;
4516
4517   return false;
4518 }
4519
4520 /* Computes the cost field of IVS structure.  */
4521
4522 static void
4523 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4524 {
4525   unsigned cost = 0;
4526
4527   cost += ivs->cand_use_cost;
4528   cost += ivs->cand_cost;
4529   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4530
4531   ivs->cost = cost;
4532 }
4533
4534 /* Remove invariants in set INVS to set IVS.  */
4535
4536 static void
4537 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4538 {
4539   bitmap_iterator bi;
4540   unsigned iid;
4541
4542   if (!invs)
4543     return;
4544
4545   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4546     {
4547       ivs->n_invariant_uses[iid]--;
4548       if (ivs->n_invariant_uses[iid] == 0)
4549         ivs->n_regs--;
4550     }
4551 }
4552
4553 /* Set USE not to be expressed by any candidate in IVS.  */
4554
4555 static void
4556 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4557                  struct iv_use *use)
4558 {
4559   unsigned uid = use->id, cid;
4560   struct cost_pair *cp;
4561
4562   cp = ivs->cand_for_use[uid];
4563   if (!cp)
4564     return;
4565   cid = cp->cand->id;
4566
4567   ivs->bad_uses++;
4568   ivs->cand_for_use[uid] = NULL;
4569   ivs->n_cand_uses[cid]--;
4570
4571   if (ivs->n_cand_uses[cid] == 0)
4572     {
4573       bitmap_clear_bit (ivs->cands, cid);
4574       /* Do not count the pseudocandidates.  */
4575       if (cp->cand->iv)
4576         ivs->n_regs--;
4577       ivs->n_cands--;
4578       ivs->cand_cost -= cp->cand->cost;
4579
4580       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4581     }
4582
4583   ivs->cand_use_cost -= cp->cost;
4584
4585   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4586   iv_ca_recount_cost (data, ivs);
4587 }
4588
4589 /* Add invariants in set INVS to set IVS.  */
4590
4591 static void
4592 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4593 {
4594   bitmap_iterator bi;
4595   unsigned iid;
4596
4597   if (!invs)
4598     return;
4599
4600   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4601     {
4602       ivs->n_invariant_uses[iid]++;
4603       if (ivs->n_invariant_uses[iid] == 1)
4604         ivs->n_regs++;
4605     }
4606 }
4607
4608 /* Set cost pair for USE in set IVS to CP.  */
4609
4610 static void
4611 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4612               struct iv_use *use, struct cost_pair *cp)
4613 {
4614   unsigned uid = use->id, cid;
4615
4616   if (ivs->cand_for_use[uid] == cp)
4617     return;
4618
4619   if (ivs->cand_for_use[uid])
4620     iv_ca_set_no_cp (data, ivs, use);
4621
4622   if (cp)
4623     {
4624       cid = cp->cand->id;
4625
4626       ivs->bad_uses--;
4627       ivs->cand_for_use[uid] = cp;
4628       ivs->n_cand_uses[cid]++;
4629       if (ivs->n_cand_uses[cid] == 1)
4630         {
4631           bitmap_set_bit (ivs->cands, cid);
4632           /* Do not count the pseudocandidates.  */
4633           if (cp->cand->iv)
4634             ivs->n_regs++;
4635           ivs->n_cands++;
4636           ivs->cand_cost += cp->cand->cost;
4637
4638           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4639         }
4640
4641       ivs->cand_use_cost += cp->cost;
4642       iv_ca_set_add_invariants (ivs, cp->depends_on);
4643       iv_ca_recount_cost (data, ivs);
4644     }
4645 }
4646
4647 /* Extend set IVS by expressing USE by some of the candidates in it
4648    if possible.  */
4649
4650 static void
4651 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4652                struct iv_use *use)
4653 {
4654   struct cost_pair *best_cp = NULL, *cp;
4655   bitmap_iterator bi;
4656   unsigned i;
4657
4658   gcc_assert (ivs->upto >= use->id);
4659
4660   if (ivs->upto == use->id)
4661     {
4662       ivs->upto++;
4663       ivs->bad_uses++;
4664     }
4665
4666   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4667     {
4668       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4669
4670       if (cheaper_cost_pair (cp, best_cp))
4671         best_cp = cp;
4672     }
4673
4674   iv_ca_set_cp (data, ivs, use, best_cp);
4675 }
4676
4677 /* Get cost for assignment IVS.  */
4678
4679 static unsigned
4680 iv_ca_cost (struct iv_ca *ivs)
4681 {
4682   return (ivs->bad_uses ? INFTY : ivs->cost);
4683 }
4684
4685 /* Returns true if all dependences of CP are among invariants in IVS.  */
4686
4687 static bool
4688 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4689 {
4690   unsigned i;
4691   bitmap_iterator bi;
4692
4693   if (!cp->depends_on)
4694     return true;
4695
4696   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4697     {
4698       if (ivs->n_invariant_uses[i] == 0)
4699         return false;
4700     }
4701
4702   return true;
4703 }
4704
4705 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4706    it before NEXT_CHANGE.  */
4707
4708 static struct iv_ca_delta *
4709 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4710                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4711 {
4712   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4713
4714   change->use = use;
4715   change->old_cp = old_cp;
4716   change->new_cp = new_cp;
4717   change->next_change = next_change;
4718
4719   return change;
4720 }
4721
4722 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4723    are rewritten.  */
4724
4725 static struct iv_ca_delta *
4726 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4727 {
4728   struct iv_ca_delta *last;
4729
4730   if (!l2)
4731     return l1;
4732
4733   if (!l1)
4734     return l2;
4735
4736   for (last = l1; last->next_change; last = last->next_change)
4737     continue;
4738   last->next_change = l2;
4739
4740   return l1;
4741 }
4742
4743 /* Returns candidate by that USE is expressed in IVS.  */
4744
4745 static struct cost_pair *
4746 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4747 {
4748   return ivs->cand_for_use[use->id];
4749 }
4750
4751 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4752
4753 static struct iv_ca_delta *
4754 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4755 {
4756   struct iv_ca_delta *act, *next, *prev = NULL;
4757   struct cost_pair *tmp;
4758
4759   for (act = delta; act; act = next)
4760     {
4761       next = act->next_change;
4762       act->next_change = prev;
4763       prev = act;
4764
4765       tmp = act->old_cp;
4766       act->old_cp = act->new_cp;
4767       act->new_cp = tmp;
4768     }
4769
4770   return prev;
4771 }
4772
4773 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4774    reverted instead.  */
4775
4776 static void
4777 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4778                     struct iv_ca_delta *delta, bool forward)
4779 {
4780   struct cost_pair *from, *to;
4781   struct iv_ca_delta *act;
4782
4783   if (!forward)
4784     delta = iv_ca_delta_reverse (delta);
4785
4786   for (act = delta; act; act = act->next_change)
4787     {
4788       from = act->old_cp;
4789       to = act->new_cp;
4790       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4791       iv_ca_set_cp (data, ivs, act->use, to);
4792     }
4793
4794   if (!forward)
4795     iv_ca_delta_reverse (delta);
4796 }
4797
4798 /* Returns true if CAND is used in IVS.  */
4799
4800 static bool
4801 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4802 {
4803   return ivs->n_cand_uses[cand->id] > 0;
4804 }
4805
4806 /* Returns number of induction variable candidates in the set IVS.  */
4807
4808 static unsigned
4809 iv_ca_n_cands (struct iv_ca *ivs)
4810 {
4811   return ivs->n_cands;
4812 }
4813
4814 /* Free the list of changes DELTA.  */
4815
4816 static void
4817 iv_ca_delta_free (struct iv_ca_delta **delta)
4818 {
4819   struct iv_ca_delta *act, *next;
4820
4821   for (act = *delta; act; act = next)
4822     {
4823       next = act->next_change;
4824       free (act);
4825     }
4826
4827   *delta = NULL;
4828 }
4829
4830 /* Allocates new iv candidates assignment.  */
4831
4832 static struct iv_ca *
4833 iv_ca_new (struct ivopts_data *data)
4834 {
4835   struct iv_ca *nw = XNEW (struct iv_ca);
4836
4837   nw->upto = 0;
4838   nw->bad_uses = 0;
4839   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4840   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4841   nw->cands = BITMAP_ALLOC (NULL);
4842   nw->n_cands = 0;
4843   nw->n_regs = 0;
4844   nw->cand_use_cost = 0;
4845   nw->cand_cost = 0;
4846   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4847   nw->cost = 0;
4848
4849   return nw;
4850 }
4851
4852 /* Free memory occupied by the set IVS.  */
4853
4854 static void
4855 iv_ca_free (struct iv_ca **ivs)
4856 {
4857   free ((*ivs)->cand_for_use);
4858   free ((*ivs)->n_cand_uses);
4859   BITMAP_FREE ((*ivs)->cands);
4860   free ((*ivs)->n_invariant_uses);
4861   free (*ivs);
4862   *ivs = NULL;
4863 }
4864
4865 /* Dumps IVS to FILE.  */
4866
4867 static void
4868 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4869 {
4870   const char *pref = "  invariants ";
4871   unsigned i;
4872
4873   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4874   bitmap_print (file, ivs->cands, "  candidates ","\n");
4875
4876   for (i = 1; i <= data->max_inv_id; i++)
4877     if (ivs->n_invariant_uses[i])
4878       {
4879         fprintf (file, "%s%d", pref, i);
4880         pref = ", ";
4881       }
4882   fprintf (file, "\n");
4883 }
4884
4885 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4886    new set, and store differences in DELTA.  Number of induction variables
4887    in the new set is stored to N_IVS.  */
4888
4889 static unsigned
4890 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4891               struct iv_cand *cand, struct iv_ca_delta **delta,
4892               unsigned *n_ivs)
4893 {
4894   unsigned i, cost;
4895   struct iv_use *use;
4896   struct cost_pair *old_cp, *new_cp;
4897
4898   *delta = NULL;
4899   for (i = 0; i < ivs->upto; i++)
4900     {
4901       use = iv_use (data, i);
4902       old_cp = iv_ca_cand_for_use (ivs, use);
4903
4904       if (old_cp
4905           && old_cp->cand == cand)
4906         continue;
4907
4908       new_cp = get_use_iv_cost (data, use, cand);
4909       if (!new_cp)
4910         continue;
4911
4912       if (!iv_ca_has_deps (ivs, new_cp))
4913         continue;
4914
4915       if (!cheaper_cost_pair (new_cp, old_cp))
4916         continue;
4917
4918       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4919     }
4920
4921   iv_ca_delta_commit (data, ivs, *delta, true);
4922   cost = iv_ca_cost (ivs);
4923   if (n_ivs)
4924     *n_ivs = iv_ca_n_cands (ivs);
4925   iv_ca_delta_commit (data, ivs, *delta, false);
4926
4927   return cost;
4928 }
4929
4930 /* Try narrowing set IVS by removing CAND.  Return the cost of
4931    the new set and store the differences in DELTA.  */
4932
4933 static unsigned
4934 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4935               struct iv_cand *cand, struct iv_ca_delta **delta)
4936 {
4937   unsigned i, ci;
4938   struct iv_use *use;
4939   struct cost_pair *old_cp, *new_cp, *cp;
4940   bitmap_iterator bi;
4941   struct iv_cand *cnd;
4942   unsigned cost;
4943
4944   *delta = NULL;
4945   for (i = 0; i < n_iv_uses (data); i++)
4946     {
4947       use = iv_use (data, i);
4948
4949       old_cp = iv_ca_cand_for_use (ivs, use);
4950       if (old_cp->cand != cand)
4951         continue;
4952
4953       new_cp = NULL;
4954
4955       if (data->consider_all_candidates)
4956         {
4957           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4958             {
4959               if (ci == cand->id)
4960                 continue;
4961
4962               cnd = iv_cand (data, ci);
4963
4964               cp = get_use_iv_cost (data, use, cnd);
4965               if (!cp)
4966                 continue;
4967               if (!iv_ca_has_deps (ivs, cp))
4968                 continue;
4969
4970               if (!cheaper_cost_pair (cp, new_cp))
4971                 continue;
4972
4973               new_cp = cp;
4974             }
4975         }
4976       else
4977         {
4978           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4979             {
4980               if (ci == cand->id)
4981                 continue;
4982
4983               cnd = iv_cand (data, ci);
4984
4985               cp = get_use_iv_cost (data, use, cnd);
4986               if (!cp)
4987                 continue;
4988               if (!iv_ca_has_deps (ivs, cp))
4989                 continue;
4990
4991               if (!cheaper_cost_pair (cp, new_cp))
4992                 continue;
4993
4994               new_cp = cp;
4995             }
4996         }
4997
4998       if (!new_cp)
4999         {
5000           iv_ca_delta_free (delta);
5001           return INFTY;
5002         }
5003
5004       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5005     }
5006
5007   iv_ca_delta_commit (data, ivs, *delta, true);
5008   cost = iv_ca_cost (ivs);
5009   iv_ca_delta_commit (data, ivs, *delta, false);
5010
5011   return cost;
5012 }
5013
5014 /* Try optimizing the set of candidates IVS by removing candidates different
5015    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5016    differences in DELTA.  */
5017
5018 static unsigned
5019 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5020              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5021 {
5022   bitmap_iterator bi;
5023   struct iv_ca_delta *act_delta, *best_delta;
5024   unsigned i, best_cost, acost;
5025   struct iv_cand *cand;
5026
5027   best_delta = NULL;
5028   best_cost = iv_ca_cost (ivs);
5029
5030   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5031     {
5032       cand = iv_cand (data, i);
5033
5034       if (cand == except_cand)
5035         continue;
5036
5037       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5038
5039       if (acost < best_cost)
5040         {
5041           best_cost = acost;
5042           iv_ca_delta_free (&best_delta);
5043           best_delta = act_delta;
5044         }
5045       else
5046         iv_ca_delta_free (&act_delta);
5047     }
5048
5049   if (!best_delta)
5050     {
5051       *delta = NULL;
5052       return best_cost;
5053     }
5054
5055   /* Recurse to possibly remove other unnecessary ivs.  */
5056   iv_ca_delta_commit (data, ivs, best_delta, true);
5057   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5058   iv_ca_delta_commit (data, ivs, best_delta, false);
5059   *delta = iv_ca_delta_join (best_delta, *delta);
5060   return best_cost;
5061 }
5062
5063 /* Tries to extend the sets IVS in the best possible way in order
5064    to express the USE.  */
5065
5066 static bool
5067 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5068                   struct iv_use *use)
5069 {
5070   unsigned best_cost, act_cost;
5071   unsigned i;
5072   bitmap_iterator bi;
5073   struct iv_cand *cand;
5074   struct iv_ca_delta *best_delta = NULL, *act_delta;
5075   struct cost_pair *cp;
5076
5077   iv_ca_add_use (data, ivs, use);
5078   best_cost = iv_ca_cost (ivs);
5079
5080   cp = iv_ca_cand_for_use (ivs, use);
5081   if (cp)
5082     {
5083       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5084       iv_ca_set_no_cp (data, ivs, use);
5085     }
5086
5087   /* First try important candidates.  Only if it fails, try the specific ones.
5088      Rationale -- in loops with many variables the best choice often is to use
5089      just one generic biv.  If we added here many ivs specific to the uses,
5090      the optimization algorithm later would be likely to get stuck in a local
5091      minimum, thus causing us to create too many ivs.  The approach from
5092      few ivs to more seems more likely to be successful -- starting from few
5093      ivs, replacing an expensive use by a specific iv should always be a
5094      win.  */
5095   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5096     {
5097       cand = iv_cand (data, i);
5098
5099       if (iv_ca_cand_used_p (ivs, cand))
5100         continue;
5101
5102       cp = get_use_iv_cost (data, use, cand);
5103       if (!cp)
5104         continue;
5105
5106       iv_ca_set_cp (data, ivs, use, cp);
5107       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5108       iv_ca_set_no_cp (data, ivs, use);
5109       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5110
5111       if (act_cost < best_cost)
5112         {
5113           best_cost = act_cost;
5114
5115           iv_ca_delta_free (&best_delta);
5116           best_delta = act_delta;
5117         }
5118       else
5119         iv_ca_delta_free (&act_delta);
5120     }
5121
5122   if (best_cost == INFTY)
5123     {
5124       for (i = 0; i < use->n_map_members; i++)
5125         {
5126           cp = use->cost_map + i;
5127           cand = cp->cand;
5128           if (!cand)
5129             continue;
5130
5131           /* Already tried this.  */
5132           if (cand->important)
5133             continue;
5134
5135           if (iv_ca_cand_used_p (ivs, cand))
5136             continue;
5137
5138           act_delta = NULL;
5139           iv_ca_set_cp (data, ivs, use, cp);
5140           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5141           iv_ca_set_no_cp (data, ivs, use);
5142           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5143                                        cp, act_delta);
5144
5145           if (act_cost < best_cost)
5146             {
5147               best_cost = act_cost;
5148
5149               if (best_delta)
5150                 iv_ca_delta_free (&best_delta);
5151               best_delta = act_delta;
5152             }
5153           else
5154             iv_ca_delta_free (&act_delta);
5155         }
5156     }
5157
5158   iv_ca_delta_commit (data, ivs, best_delta, true);
5159   iv_ca_delta_free (&best_delta);
5160
5161   return (best_cost != INFTY);
5162 }
5163
5164 /* Finds an initial assignment of candidates to uses.  */
5165
5166 static struct iv_ca *
5167 get_initial_solution (struct ivopts_data *data)
5168 {
5169   struct iv_ca *ivs = iv_ca_new (data);
5170   unsigned i;
5171
5172   for (i = 0; i < n_iv_uses (data); i++)
5173     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5174       {
5175         iv_ca_free (&ivs);
5176         return NULL;
5177       }
5178
5179   return ivs;
5180 }
5181
5182 /* Tries to improve set of induction variables IVS.  */
5183
5184 static bool
5185 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5186 {
5187   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5188   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5189   struct iv_cand *cand;
5190
5191   /* Try extending the set of induction variables by one.  */
5192   for (i = 0; i < n_iv_cands (data); i++)
5193     {
5194       cand = iv_cand (data, i);
5195
5196       if (iv_ca_cand_used_p (ivs, cand))
5197         continue;
5198
5199       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5200       if (!act_delta)
5201         continue;
5202
5203       /* If we successfully added the candidate and the set is small enough,
5204          try optimizing it by removing other candidates.  */
5205       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5206         {
5207           iv_ca_delta_commit (data, ivs, act_delta, true);
5208           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5209           iv_ca_delta_commit (data, ivs, act_delta, false);
5210           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5211         }
5212
5213       if (acost < best_cost)
5214         {
5215           best_cost = acost;
5216           iv_ca_delta_free (&best_delta);
5217           best_delta = act_delta;
5218         }
5219       else
5220         iv_ca_delta_free (&act_delta);
5221     }
5222
5223   if (!best_delta)
5224     {
5225       /* Try removing the candidates from the set instead.  */
5226       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5227
5228       /* Nothing more we can do.  */
5229       if (!best_delta)
5230         return false;
5231     }
5232
5233   iv_ca_delta_commit (data, ivs, best_delta, true);
5234   gcc_assert (best_cost == iv_ca_cost (ivs));
5235   iv_ca_delta_free (&best_delta);
5236   return true;
5237 }
5238
5239 /* Attempts to find the optimal set of induction variables.  We do simple
5240    greedy heuristic -- we try to replace at most one candidate in the selected
5241    solution and remove the unused ivs while this improves the cost.  */
5242
5243 static struct iv_ca *
5244 find_optimal_iv_set (struct ivopts_data *data)
5245 {
5246   unsigned i;
5247   struct iv_ca *set;
5248   struct iv_use *use;
5249
5250   /* Get the initial solution.  */
5251   set = get_initial_solution (data);
5252   if (!set)
5253     {
5254       if (dump_file && (dump_flags & TDF_DETAILS))
5255         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5256       return NULL;
5257     }
5258
5259   if (dump_file && (dump_flags & TDF_DETAILS))
5260     {
5261       fprintf (dump_file, "Initial set of candidates:\n");
5262       iv_ca_dump (data, dump_file, set);
5263     }
5264
5265   while (try_improve_iv_set (data, set))
5266     {
5267       if (dump_file && (dump_flags & TDF_DETAILS))
5268         {
5269           fprintf (dump_file, "Improved to:\n");
5270           iv_ca_dump (data, dump_file, set);
5271         }
5272     }
5273
5274   if (dump_file && (dump_flags & TDF_DETAILS))
5275     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5276
5277   for (i = 0; i < n_iv_uses (data); i++)
5278     {
5279       use = iv_use (data, i);
5280       use->selected = iv_ca_cand_for_use (set, use)->cand;
5281     }
5282
5283   return set;
5284 }
5285
5286 /* Creates a new induction variable corresponding to CAND.  */
5287
5288 static void
5289 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5290 {
5291   block_stmt_iterator incr_pos;
5292   tree base;
5293   bool after = false;
5294
5295   if (!cand->iv)
5296     return;
5297
5298   switch (cand->pos)
5299     {
5300     case IP_NORMAL:
5301       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5302       break;
5303
5304     case IP_END:
5305       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5306       after = true;
5307       break;
5308
5309     case IP_ORIGINAL:
5310       /* Mark that the iv is preserved.  */
5311       name_info (data, cand->var_before)->preserve_biv = true;
5312       name_info (data, cand->var_after)->preserve_biv = true;
5313
5314       /* Rewrite the increment so that it uses var_before directly.  */
5315       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5316
5317       return;
5318     }
5319
5320   gimple_add_tmp_var (cand->var_before);
5321   add_referenced_var (cand->var_before);
5322
5323   base = unshare_expr (cand->iv->base);
5324
5325   create_iv (base, unshare_expr (cand->iv->step),
5326              cand->var_before, data->current_loop,
5327              &incr_pos, after, &cand->var_before, &cand->var_after);
5328 }
5329
5330 /* Creates new induction variables described in SET.  */
5331
5332 static void
5333 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5334 {
5335   unsigned i;
5336   struct iv_cand *cand;
5337   bitmap_iterator bi;
5338
5339   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5340     {
5341       cand = iv_cand (data, i);
5342       create_new_iv (data, cand);
5343     }
5344 }
5345
5346 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5347    is true, remove also the ssa name defined by the statement.  */
5348
5349 static void
5350 remove_statement (tree stmt, bool including_defined_name)
5351 {
5352   if (TREE_CODE (stmt) == PHI_NODE)
5353     {
5354       remove_phi_node (stmt, NULL_TREE, including_defined_name);
5355     }
5356   else
5357     {
5358       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5359
5360       bsi_remove (&bsi, true);
5361     }
5362 }
5363
5364 /* Rewrites USE (definition of iv used in a nonlinear expression)
5365    using candidate CAND.  */
5366
5367 static void
5368 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5369                             struct iv_use *use, struct iv_cand *cand)
5370 {
5371   tree comp;
5372   tree op, stmts, tgt, ass;
5373   block_stmt_iterator bsi, pbsi;
5374
5375   /* An important special case -- if we are asked to express value of
5376      the original iv by itself, just exit; there is no need to
5377      introduce a new computation (that might also need casting the
5378      variable to unsigned and back).  */
5379   if (cand->pos == IP_ORIGINAL
5380       && cand->incremented_at == use->stmt)
5381     {
5382       tree step, ctype, utype;
5383       enum tree_code incr_code = PLUS_EXPR;
5384
5385       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5386       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5387
5388       step = cand->iv->step;
5389       ctype = TREE_TYPE (step);
5390       utype = TREE_TYPE (cand->var_after);
5391       if (TREE_CODE (step) == NEGATE_EXPR)
5392         {
5393           incr_code = MINUS_EXPR;
5394           step = TREE_OPERAND (step, 0);
5395         }
5396
5397       /* Check whether we may leave the computation unchanged.
5398          This is the case only if it does not rely on other
5399          computations in the loop -- otherwise, the computation
5400          we rely upon may be removed in remove_unused_ivs,
5401          thus leading to ICE.  */
5402       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5403       if (TREE_CODE (op) == PLUS_EXPR
5404           || TREE_CODE (op) == MINUS_EXPR)
5405         {
5406           if (TREE_OPERAND (op, 0) == cand->var_before)
5407             op = TREE_OPERAND (op, 1);
5408           else if (TREE_CODE (op) == PLUS_EXPR
5409                    && TREE_OPERAND (op, 1) == cand->var_before)
5410             op = TREE_OPERAND (op, 0);
5411           else
5412             op = NULL_TREE;
5413         }
5414       else
5415         op = NULL_TREE;
5416
5417       if (op
5418           && (TREE_CODE (op) == INTEGER_CST
5419               || operand_equal_p (op, step, 0)))
5420         return;
5421
5422       /* Otherwise, add the necessary computations to express
5423          the iv.  */
5424       op = fold_convert (ctype, cand->var_before);
5425       comp = fold_convert (utype,
5426                            build2 (incr_code, ctype, op,
5427                                    unshare_expr (step)));
5428     }
5429   else
5430     comp = get_computation (data->current_loop, use, cand);
5431
5432   switch (TREE_CODE (use->stmt))
5433     {
5434     case PHI_NODE:
5435       tgt = PHI_RESULT (use->stmt);
5436
5437       /* If we should keep the biv, do not replace it.  */
5438       if (name_info (data, tgt)->preserve_biv)
5439         return;
5440
5441       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5442       while (!bsi_end_p (pbsi)
5443              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5444         {
5445           bsi = pbsi;
5446           bsi_next (&pbsi);
5447         }
5448       break;
5449
5450     case GIMPLE_MODIFY_STMT:
5451       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5452       bsi = bsi_for_stmt (use->stmt);
5453       break;
5454
5455     default:
5456       gcc_unreachable ();
5457     }
5458
5459   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5460
5461   if (TREE_CODE (use->stmt) == PHI_NODE)
5462     {
5463       if (stmts)
5464         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5465       ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
5466       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5467       remove_statement (use->stmt, false);
5468       SSA_NAME_DEF_STMT (tgt) = ass;
5469     }
5470   else
5471     {
5472       if (stmts)
5473         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5474       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5475     }
5476 }
5477
5478 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5479    for_each_index.  */
5480
5481 static bool
5482 idx_remove_ssa_names (tree base, tree *idx,
5483                       void *data ATTRIBUTE_UNUSED)
5484 {
5485   tree *op;
5486
5487   if (TREE_CODE (*idx) == SSA_NAME)
5488     *idx = SSA_NAME_VAR (*idx);
5489
5490   if (TREE_CODE (base) == ARRAY_REF)
5491     {
5492       op = &TREE_OPERAND (base, 2);
5493       if (*op
5494           && TREE_CODE (*op) == SSA_NAME)
5495         *op = SSA_NAME_VAR (*op);
5496       op = &TREE_OPERAND (base, 3);
5497       if (*op
5498           && TREE_CODE (*op) == SSA_NAME)
5499         *op = SSA_NAME_VAR (*op);
5500     }
5501
5502   return true;
5503 }
5504
5505 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5506
5507 static tree
5508 unshare_and_remove_ssa_names (tree ref)
5509 {
5510   ref = unshare_expr (ref);
5511   for_each_index (&ref, idx_remove_ssa_names, NULL);
5512
5513   return ref;
5514 }
5515
5516 /* Extract the alias analysis info for the memory reference REF.  There are
5517    several ways how this information may be stored and what precisely is
5518    its semantics depending on the type of the reference, but there always is
5519    somewhere hidden one _DECL node that is used to determine the set of
5520    virtual operands for the reference.  The code below deciphers this jungle
5521    and extracts this single useful piece of information.  */
5522
5523 static tree
5524 get_ref_tag (tree ref, tree orig)
5525 {
5526   tree var = get_base_address (ref);
5527   tree aref = NULL_TREE, tag, sv;
5528   HOST_WIDE_INT offset, size, maxsize;
5529
5530   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5531     {
5532       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5533       if (ref)
5534         break;
5535     }
5536
5537   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5538     return unshare_expr (sv);
5539
5540   if (!var)
5541     return NULL_TREE;
5542
5543   if (TREE_CODE (var) == INDIRECT_REF)
5544     {
5545       /* If the base is a dereference of a pointer, first check its name memory
5546          tag.  If it does not have one, use its symbol memory tag.  */
5547       var = TREE_OPERAND (var, 0);
5548       if (TREE_CODE (var) != SSA_NAME)
5549         return NULL_TREE;
5550
5551       if (SSA_NAME_PTR_INFO (var))
5552         {
5553           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5554           if (tag)
5555             return tag;
5556         }
5557
5558       var = SSA_NAME_VAR (var);
5559       tag = symbol_mem_tag (var);
5560       gcc_assert (tag != NULL_TREE);
5561       return tag;
5562     }
5563   else
5564     {
5565       if (!DECL_P (var))
5566         return NULL_TREE;
5567
5568       tag = symbol_mem_tag (var);
5569       if (tag)
5570         return tag;
5571
5572       return var;
5573     }
5574 }
5575
5576 /* Copies the reference information from OLD_REF to NEW_REF.  */
5577
5578 static void
5579 copy_ref_info (tree new_ref, tree old_ref)
5580 {
5581   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5582     copy_mem_ref_info (new_ref, old_ref);
5583   else
5584     {
5585       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5586       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5587     }
5588 }
5589
5590 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5591
5592 static void
5593 rewrite_use_address (struct ivopts_data *data,
5594                      struct iv_use *use, struct iv_cand *cand)
5595 {
5596   struct affine_tree_combination aff;
5597   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5598   tree ref;
5599
5600   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5601   unshare_aff_combination (&aff);
5602
5603   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5604   copy_ref_info (ref, *use->op_p);
5605   *use->op_p = ref;
5606 }
5607
5608 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5609    candidate CAND.  */
5610
5611 static void
5612 rewrite_use_compare (struct ivopts_data *data,
5613                      struct iv_use *use, struct iv_cand *cand)
5614 {
5615   tree comp;
5616   tree *op_p, cond, op, stmts, bound;
5617   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5618   enum tree_code compare;
5619   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5620
5621   bound = cp->value;
5622   if (bound)
5623     {
5624       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5625       tree var_type = TREE_TYPE (var);
5626
5627       compare = iv_elimination_compare (data, use);
5628       bound = fold_convert (var_type, bound);
5629       op = force_gimple_operand (unshare_expr (bound), &stmts,
5630                                  true, NULL_TREE);
5631
5632       if (stmts)
5633         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5634
5635       *use->op_p = build2 (compare, boolean_type_node, var, op);
5636       update_stmt (use->stmt);
5637       return;
5638     }
5639
5640   /* The induction variable elimination failed; just express the original
5641      giv.  */
5642   comp = get_computation (data->current_loop, use, cand);
5643
5644   cond = *use->op_p;
5645   op_p = &TREE_OPERAND (cond, 0);
5646   if (TREE_CODE (*op_p) != SSA_NAME
5647       || zero_p (get_iv (data, *op_p)->step))
5648     op_p = &TREE_OPERAND (cond, 1);
5649
5650   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5651   if (stmts)
5652     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5653
5654   *op_p = op;
5655 }
5656
5657 /* Rewrites USE using candidate CAND.  */
5658
5659 static void
5660 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5661 {
5662   push_stmt_changes (&use->stmt);
5663
5664   switch (use->type)
5665     {
5666       case USE_NONLINEAR_EXPR:
5667         rewrite_use_nonlinear_expr (data, use, cand);
5668         break;
5669
5670       case USE_ADDRESS:
5671         rewrite_use_address (data, use, cand);
5672         break;
5673
5674       case USE_COMPARE:
5675         rewrite_use_compare (data, use, cand);
5676         break;
5677
5678       default:
5679         gcc_unreachable ();
5680     }
5681
5682   pop_stmt_changes (&use->stmt);
5683 }
5684
5685 /* Rewrite the uses using the selected induction variables.  */
5686
5687 static void
5688 rewrite_uses (struct ivopts_data *data)
5689 {
5690   unsigned i;
5691   struct iv_cand *cand;
5692   struct iv_use *use;
5693
5694   for (i = 0; i < n_iv_uses (data); i++)
5695     {
5696       use = iv_use (data, i);
5697       cand = use->selected;
5698       gcc_assert (cand);
5699
5700       rewrite_use (data, use, cand);
5701     }
5702 }
5703
5704 /* Removes the ivs that are not used after rewriting.  */
5705
5706 static void
5707 remove_unused_ivs (struct ivopts_data *data)
5708 {
5709   unsigned j;
5710   bitmap_iterator bi;
5711
5712   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5713     {
5714       struct version_info *info;
5715
5716       info = ver_info (data, j);
5717       if (info->iv
5718           && !zero_p (info->iv->step)
5719           && !info->inv_id
5720           && !info->iv->have_use_for
5721           && !info->preserve_biv)
5722         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5723     }
5724 }
5725
5726 /* Frees data allocated by the optimization of a single loop.  */
5727
5728 static void
5729 free_loop_data (struct ivopts_data *data)
5730 {
5731   unsigned i, j;
5732   bitmap_iterator bi;
5733   tree obj;
5734
5735   htab_empty (data->niters);
5736
5737   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5738     {
5739       struct version_info *info;
5740
5741       info = ver_info (data, i);
5742       if (info->iv)
5743         free (info->iv);
5744       info->iv = NULL;
5745       info->has_nonlin_use = false;
5746       info->preserve_biv = false;
5747       info->inv_id = 0;
5748     }
5749   bitmap_clear (data->relevant);
5750   bitmap_clear (data->important_candidates);
5751
5752   for (i = 0; i < n_iv_uses (data); i++)
5753     {
5754       struct iv_use *use = iv_use (data, i);
5755
5756       free (use->iv);
5757       BITMAP_FREE (use->related_cands);
5758       for (j = 0; j < use->n_map_members; j++)
5759         if (use->cost_map[j].depends_on)
5760           BITMAP_FREE (use->cost_map[j].depends_on);
5761       free (use->cost_map);
5762       free (use);
5763     }
5764   VEC_truncate (iv_use_p, data->iv_uses, 0);
5765
5766   for (i = 0; i < n_iv_cands (data); i++)
5767     {
5768       struct iv_cand *cand = iv_cand (data, i);
5769
5770       if (cand->iv)
5771         free (cand->iv);
5772       if (cand->depends_on)
5773         BITMAP_FREE (cand->depends_on);
5774       free (cand);
5775     }
5776   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5777
5778   if (data->version_info_size < num_ssa_names)
5779     {
5780       data->version_info_size = 2 * num_ssa_names;
5781       free (data->version_info);
5782       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5783     }
5784
5785   data->max_inv_id = 0;
5786
5787   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5788     SET_DECL_RTL (obj, NULL_RTX);
5789
5790   VEC_truncate (tree, decl_rtl_to_reset, 0);
5791 }
5792
5793 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5794    loop tree.  */
5795
5796 static void
5797 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5798 {
5799   free_loop_data (data);
5800   free (data->version_info);
5801   BITMAP_FREE (data->relevant);
5802   BITMAP_FREE (data->important_candidates);
5803   htab_delete (data->niters);
5804
5805   VEC_free (tree, heap, decl_rtl_to_reset);
5806   VEC_free (iv_use_p, heap, data->iv_uses);
5807   VEC_free (iv_cand_p, heap, data->iv_candidates);
5808 }
5809
5810 /* Optimizes the LOOP.  Returns true if anything changed.  */
5811
5812 static bool
5813 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5814 {
5815   bool changed = false;
5816   struct iv_ca *iv_ca;
5817   edge exit;
5818
5819   data->current_loop = loop;
5820
5821   if (dump_file && (dump_flags & TDF_DETAILS))
5822     {
5823       fprintf (dump_file, "Processing loop %d\n", loop->num);
5824
5825       exit = single_dom_exit (loop);
5826       if (exit)
5827         {
5828           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5829                    exit->src->index, exit->dest->index);
5830           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5831           fprintf (dump_file, "\n");
5832         }
5833
5834       fprintf (dump_file, "\n");
5835     }
5836
5837   /* For each ssa name determines whether it behaves as an induction variable
5838      in some loop.  */
5839   if (!find_induction_variables (data))
5840     goto finish;
5841
5842   /* Finds interesting uses (item 1).  */
5843   find_interesting_uses (data);
5844   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5845     goto finish;
5846
5847   /* Finds candidates for the induction variables (item 2).  */
5848   find_iv_candidates (data);
5849
5850   /* Calculates the costs (item 3, part 1).  */
5851   determine_use_iv_costs (data);
5852   determine_iv_costs (data);
5853   determine_set_costs (data);
5854
5855   /* Find the optimal set of induction variables (item 3, part 2).  */
5856   iv_ca = find_optimal_iv_set (data);
5857   if (!iv_ca)
5858     goto finish;
5859   changed = true;
5860
5861   /* Create the new induction variables (item 4, part 1).  */
5862   create_new_ivs (data, iv_ca);
5863   iv_ca_free (&iv_ca);
5864
5865   /* Rewrite the uses (item 4, part 2).  */
5866   rewrite_uses (data);
5867
5868   /* Remove the ivs that are unused after rewriting.  */
5869   remove_unused_ivs (data);
5870
5871   /* We have changed the structure of induction variables; it might happen
5872      that definitions in the scev database refer to some of them that were
5873      eliminated.  */
5874   scev_reset ();
5875
5876 finish:
5877   free_loop_data (data);
5878
5879   return changed;
5880 }
5881
5882 /* Main entry point.  Optimizes induction variables in loops.  */
5883
5884 void
5885 tree_ssa_iv_optimize (void)
5886 {
5887   struct loop *loop;
5888   struct ivopts_data data;
5889   loop_iterator li;
5890
5891   tree_ssa_iv_optimize_init (&data);
5892
5893   /* Optimize the loops starting with the innermost ones.  */
5894   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5895     {
5896       if (dump_file && (dump_flags & TDF_DETAILS))
5897         flow_loop_dump (loop, dump_file, NULL, 1);
5898
5899       tree_ssa_iv_optimize_loop (&data, loop);
5900     }
5901
5902   tree_ssa_iv_optimize_finalize (&data);
5903 }